###  Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import os

### Load the Raw FIRMS Datasets

In [4]:
# Change filenames if yours differ
archive_path = 'data/fire_archive_SV-C2_628186.csv'
nrt_path = 'data/fire_nrt_SV-C2_628186.csv'

# Load CSV files
df_archive = pd.read_csv(archive_path)
df_nrt = pd.read_csv(nrt_path)

# Concatenate both datasets
fire_df = pd.concat([df_archive, df_nrt], ignore_index=True)

# Show structure
print("Total Rows:", fire_df.shape[0])
fire_df.head()

Total Rows: 378307


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,27.95333,84.80014,338.45,0.42,0.38,2013-06-04,735,N,VIIRS,n,2,297.5,5.93,D,0.0
1,27.94984,84.80045,331.72,0.42,0.38,2013-06-04,735,N,VIIRS,n,2,298.06,2.23,D,0.0
2,29.45337,80.66618,337.33,0.4,0.37,2013-06-04,736,N,VIIRS,n,2,297.92,4.12,D,0.0
3,29.60177,81.93739,338.12,0.4,0.37,2013-06-09,742,N,VIIRS,n,2,301.43,4.1,D,0.0
4,29.77567,81.93168,336.25,0.4,0.37,2013-06-09,742,N,VIIRS,n,2,305.14,9.79,D,0.0


#### Filter for Nepal Only (Bounding Box)

In [5]:
# Nepal rough lat-lon box
nepal_df = fire_df[
    (fire_df['latitude'] >= 26.3) & (fire_df['latitude'] <= 30.5) &
    (fire_df['longitude'] >= 80.0) & (fire_df['longitude'] <= 89.5)
]

print("Nepal Fire Records:", nepal_df.shape)
nepal_df.head()


Nepal Fire Records: (378307, 15)


Unnamed: 0,latitude,longitude,brightness,scan,track,acq_date,acq_time,satellite,instrument,confidence,version,bright_t31,frp,daynight,type
0,27.95333,84.80014,338.45,0.42,0.38,2013-06-04,735,N,VIIRS,n,2,297.5,5.93,D,0.0
1,27.94984,84.80045,331.72,0.42,0.38,2013-06-04,735,N,VIIRS,n,2,298.06,2.23,D,0.0
2,29.45337,80.66618,337.33,0.4,0.37,2013-06-04,736,N,VIIRS,n,2,297.92,4.12,D,0.0
3,29.60177,81.93739,338.12,0.4,0.37,2013-06-09,742,N,VIIRS,n,2,301.43,4.1,D,0.0
4,29.77567,81.93168,336.25,0.4,0.37,2013-06-09,742,N,VIIRS,n,2,305.14,9.79,D,0.0


### Save Cleaned Nepal Fire Data


In [6]:
nepal_df.to_csv("nepal_fire_data_cleaned.csv", index=False)
print("Saved to 'nepal_fire_data_cleaned.csv'")


Saved to 'nepal_fire_data_cleaned.csv'


In [7]:
# Add Fire Label (1 = fire)
nepal_df['fire_occurred'] = 1

###  Generate No-Fire Points (Random Sampling in Nepal)

In [8]:
# Number of no-fire points = same as fire points
num_samples = len(nepal_df)

# Nepal bounds
min_lat, max_lat = 26.3, 30.5
min_lon, max_lon = 80.0, 89.5

# Random date range similar to fire dataset
date_range = pd.date_range(start='2012-01-01', end='2024-12-31')

# Generate random no-fire points
random.seed(42)
no_fire_data = []

for _ in range(num_samples):
    lat = round(random.uniform(min_lat, max_lat), 4)
    lon = round(random.uniform(min_lon, max_lon), 4)
    date = random.choice(date_range)
    no_fire_data.append([lat, lon, date, 0])

no_fire_df = pd.DataFrame(no_fire_data, columns=['latitude', 'longitude', 'acq_date', 'fire_occurred'])


In [9]:
# No-Fire dataset
no_fire_df.to_csv("nepal_no_fire_data.csv", index=False)
print("Saved 'nepal_no_fire_data.csv'")


Saved 'nepal_no_fire_data.csv'


### Merge Fire + No-Fire for Model Training

In [15]:
# Reformat fire dataframe to match no_fire_df
fire_data = nepal_df[['latitude', 'longitude', 'acq_date', 'fire_occurred']]

# Merge and shuffle
full_df = pd.concat([fire_data, no_fire_df], ignore_index=True)
full_df = full_df.sample(frac=1).reset_index(drop=True)

# Save the base dataset
full_df.to_csv("fire_dataset_base.csv", index=False)
print("Final dataset saved as 'fire_dataset_base.csv'")
fire_data.head()
fire_data.shape

Final dataset saved as 'fire_dataset_base.csv'


(378307, 4)

In [16]:
fire_dataset_base = pd.read_csv('fire_dataset_base.csv')
print(fire_dataset_base.shape)

(756614, 4)


### Sample the Dataset (Smart Downsampling)

In [17]:
# Stratified sampling
fire_df = full_df[full_df['fire_occurred'] == 1]
no_fire_df = full_df[full_df['fire_occurred'] == 0]

fire_sample = fire_df.sample(n=15000, random_state=42)
no_fire_sample = no_fire_df.sample(n=15000, random_state=42)

sample_df = pd.concat([fire_sample, no_fire_sample]).sample(frac=1).reset_index(drop=True)
sample_df.to_csv("sampled_fire_dataset.csv", index=False)
print("Sampled dataset ready with shape:", sample_df.shape)


Sampled dataset ready with shape: (30000, 4)


### Basic Data Cleaning

In [18]:
sample_df['acq_date'] = pd.to_datetime(sample_df['acq_date'], errors='coerce')
sample_df.dropna(subset=['latitude', 'longitude', 'acq_date'], inplace=True)

print("Cleaned sample dataset:", sample_df.shape)
sample_df.head()


Cleaned sample dataset: (30000, 4)


Unnamed: 0,latitude,longitude,acq_date,fire_occurred
0,27.7645,82.7044,2019-01-25,0
1,30.1244,83.1066,2022-10-06,0
2,26.7449,87.5445,2019-04-17,0
3,27.9938,87.3601,2021-10-29,0
4,28.2684,80.3336,2022-03-14,0


In [19]:
sample_df['uid'] = range(1, len(sample_df) + 1) #Handy for Tracking

## Feature Enrichment

In [22]:
#Installing python libraries:
!pip install tqdm requests pandas



In [None]:
import pandas as pd
import requests
from tqdm import tqdm
import time

# Read sampled fire dataset
df = pd.read_csv("sampled_fire_dataset.csv")

# Safely convert 'acq_date' to datetime
df['acq_date'] = pd.to_datetime(df['acq_date'], format='%Y-%m-%d', errors='coerce')

# Drop rows with invalid dates
df = df.dropna(subset=['acq_date'])

# Add empty weather feature columns
df['temperature'] = None
df['humidity'] = None
df['wind_speed'] = None
df['precipitation'] = None

# Define a helper to fetch data from NASA POWER API
def get_weather(lat, lon, date):
    base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    params = {
        "parameters": "T2M,RH2M,WS2M,PRECTOTCORR",
        "community": "AG",
        "longitude": lon,
        "latitude": lat,
        "start": date.strftime('%Y%m%d'),
        "end": date.strftime('%Y%m%d'),
        "format": "JSON"
    }

    try:
        response = requests.get(base_url, params=params, timeout=10)
        if response.status_code == 200:
            data = response.json()
            daily = data["properties"]["parameter"]
            key = date.strftime('%Y%m%d')
            return {
                "temperature": daily["T2M"].get(key),
                "humidity": daily["RH2M"].get(key),
                "wind_speed": daily["WS2M"].get(key),
                "precipitation": daily["PRECTOTCORR"].get(key)
            }
    except Exception as e:
        return None

# Enrichment loop
for idx, row in tqdm(df.iterrows(), total=len(df)):
    weather = get_weather(row['latitude'], row['longitude'], row['acq_date'])
    if weather:
        df.at[idx, 'temperature'] = weather["temperature"]
        df.at[idx, 'humidity'] = weather["humidity"]
        df.at[idx, 'wind_speed'] = weather["wind_speed"]
        df.at[idx, 'precipitation'] = weather["precipitation"]
    time.sleep(0.4)  # respect API limits

# Save to file
df.to_csv("fire_dataset_enriched_weather.csv", index=False)
print("Weather data enrichment complete. File saved as 'fire_dataset_enriched_weather.csv'")


 70%|█████████████████████████████████████████████████▌                     | 10478/15000 [14:02:26<4:33:44,  3.63s/it]