In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns 
import geopandas as gpd 
import rasterio
import folium
import plotly.express as px
import os


In [4]:
# Load your dataset
climate_file_path = os.path.join("..", "data", "raw", "climate_data_nepal_district_wise_monthly.csv")

df = pd.read_csv(climate_file_path)

# Select the important columns only
selected_columns = [
    'YEAR', 'MONTH', 'DISTRICT', 'LAT', 'LON',
    'PRECTOT', 'T2M', 'T2M_MAX', 'RH2M', 'WS10M'
]

df_filtered = df[selected_columns]

# Filter rows where YEAR is between 2012 and 2017
df_filtered = df_filtered[(df_filtered['YEAR'] >= 2012) & (df_filtered['YEAR'] <= 2017)]

# Rename columns to simpler names
df_filtered.rename(columns={
    'PRECTOT': 'Prep',
    'T2M': 'AvgTemp',
    'T2M_MAX': 'MaxTemp',
    'RH2M': 'Humidity',
    'WS10M': 'WindSpeed'
}, inplace=True)

output_path = os.path.join("..", "data", "processed", "flitered_climate_data.csv")
df_filtered.to_csv(output_path, index=False)

df_filtered.head(15)


Unnamed: 0,YEAR,MONTH,DISTRICT,LAT,LON,Prep,AvgTemp,MaxTemp,Humidity,WindSpeed
372,2012,1,Arghakhanchi,27.9,83.2,36.46,12.78,19.09,51.21,2.23
373,2012,2,Arghakhanchi,27.9,83.2,10.93,16.4,23.9,39.39,2.85
374,2012,3,Arghakhanchi,27.9,83.2,17.71,20.72,28.99,28.13,3.04
375,2012,4,Arghakhanchi,27.9,83.2,14.04,26.91,34.38,27.54,3.61
376,2012,5,Arghakhanchi,27.9,83.2,7.53,30.94,38.31,19.34,3.14
377,2012,6,Arghakhanchi,27.9,83.2,71.2,32.26,38.69,34.73,3.14
378,2012,7,Arghakhanchi,27.9,83.2,422.26,26.72,30.22,80.56,2.5
379,2012,8,Arghakhanchi,27.9,83.2,326.18,25.5,28.8,84.98,2.25
380,2012,9,Arghakhanchi,27.9,83.2,493.01,24.17,27.51,86.41,2.08
381,2012,10,Arghakhanchi,27.9,83.2,39.75,20.68,25.11,77.63,1.68


In [4]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import os

data_dir = os.path.join("..", "data", "raw", "modis")
shapefile_path = os.path.join("..", "data", "raw", "Shapefiles", "District.shp")

# Load Nepal district shapefile
districts_gdf = gpd.read_file(shapefile_path)
districts_gdf = districts_gdf.to_crs("EPSG:4326")  # Convert to WGS84 if not already

# Years to process
years = list(range(2012, 2018))

# Store all processed data for optional combined file
all_years = []

for year in years:
    file_name = f'modis_{year}_Nepal.csv'
    file_path = os.path.join(data_dir, file_name)
    
    # Load fire data for the year
    df = pd.read_csv(file_path)
    
    # Convert to GeoDataFrame using lat/lon
    geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
    gdf = gpd.GeoDataFrame(df, geometry=geometry, crs="EPSG:4326")
    
    # Spatial join with district polygons
    joined = gpd.sjoin(gdf, districts_gdf, how="left", predicate='intersects')
    
    # Rename district column if needed 
    joined = joined.rename(columns={'DIST_NAME': 'District'})  # You might need to inspect column names
    
    # Drop lat/lon and geometry
    joined = joined.drop(columns=['latitude', 'longitude', 'geometry', 'index_right'], errors='ignore')
    
    # Save output file per year
    output_file = f'modis_{year}_Nepal_with_district.csv'
    output_path = os.path.join(data_dir, output_file)
    joined.to_csv(output_path, index=False)
   
    all_years.append(joined)





In [5]:
all_years


[      brightness  scan  track    acq_date  acq_time satellite instrument  \
 0          306.8   1.0    1.0  2012-01-05       746      Aqua      MODIS   
 1          301.2   1.0    1.0  2012-01-06       517     Terra      MODIS   
 2          316.2   1.0    1.0  2012-01-07       733      Aqua      MODIS   
 3          302.5   1.0    1.0  2012-01-14       739      Aqua      MODIS   
 4          309.6   1.1    1.1  2012-01-14       739      Aqua      MODIS   
 ...          ...   ...    ...         ...       ...       ...        ...   
 3576       312.9   1.1    1.1  2012-12-31       740      Aqua      MODIS   
 3577       336.6   1.1    1.1  2012-12-31       740      Aqua      MODIS   
 3578       306.6   1.0    1.0  2012-12-31       740      Aqua      MODIS   
 3579       323.5   1.1    1.1  2012-12-31       741      Aqua      MODIS   
 3580       323.9   1.1    1.1  2012-12-31       741      Aqua      MODIS   
 
       confidence  version  bright_t31   frp daynight  type  OBJECTID  \
 

In [6]:
#  Save combined file
combined_output = os.path.join(data_dir, 'modis_2012_2017_all_districts.csv')
pd.concat(all_years, ignore_index=True).to_csv(combined_output, index=False)

In [7]:
import pandas as pd

# Load the combined fire dataset
modis_path = os.path.join("..", "data", "raw", "modis", "modis_2012_2017_all_districts.csv")
df = pd.read_csv(modis_path)

# Drop unnecessary columns
columns_to_drop = [
    'scan', 'track', 'acq_time', 'satellite', 'instrument',
    'version', 'type', 'OBJECTID', 'State', 'State_Code',
    'Center', 'Area_SQKM'
]

df = df.drop(columns=columns_to_drop, errors='ignore')

df = df.rename(columns={
    'brightness': 'Brightness',
    'acq_date': 'Date',
    'confidence': 'Confidence',
    'bright_t31': 'ThermalData',
    'frp': 'FRP',                 ##Fire Radiative Power
    'daynight': 'DayNight',
    'District':'DISTRICT'
})


df.head()



Unnamed: 0,Brightness,Date,Confidence,ThermalData,FRP,DayNight,DISTRICT
0,306.8,2012-01-05,64,292.8,6.0,D,NAWALPARASI_W
1,301.2,2012-01-06,24,279.2,7.0,D,JAJARKOT
2,316.2,2012-01-07,75,298.4,9.8,D,SUNSARI
3,302.5,2012-01-14,53,292.0,3.3,D,RAUTAHAT
4,309.6,2012-01-14,67,294.3,6.6,D,SAPTARI


In [8]:
df.count()

Brightness     20626
Date           20626
Confidence     20626
ThermalData    20626
FRP            20626
DayNight       20626
DISTRICT       20148
dtype: int64

In [9]:
import pandas as pd


# Convert the 'Date' column to datetime format (handle errors)
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Extract YEAR, MONTH, and DAY from the 'Date' column
df['YEAR'] = df['Date'].dt.year
df['MONTH'] = df['Date'].dt.month
df['DAY'] = df['Date'].dt.day

# Drop the original 'Date' column
df = df.drop(columns=['Date'], errors='ignore')

# Reorganize columns to have YEAR, MONTH, and DAY first
df = df[['YEAR', 'MONTH', 'DAY', 'DISTRICT', 'Brightness', 'Confidence', 'ThermalData', 'FRP', 'DayNight']]

# Display the updated DataFrame
print(df.head())


   YEAR  MONTH  DAY       DISTRICT  Brightness  Confidence  ThermalData  FRP  \
0  2012      1    5  NAWALPARASI_W       306.8          64        292.8  6.0   
1  2012      1    6       JAJARKOT       301.2          24        279.2  7.0   
2  2012      1    7        SUNSARI       316.2          75        298.4  9.8   
3  2012      1   14       RAUTAHAT       302.5          53        292.0  3.3   
4  2012      1   14        SAPTARI       309.6          67        294.3  6.6   

  DayNight  
0        D  
1        D  
2        D  
3        D  
4        D  


In [10]:
df['DISTRICT'] = df['DISTRICT'].replace({
    'RUKUM_E': 'RUKUM',
    'RUKUM_W': 'RUKUM',
    'NAWALPARASI_E': 'NAWALPARASI',
    'NAWALPARASI_W': 'NAWALPARASI'
})

In [11]:
df.count()

YEAR           20626
MONTH          20626
DAY            20626
DISTRICT       20148
Brightness     20626
Confidence     20626
ThermalData    20626
FRP            20626
DayNight       20626
dtype: int64

In [12]:
output_path = os.path.join("..", "data", "raw", "modis", "modis_2012_2017_all_districts.csv")
df.to_csv(output_path, index=False)

In [13]:
df.isnull().sum()

df.dropna(inplace=True)

df.isnull().sum()




YEAR           0
MONTH          0
DAY            0
DISTRICT       0
Brightness     0
Confidence     0
ThermalData    0
FRP            0
DayNight       0
dtype: int64

In [14]:
df.count()

YEAR           20148
MONTH          20148
DAY            20148
DISTRICT       20148
Brightness     20148
Confidence     20148
ThermalData    20148
FRP            20148
DayNight       20148
dtype: int64

In [15]:
output_path = os.path.join("..", "data", "raw", "modis", "modis_2012_2017_all_districts.csv")
df.to_csv(output_path, index=False)

In [16]:
df.nunique()

YEAR              6
MONTH            12
DAY              31
DISTRICT         75
Brightness      694
Confidence      100
ThermalData     491
FRP            1115
DayNight          2
dtype: int64

In [17]:
import geopandas as gpd

climate_path = os.path.join("..", "data", "processed", "flitered_climate_data.csv")
df_climate = gpd.read_file(climate_path)
df_climate.head()

Unnamed: 0,YEAR,MONTH,DISTRICT,LAT,LON,Prep,AvgTemp,MaxTemp,Humidity,WindSpeed
0,2012,1,Arghakhanchi,27.9,83.2,36.46,12.78,19.09,51.21,2.23
1,2012,2,Arghakhanchi,27.9,83.2,10.93,16.4,23.9,39.39,2.85
2,2012,3,Arghakhanchi,27.9,83.2,17.71,20.72,28.99,28.13,3.04
3,2012,4,Arghakhanchi,27.9,83.2,14.04,26.91,34.38,27.54,3.61
4,2012,5,Arghakhanchi,27.9,83.2,7.53,30.94,38.31,19.34,3.14


In [18]:
print(df_climate['DISTRICT'].unique())

['Arghakhanchi' 'Baglung' 'Baitadi' 'Bajang' 'Banke' 'Bara' 'Bardiya'
 'Bhaktapur' 'Chitawan' 'Dadeldhura' 'Dailekh' 'Dang' 'Darchula' 'Dhading'
 'Dhankuta' 'Dhanusa' 'Dolkha' 'Dolpa' 'Doti' 'Gorkha' 'Gulmi' 'Humla'
 'Ilam' 'Jhapa' 'Jumla' 'Kabhre' 'Kailali' 'Kanchanpur' 'Kaski'
 'Kathmandu' 'Lalitpur' 'Lamjung' 'Mahottari' 'Makwanpur' 'Manang'
 'Morang' 'Mugu' 'Mustang' 'Myagdi' 'Nawalparasi' 'Nuwakot' 'Okhaldhunga'
 'Palpa' 'Panchther' 'Parbat' 'Rasuwa' 'Routahat' 'Rukum' 'Rupandehi'
 'Salyan' 'Sankhuwasabha' 'Saptari' 'Sarlahi' 'Sindhuli' 'Solukhumbu'
 'Sunsari' 'Surkhet' 'Syangja' 'Tanahun' 'Taplejung' 'Terhathum'
 'Udayapur']


In [19]:
fire_data_path = os.path.join("..", "data", "raw", "modis", "modis_2012_2017_all_districts.csv")
df_fire = pd.read_csv(fire_data_path)
df_fire.head()

Unnamed: 0,YEAR,MONTH,DAY,DISTRICT,Brightness,Confidence,ThermalData,FRP,DayNight
0,2012,1,5,NAWALPARASI,306.8,64,292.8,6.0,D
1,2012,1,6,JAJARKOT,301.2,24,279.2,7.0,D
2,2012,1,7,SUNSARI,316.2,75,298.4,9.8,D
3,2012,1,14,RAUTAHAT,302.5,53,292.0,3.3,D
4,2012,1,14,SAPTARI,309.6,67,294.3,6.6,D


In [20]:
df_fire.drop(columns=['DayNight'],inplace=True,errors='coerce')

In [21]:
df_fire.head()

Unnamed: 0,YEAR,MONTH,DAY,DISTRICT,Brightness,Confidence,ThermalData,FRP
0,2012,1,5,NAWALPARASI,306.8,64,292.8,6.0
1,2012,1,6,JAJARKOT,301.2,24,279.2,7.0
2,2012,1,7,SUNSARI,316.2,75,298.4,9.8
3,2012,1,14,RAUTAHAT,302.5,53,292.0,3.3
4,2012,1,14,SAPTARI,309.6,67,294.3,6.6


In [22]:

# Group by YEAR, MONTH, DISTRICT and compute meaningful statistics
aggregated_df = df_fire.groupby(['YEAR', 'MONTH', 'DISTRICT']).agg({
    'Brightness': 'mean',       # average intensity
    'Confidence': 'mean',       # average confidence
    'ThermalData': 'mean',      # average thermal reading
    'FRP': 'sum',               # total fire radiative power for month
    
}).reset_index()

# Rename the DayNight column to something meaningful
aggregated_df = aggregated_df.rename(columns={'DayNight': 'DayNight_Unique'})

# Add Fire_Count column
fire_counts = df_fire.groupby(['YEAR', 'MONTH', 'DISTRICT']).size().reset_index(name='Fire_Count')

# Merge fire count into the aggregated dataframe
final_df = pd.merge(aggregated_df, fire_counts, on=['YEAR', 'MONTH', 'DISTRICT'])

# View result
print(final_df.head())


   YEAR  MONTH  DISTRICT  Brightness  Confidence  ThermalData    FRP  \
0  2012      1   BARDIYA  313.650000   74.000000   291.350000   96.6   
1  2012      1  CHITAWAN  317.728571   67.142857   292.457143  221.0   
2  2012      1     DOLPA  314.487500   55.250000   292.150000  179.0   
3  2012      1    GORKHA  314.300000   60.600000   288.980000  345.5   
4  2012      1  JAJARKOT  303.633333   32.333333   288.100000   30.1   

   Fire_Count  
0           2  
1           7  
2           8  
3           5  
4           3  


In [23]:
output_path = os.path.join("..", "data", "processed", "filtered_fire_data.csv")
final_df.to_csv(output_path, index=False)

In [24]:
final_df.count()


YEAR           1608
MONTH          1608
DISTRICT       1608
Brightness     1608
Confidence     1608
ThermalData    1608
FRP            1608
Fire_Count     1608
dtype: int64

In [25]:
final_df['Fire_Count'].value_counts()

Fire_Count
1      430
2      227
3      164
4      117
6       72
      ... 
106      1
120      1
154      1
74       1
76       1
Name: count, Length: 114, dtype: int64

In [26]:
fire_path = os.path.join("..", "data", "processed", "filtered_fire_data.csv")
fire_df = pd.read_csv(fire_path)

In [27]:
print(fire_df['Fire_Count'].sum())

20148


In [28]:

# File paths (relative for portability)
climate_path = os.path.join("..", "data", "processed", "flitered_climate_data.csv")
fire_path = os.path.join("..", "data", "processed", "filtered_fire_data.csv")

# Load datasets
climate_df = pd.read_csv(climate_path)
fire_df = pd.read_csv(fire_path)

# Normalize DISTRICT column: strip whitespace and lowercase
climate_df['DISTRICT'] = climate_df['DISTRICT'].str.strip().str.lower()
fire_df['DISTRICT'] = fire_df['DISTRICT'].str.strip().str.lower()

# Merge on YEAR, MONTH, DISTRICT
combined_df = pd.merge(
    climate_df,
    fire_df,
    on=['YEAR', 'MONTH', 'DISTRICT'],
    how='left'
)

# Fill fire-related columns with 0 where data is missing
fire_columns = ['Brightness', 'Confidence', 'ThermalData', 'FRP', 'Fire_Count']
for col in fire_columns:
    if col in combined_df.columns:
        combined_df[col] = combined_df[col].fillna(0)
    else:
        combined_df[col] = 0

output_path = os.path.join("..", "data", "processed", "combined_fire_climate.csv")
combined_df.to_csv(output_path, index=False)


In [29]:
# Merge fire_df with combined_df to add LAT and LON based on YEAR, MONTH, DISTRICT
fire_with_location = pd.merge(
    fire_df,
    combined_df[['YEAR', 'MONTH', 'DISTRICT', 'LAT', 'LON']],
    on=['YEAR', 'MONTH', 'DISTRICT'],
    how='left'
)

# Save the result to a new file
output_path = os.path.join("..", "data", "processed", "filtered_fire_with_location.csv")
fire_with_location.to_csv(output_path, index=False)