In [None]:
!pip install joblib
!pip install imbalanced-learn
!pip install folium geopandas



# Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import geopandas as gpd
import matplotlib.pyplot as plt
import pickle
import json
import joblib


from shapely.geometry import Polygon
from shapely.geometry import Point
from collections import Counter
from sklearn.preprocessing import LabelEncoder
from imblearn.combine import SMOTEENN, SMOTETomek
from imblearn.metrics import geometric_mean_score
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.utils import compute_class_weight
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from pyproj import CRS, Transformer
from shapely.geometry import Polygon
from google.colab import drive, files

drive.mount('/content/drive')


# Data Preprocessing and Model Training


## **Data Preprocessing**

In [2]:
# Load the firepoints data and keep only rows whose 'type' is 0
selected_cols = ['latitude', 'longitude', 'bright_ti4', 'scan', 'track', 'confidence', 'frp', 'type', 'daynight', 'formatted_acq_dt']
df = pd.read_csv('/content/drive/MyDrive/filtered_feb-apr_2012-2021_firepoints_altered_dt.csv', usecols=selected_cols)
# .copy() detaches the filtered rows from the raw frame so the column
# assignment below cannot raise SettingWithCopyWarning.
df = df[df['type'] == 0].drop(columns='type').copy()
# Offset of 273.15 -- presumably Kelvin to Celsius; confirm the source units.
df['bright_ti4'] = df['bright_ti4'] - 273.15

# Prepare df_c for the classification model: only position and acquisition time are kept
selected_cols = ['latitude', 'longitude', 'formatted_acq_dt']
df_c = df[selected_cols].copy()

# Read and combine weather data.
# Years listed in tmd_years come from the "tmd_wunderground" files, every other
# year in 2012-2021 from the plain "wunderground" files. The files are read into
# a list and concatenated once (no repeated concat inside a loop), and the
# firepoints frame `df` is no longer overwritten by the per-year frames.
tmd_years = [2012, 2014, 2017, 2018, 2019, 2020]
missing_years = [year for year in range(2012, 2022) if year not in tmd_years]
weather_paths = (
    [f"/content/drive/MyDrive/tmd_wunderground_30_minute/tmd_wunderground_weather_points_{year}.csv" for year in tmd_years]
    + [f"/content/drive/MyDrive/wunderground_30_minute/wunderground_weather_points_{year}.csv" for year in missing_years]
)
w_df = pd.concat([pd.read_csv(path) for path in weather_paths], ignore_index=True)

# Convert columns to datetime objects (unparseable values become NaT)
df_c['formatted_acq_dt'] = pd.to_datetime(df_c['formatted_acq_dt'], format='%Y%m%d%H%M%S', errors='coerce')
w_df['datetime'] = pd.to_datetime(w_df['datetime'], format='%Y%m%d%H%M%S', errors='coerce')

# Round 'formatted_acq_dt' to the NEAREST hour: minute >= 30 goes up, otherwise down.
# Adding 30 minutes and flooring is the vectorised form of that rule.
df_c['rounded_acq_dt'] = (df_c['formatted_acq_dt'] + pd.Timedelta(minutes=30)).dt.floor('H')

# Merge dataframes on the datetime columns
combined_df = pd.merge(df_c, w_df, left_on='rounded_acq_dt', right_on='datetime', how='inner')

# Keep only rows with a usable position: present, non-zero, and strictly
# inside the valid latitude/longitude ranges.
lat_values = combined_df['latitude']
lon_values = combined_df['longitude']
valid_position = (
    lat_values.notna() & lon_values.notna()
    & (lat_values != 0) & (lon_values != 0)
    & (lat_values > -90) & (lat_values < 90)
    & (lon_values > -180) & (lon_values < 180)
)
combined_df = combined_df[valid_position]

# Keep one row per weather timestamp (the first one) and drop rows without a timestamp
combined_df = (
    combined_df
    .drop(columns=['formatted_acq_dt'])
    .drop_duplicates(subset=['datetime'])
    .dropna(subset=['datetime'])
)

# Split datetime into year, month, day, hour, mins columns
timestamp_parts = combined_df['datetime'].dt
combined_df = combined_df.assign(
    year=timestamp_parts.year,
    month=timestamp_parts.month,
    day=timestamp_parts.day,
    hour=timestamp_parts.hour,
    mins=timestamp_parts.minute,
)
# Convert wind direction labels to numeric bearings
def convert_wind_direction(wind_dir):
    """Translate a compass label into a bearing in degrees.

    Parameters
    ----------
    wind_dir : hashable
        One of the 16 compass labels ('N', 'NNE', ..., 'NNW'), 'CALM' or 'VAR'.

    Returns
    -------
    int or float
        The bearing for a known compass label, 0 for 'CALM', and NaN for
        'VAR' or for any value that is not in the lookup table.
    """
    bearings = dict(
        N=0, NNE=22.5, NE=45, ENE=67.5,
        E=90, ESE=112.5, SE=135, SSE=157.5,
        S=180, SSW=202.5, SW=225, WSW=247.5,
        W=270, WNW=292.5, NW=315, NNW=337.5,
        CALM=0,      # note: shares the value 0 with 'N'
        VAR=np.nan,  # explicitly mapped to NaN
    )
    if wind_dir in bearings:
        return bearings[wind_dir]
    return np.nan

# Columns that make up the weather feature set
weather_features = ['Wind', 'Gust_kph', 'Temperature_C', 'Pressure_hPa', 'Humidity_%']

# Replace the compass labels in 'Wind' with numeric bearings
combined_df['Wind'] = combined_df['Wind'].map(convert_wind_direction)

# Discard rows missing any weather feature (includes winds that mapped to NaN)
combined_df = combined_df.dropna(subset=weather_features)

# Standardize the weather features.
# NOTE(review): the scaler is fit on every row, before any train/test split,
# and determine_fire_risk later thresholds these standardized values.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(combined_df[weather_features])
combined_df[weather_features] = standardized_values


# Report on the combined dataset (no class column exists yet at this point)
print("Class distribution in the combined dataset:")
print('***************************************************************************')
print('Combined_df:', combined_df)

Class distribution in the combined dataset:
***************************************************************************
Combined_df:          latitude  longitude      rounded_acq_dt            datetime  \
0       18.213800  98.420288 2012-02-03 06:00:00 2012-02-03 06:00:00   
7       18.225178  98.547394 2012-02-03 19:00:00 2012-02-03 19:00:00   
13      18.896162  98.966400 2012-02-04 06:00:00 2012-02-04 06:00:00   
34      19.701141  98.732376 2012-02-04 19:00:00 2012-02-04 19:00:00   
37      18.530241  98.238663 2012-02-05 06:00:00 2012-02-05 06:00:00   
...           ...        ...                 ...                 ...   
126819  19.938858  99.385406 2021-04-23 18:00:00 2021-04-23 18:00:00   
126822  18.885382  98.166946 2021-04-24 07:00:00 2021-04-24 07:00:00   
126858  18.329712  98.227409 2021-04-25 07:00:00 2021-04-25 07:00:00   
126884  18.308865  98.524147 2021-04-26 07:00:00 2021-04-26 07:00:00   
126888  19.566952  98.656631 2021-04-30 07:00:00 2021-04-30 07:00:00   

  

In [3]:
# Label a row's fire risk from its (standardized) temperature and wind values
def determine_fire_risk(row):
    """Classify one row as 'high', 'medium' or 'low' fire risk.

    Parameters
    ----------
    row : mapping
        Must provide 'Temperature_C' and 'Wind'.

    Returns
    -------
    str
        'high'   when both values are greater than 0.5,
        'medium' when both values lie in [-0.5, 0.5],
        'low'    in every other case (including NaN inputs).
    """
    temperature, wind_value = row['Temperature_C'], row['Wind']

    both_elevated = temperature > 0.5 and wind_value > 0.5
    if both_elevated:
        return 'high'

    # |v| <= 0.5 is the same test as -0.5 <= v <= 0.5
    both_moderate = abs(temperature) <= 0.5 and abs(wind_value) <= 0.5
    return 'medium' if both_moderate else 'low'


# Label every row, then store the labels in a new 'fire_risk' column
row_risk_labels = combined_df.apply(determine_fire_risk, axis=1)
combined_df['fire_risk'] = row_risk_labels

# Show the labelled dataframe
print(combined_df)


         latitude  longitude      rounded_acq_dt            datetime  \
0       18.213800  98.420288 2012-02-03 06:00:00 2012-02-03 06:00:00   
7       18.225178  98.547394 2012-02-03 19:00:00 2012-02-03 19:00:00   
13      18.896162  98.966400 2012-02-04 06:00:00 2012-02-04 06:00:00   
34      19.701141  98.732376 2012-02-04 19:00:00 2012-02-04 19:00:00   
37      18.530241  98.238663 2012-02-05 06:00:00 2012-02-05 06:00:00   
...           ...        ...                 ...                 ...   
126819  19.938858  99.385406 2021-04-23 18:00:00 2021-04-23 18:00:00   
126822  18.885382  98.166946 2021-04-24 07:00:00 2021-04-24 07:00:00   
126858  18.329712  98.227409 2021-04-25 07:00:00 2021-04-25 07:00:00   
126884  18.308865  98.524147 2021-04-26 07:00:00 2021-04-26 07:00:00   
126888  19.566952  98.656631 2021-04-30 07:00:00 2021-04-30 07:00:00   

            Wind  Gust_kph  Temperature_C  Pressure_hPa  Humidity_%  year  \
0      -0.264456  0.470653       0.260653      0.949328   

In [4]:
# Alias only (no copy): `df` and `combined_df` now refer to the same DataFrame object.
df = combined_df

In [7]:
# Model inputs are the five weather columns; latitude and longitude are left out
features = ['Wind', 'Gust_kph', 'Temperature_C', 'Pressure_hPa', 'Humidity_%']
target = 'fire_risk'

X, y = df[features], df[target]

# Encode the string labels as integers.
# LabelEncoder numbers the classes in sorted order of the label values;
# `label_encoder.classes_` holds the label for each integer code.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y_encoded = label_encoder.transform(y)

# Preview the feature rows and the first 20 encoded labels
print(X.head())
print(y_encoded[:20])


         Wind  Gust_kph  Temperature_C  Pressure_hPa  Humidity_%
108 -0.264456  0.470653       0.260653      0.949328    0.314609
140 -0.264456 -1.012319      -1.098822      0.907496    1.644473
167 -0.264456  0.554753       0.378349      0.972442   -0.200082
213  0.714645 -1.012319      -1.220667      0.882183    1.925668
237  0.225095 -0.193741       0.355536      0.762198   -0.345415
[1 0 1 0 1 1 0 1 0 1 0 1 0 1 0 1 1 0 1 0]


## **Model Training**

In [10]:
# Hold out 30% of the rows for testing; the other 70% train the model
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42
)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# Fit the scaler on the training rows only, then reuse it for both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Random forest with a fixed seed so the fit is repeatable
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate on the held-out rows.
# NOTE(review): 'fire_risk' is computed by determine_fire_risk from
# 'Temperature_C' and 'Wind', both of which are model features, so a
# near-perfect score is expected here and is not evidence of predictive skill.
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy}')
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

# Persist the fitted model and scaler to Drive
for artifact, destination in (
    (model, '/content/drive/MyDrive/fire_risk.pkl'),
    (scaler, '/content/drive/MyDrive/scaler.pkl'),
):
    joblib.dump(artifact, destination)
print("Model and scaler saved successfully.")


Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       212
           1       1.00      1.00      1.00       326

    accuracy                           1.00       538
   macro avg       1.00      1.00      1.00       538
weighted avg       1.00      1.00      1.00       538

[[212   0]
 [  0 326]]
Model and scaler saved successfully.


## **Map Gridding and Visualization**

**Prepare the Grid Cells with Weather Data**

In [5]:
# Load the GeoJSON polygon data for Chiang Mai
polygon_gdf = gpd.read_file('/content/drive/MyDrive/chiang_mai_polygon.geojson')

# Edge length of one grid cell, in the polygon's coordinate units
# (the printed grid coordinates are longitude/latitude degrees).
# 1 degree of latitude is about 111 km, so 0.05 degrees is about 5.5 km, not 10 km.
grid_size = 0.05  # roughly 5.5 km north-south; the east-west extent varies with latitude

# Function to create a grid over the polygon area
def create_grid(polygon_gdf, grid_size):
    """Cover the polygons' bounding box with square cells and keep those that touch the polygons.

    Parameters
    ----------
    polygon_gdf : geopandas.GeoDataFrame
        Geometries whose union defines the area of interest.
    grid_size : float
        Edge length of each square cell, in the coordinate units of `polygon_gdf`.

    Returns
    -------
    geopandas.GeoDataFrame
        One row per cell that intersects the area, with columns 'geometry'
        (the cell polygon), 'center_lon' and 'center_lat' (the x and y of the
        cell centroid). No CRS is set on the result.
    """
    minx, miny, maxx, maxy = polygon_gdf.total_bounds
    x_grid = np.arange(minx, maxx, grid_size)
    y_grid = np.arange(miny, maxy, grid_size)

    # Build the union once up front: it does not change between cells, so
    # recomputing it inside the nested loop only adds cost.
    area_of_interest = polygon_gdf.unary_union

    polygons, center_lons, center_lats = [], [], []
    for x in x_grid:
        for y in y_grid:
            cell = Polygon([(x, y), (x + grid_size, y), (x + grid_size, y + grid_size), (x, y + grid_size)])
            if cell.intersects(area_of_interest):
                centroid = cell.centroid
                polygons.append(cell)
                center_lons.append(centroid.x)
                center_lats.append(centroid.y)

    grid = gpd.GeoDataFrame({'geometry': polygons, 'center_lon': center_lons, 'center_lat': center_lats})
    return grid

# Build the grid that covers the polygon
grid = create_grid(polygon_gdf, grid_size)

# Tag the grid with the polygon's CRS so both layers share one reference system
grid = grid.set_crs(polygon_gdf.crs)

# Preview the first cells with their center coordinates
print(grid.head())


                                            geometry  center_lon  center_lat
0  POLYGON ((98.01096 17.84355, 98.06096 17.84355...   98.035961   17.868553
1  POLYGON ((98.01096 17.89355, 98.06096 17.89355...   98.035961   17.918553
2  POLYGON ((98.01096 17.94355, 98.06096 17.94355...   98.035961   17.968553
3  POLYGON ((98.06096 17.59355, 98.11096 17.59355...   98.085961   17.618553
4  POLYGON ((98.06096 17.64355, 98.11096 17.64355...   98.085961   17.668553


In [7]:
# Turn the weather table into point geometries (x = longitude, y = latitude)
observation_points = gpd.points_from_xy(combined_df.longitude, combined_df.latitude)
weather_gdf = gpd.GeoDataFrame(combined_df, geometry=observation_points)

# Copy the fire-risk label of the nearest weather point onto each grid cell
def get_closest_weather_data(grid, weather_gdf):
    """Attach the nearest weather point's 'fire_risk' to every grid cell.

    Parameters
    ----------
    grid : GeoDataFrame
        Must contain 'center_lon' and 'center_lat'. It is modified in place:
        a 'fire_risk' column is added (or overwritten).
    weather_gdf : GeoDataFrame
        Point geometries with a 'fire_risk' column.

    Returns
    -------
    GeoDataFrame
        The same `grid` object that was passed in.
    """
    grid['fire_risk'] = None
    cell_centers = zip(grid.index, grid['center_lon'], grid['center_lat'])
    for cell_label, lon, lat in cell_centers:
        # Index label of the weather row with the smallest distance to this center
        closest_label = weather_gdf.distance(Point(lon, lat)).idxmin()
        grid.at[cell_label, 'fire_risk'] = weather_gdf.loc[closest_label, 'fire_risk']
    return grid

# Label every grid cell from its closest weather point
grid_with_weather = get_closest_weather_data(grid, weather_gdf)

# Preview the labelled grid
print(grid_with_weather.head())


                                            geometry  center_lon  center_lat  \
0  POLYGON ((98.01096 17.84355, 98.06096 17.84355...   98.035961   17.868553   
1  POLYGON ((98.01096 17.89355, 98.06096 17.89355...   98.035961   17.918553   
2  POLYGON ((98.01096 17.94355, 98.06096 17.94355...   98.035961   17.968553   
3  POLYGON ((98.06096 17.59355, 98.11096 17.59355...   98.085961   17.618553   
4  POLYGON ((98.06096 17.64355, 98.11096 17.64355...   98.085961   17.668553   

  fire_risk  
0    medium  
1    medium  
2    medium  
3       low  
4       low  


In [20]:
print(combined_df.columns)  # Verify that combined_df has all necessary columns

# Create a GeoDataFrame from weather data
weather_gdf = gpd.GeoDataFrame(
    combined_df, geometry=gpd.points_from_xy(combined_df.longitude, combined_df.latitude))

# Function to find the closest weather point to each grid cell center
def get_closest_weather_data(grid, weather_gdf):
    """Attach the nearest weather point's label and weather features to every grid cell.

    Parameters
    ----------
    grid : GeoDataFrame
        Must contain 'center_lon' and 'center_lat'. It is modified in place:
        the copied columns are added (or overwritten).
    weather_gdf : GeoDataFrame
        Point geometries carrying 'fire_risk', 'Wind', 'Gust_kph',
        'Temperature_C', 'Pressure_hPa' and 'Humidity_%'. Its index labels
        must be unique.

    Returns
    -------
    GeoDataFrame
        The same `grid` object that was passed in.
    """
    copied_cols = ['fire_risk', 'Wind', 'Gust_kph', 'Temperature_C', 'Pressure_hPa', 'Humidity_%']

    # For every cell center, the index label of the closest weather row
    nearest_labels = [
        weather_gdf.distance(Point(lon, lat)).idxmin()
        for lon, lat in zip(grid['center_lon'], grid['center_lat'])
    ]

    # Assign whole columns at once so numeric features keep a numeric dtype
    # (filling a None-initialised column cell by cell leaves it as object dtype).
    nearest_rows = weather_gdf.loc[nearest_labels, copied_cols]
    for col in copied_cols:
        grid[col] = nearest_rows[col].to_numpy()

    return grid

grid_with_weather = get_closest_weather_data(grid, weather_gdf)

# Display the updated grid with weather data
print(grid_with_weather.head())

# Prepare features for prediction
features = ['Wind', 'Gust_kph', 'Temperature_C', 'Pressure_hPa', 'Humidity_%']
X_grid = grid_with_weather[features].astype(float)

# Load the trained scaler.
# joblib.load unpickles the file: only load artifacts you created yourself.
scaler = joblib.load('/content/drive/MyDrive/scaler.pkl')

# Ensure features are standardized using the same scaler used for training
X_grid = scaler.transform(X_grid)

# Load the trained model
model = joblib.load('/content/drive/MyDrive/fire_risk.pkl')

# Predict fire risk for each grid cell
grid_with_weather['predicted_fire_risk'] = model.predict(X_grid)

# Map numeric predictions back to categorical labels.
# The codes were produced by `label_encoder`, which numbers classes in sorted
# label order, so the mapping is taken from the encoder instead of being
# hard-coded (a fixed {0: 'low', 1: 'medium', 2: 'high'} does not match it).
risk_mapping = dict(enumerate(label_encoder.classes_))
grid_with_weather['predicted_fire_risk'] = grid_with_weather['predicted_fire_risk'].map(risk_mapping)

# Display the final dataframe with predictions
print(grid_with_weather.head(20))


Index(['latitude', 'longitude', 'rounded_acq_dt', 'datetime', 'Wind',
       'Gust_kph', 'Temperature_C', 'Pressure_hPa', 'Humidity_%', 'year',
       'month', 'day', 'hour', 'mins', 'fire_risk'],
      dtype='object')
                                            geometry  center_lon  center_lat  \
0  POLYGON ((98.01096 17.84355, 98.06096 17.84355...   98.035961   17.868553   
1  POLYGON ((98.01096 17.89355, 98.06096 17.89355...   98.035961   17.918553   
2  POLYGON ((98.01096 17.94355, 98.06096 17.94355...   98.035961   17.968553   
3  POLYGON ((98.06096 17.59355, 98.11096 17.59355...   98.085961   17.618553   
4  POLYGON ((98.06096 17.64355, 98.11096 17.64355...   98.085961   17.668553   

  fire_risk      Wind  Gust_kph Temperature_C Pressure_hPa Humidity_%  \
0       low -0.264456 -0.788051     -0.994606     0.286665   1.663759   
1       low  0.225095 -0.698344     -0.813135    -0.503678   0.721506   
2       low  0.225095   0.05015      0.534933    -1.168537  -0.949304   
3       

**Visualize the Predictions on a Choropleth Map**


In [25]:
import geopandas as gpd
import folium
from folium.features import GeoJson, GeoJsonTooltip

# Wrap the enriched grid in a GeoDataFrame for folium
gdf = gpd.GeoDataFrame(grid_with_weather)

# Center the map on the mean of the cell centers (latitude first, then longitude)
map_center = [gdf['center_lat'].mean(), gdf['center_lon'].mean()]
m = folium.Map(location=map_center, zoom_start=6)

# Fill colour for each risk level: green, orange, red
colormap = dict(low='#1a9641', medium='#fdae61', high='#d7191c')

# Styling callback for the GeoJson layer
def style_function(feature):
    """Return the folium style dict for one GeoJSON feature.

    The fill colour is looked up from `colormap` using the feature's
    'fire_risk' property; values missing from `colormap` fall back to white.
    NOTE(review): this colours by 'fire_risk', not 'predicted_fire_risk' --
    confirm which column the map is meant to show.
    """
    risk_level = feature['properties']['fire_risk']
    fill = colormap[risk_level] if risk_level in colormap else '#ffffff'
    return dict(fillColor=fill, color='black', weight=1, fillOpacity=0.7)

# Tooltip listing the risk label and the cell center coordinates
cell_tooltip = GeoJsonTooltip(
    fields=['fire_risk', 'center_lon', 'center_lat'],
    aliases=['Fire Risk:', 'Longitude:', 'Latitude:'],
)

# Build the styled GeoJson layer from the GeoDataFrame and attach it to the map
geojson = folium.GeoJson(gdf, style_function=style_function, tooltip=cell_tooltip)
geojson.add_to(m)

# Write the map to an HTML file
m.save('fire_risk_grid_map.html')

# Leave the map as the last expression so the notebook renders it
m


In [27]:
# Write the grid map to HTML, then hand the file to the browser for download
grid_map_file = 'fire_risk_grid_map.html'
m.save(grid_map_file)
files.download(grid_map_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Optional: the cell below is extra and can be deleted.

In [24]:
import folium
import geopandas as gpd
from folium.plugins import MarkerCluster

# Build point geometries from the weather table (x = longitude, y = latitude)
weather_gdf = gpd.GeoDataFrame(
    combined_df,
    geometry=gpd.points_from_xy(combined_df.longitude, combined_df.latitude),
)

# Store 'fire_risk' as an ordered categorical: low < medium < high
risk_levels = ['low', 'medium', 'high']
weather_gdf['fire_risk'] = pd.Categorical(weather_gdf['fire_risk'], categories=risk_levels, ordered=True)

# Center the map on the mean observation position
mean_position = [combined_df['latitude'].mean(), combined_df['longitude'].mean()]
m = folium.Map(location=mean_position, zoom_start=6)

# One clustered marker per observation; anything that is not 'high' or 'medium' is drawn green
marker_cluster = MarkerCluster().add_to(m)
icon_colors = {'high': 'red', 'medium': 'orange'}

observations = zip(weather_gdf['latitude'], weather_gdf['longitude'], weather_gdf['fire_risk'])
for lat, lon, risk in observations:
    marker = folium.Marker(
        location=[lat, lon],
        popup=f"Fire Risk: {risk}<br>Latitude: {lat}<br>Longitude: {lon}",
        icon=folium.Icon(color=icon_colors.get(risk, 'green')),
    )
    marker.add_to(marker_cluster)

# Write the marker map to an HTML file
m.save('fire_risk_map.html')

# Leave the map as the last expression so the notebook renders it
m
