In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import plotly.express as px

In [2]:
train_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_train.csv.zip"
valid_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_valid.csv.zip"
test_source = "https://github.com/prodramp/wildfire/raw/main/california-data/ca_fire_test.csv.zip"

In [3]:
train = pd.read_csv(train_source)
valid = pd.read_csv(valid_source)
test = pd.read_csv(test_source)

In [4]:
features = [
    'latitude', 'longitude', 'month',
    'fire_cnt_before', 'fire_before',
    'fire_cnt_last_year', 'fire_last_year',
    'fire_cnt_last_year_same_month', 'fire_last_year_same_month'
]

In [5]:
X_train = train[features]
y_train = train.fire

X_val, X_test, y_val, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


In [6]:
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [7]:
y_pred_val = model.predict(X_val)
y_pred_test = model.predict(X_test)


In [8]:
accuracy_val = accuracy_score(y_val, y_pred_val)
f1_val = f1_score(y_val, y_pred_val)
auc_val = roc_auc_score(y_val, y_pred_val)

In [9]:
print(f"Validation Accuracy: {accuracy_val}")
print(f"Validation F1-score: {f1_val}")
print(f"Validation ROC AUC: {auc_val}")

Validation Accuracy: 0.9973138887819267
Validation F1-score: 0.9677834690850057
Validation ROC AUC: 0.9725495100796823


In [10]:
accuracy_test = accuracy_score(y_test, y_pred_test)
f1_test = f1_score(y_test, y_pred_test)
auc_test = roc_auc_score(y_test, y_pred_test)

print(f"\nTest Accuracy: {accuracy_test}")
print(f"Test F1-score: {f1_test}")
print(f"Test ROC AUC: {auc_test}")


Test Accuracy: 0.9974235826203846
Test F1-score: 0.9690200920417554
Test ROC AUC: 0.97343897037508


In [12]:
from sklearn.metrics import roc_curve

In [13]:
fpr, tpr, thr = roc_curve(y_test, y_pred_test)
fig = px.line(pd.DataFrame(dict(FPR=fpr, TPR=tpr)), x='FPR', y='TPR', title='Wildfire Hotspot Model Performance')
fig.show()