In [94]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib
import json

In [95]:
# Read the forest-fires table from a CSV file (relative path, so it is
# resolved against the notebook's working directory).
csv_path = 'forestfires.csv'
df = pd.read_csv(csv_path)

In [96]:
# Quick sanity check after loading: the row count, then the first rows.
n_rows = len(df)
print("Loaded rows:", n_rows)
preview = df.head()
print(preview)

Loaded rows: 517
   X  Y month  day  FFMC   DMC     DC  ISI  temp  RH  wind  rain  area
0  7  5   mar  fri  86.2  26.2   94.3  5.1   8.2  51   6.7   0.0   0.0
1  7  4   oct  tue  90.6  35.4  669.1  6.7  18.0  33   0.9   0.0   0.0
2  7  4   oct  sat  90.6  43.7  686.9  6.7  14.6  33   1.3   0.0   0.0
3  8  6   mar  fri  91.7  33.3   77.5  9.0   8.3  97   4.0   0.2   0.0
4  8  6   mar  sun  89.3  51.3  102.2  9.6  11.4  99   1.8   0.0   0.0


In [97]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 517 entries, 0 to 516
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   X       517 non-null    int64  
 1   Y       517 non-null    int64  
 2   month   517 non-null    object 
 3   day     517 non-null    object 
 4   FFMC    517 non-null    float64
 5   DMC     517 non-null    float64
 6   DC      517 non-null    float64
 7   ISI     517 non-null    float64
 8   temp    517 non-null    float64
 9   RH      517 non-null    int64  
 10  wind    517 non-null    float64
 11  rain    517 non-null    float64
 12  area    517 non-null    float64
dtypes: float64(8), int64(3), object(2)
memory usage: 52.6+ KB


In [98]:
def fire_risk_score(row):
    """Turn one row's fire-weather indices into an integer risk score (1-10).

    Each of FFMC, DMC, DC and ISI is divided by an approximate upper bound,
    multiplied by a weight, and the four terms are added in that order. The
    sum is rounded with the built-in ``round`` and clamped to 1..10.

    row: mapping-like object (e.g. a DataFrame row) indexable by the keys
         'FFMC', 'DMC', 'DC' and 'ISI'.
    returns: int between 1 and 10 inclusive.
    """
    # (key, approximate max used as normalizer, weight).
    # ISI and FFMC carry the larger weights to emphasise spread/ignition;
    # adjust the weights here to tune importance.
    components = (
        ('FFMC', 100.0, 3.0),   # FFMC: 18.7-96.2 approx
        ('DMC', 300.0, 2.0),    # DMC: up to ~291
        ('DC', 900.0, 2.0),     # DC: up to ~860
        ('ISI', 60.0, 3.0),     # ISI: up to ~56
    )

    # Accumulate left to right so the floating-point sum is evaluated in a
    # fixed order.
    total = 0.0
    for key, upper_bound, weight in components:
        total += (row[key] / upper_bound) * weight

    # Weights add up to 10, so the nominal range is roughly 0-10; clamp to 1-10.
    return int(np.clip(round(total), 1, 10))

In [99]:
# Score every row with fire_risk_score and store the result as a new column,
# then show how many rows fall on each score.
risk_scores = df.apply(fire_risk_score, axis=1)
df['risk_score'] = risk_scores
score_counts = df['risk_score'].value_counts().sort_index()
print("Risk score distribution:\n", score_counts)

Risk score distribution:
 risk_score
1      1
2      4
3     61
4     59
5    147
6    218
7     27
Name: count, dtype: int64


In [100]:
# One independent encoder per text column (month and day).
le_month, le_day = LabelEncoder(), LabelEncoder()

In [101]:
# (source column, encoded column, encoder, pickle file) for each text feature.
encoding_plan = (
    ('month', 'month_enc', le_month, "month_encoder.pkl"),
    ('day', 'day_enc', le_day, "day_encoder.pkl"),
)

# Fit each encoder on its column and keep the integer codes in a new column.
for source_col, encoded_col, encoder, _ in encoding_plan:
    df[encoded_col] = encoder.fit_transform(df[source_col])

# Save encoders (for later inference) once both have been fitted.
for _, _, encoder, pickle_path in encoding_plan:
    joblib.dump(encoder, pickle_path)
print("Saved month/day encoders")


Saved month/day encoders


In [102]:
# Model inputs, in the column order used for training.
features = [
    'X', 'Y', 'month_enc', 'day_enc',
    'FFMC', 'DMC', 'DC', 'ISI',
    'temp', 'RH', 'wind', 'rain',
]
X = df.loc[:, features].copy()
y = df.loc[:, 'risk_score'].copy()   # labels 1..10

In [104]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.20, random_state=42)
X_train, X_test, y_train, y_test = split_parts
print("Train:", X_train.shape, "Test:", X_test.shape)

Train: (413, 12) Test: (104, 12)


In [66]:
# 200-tree random forest fitted on the training split.
# random_state is fixed so that re-running the notebook grows the same trees;
# without it every run produces a different forest. 42 matches the seed used
# for the train/test split and for `rf`.
# NOTE(review): `model` is not referenced by any other visible cell -- `rf` is
# the estimator that gets saved and evaluated. Confirm whether this cell is
# still needed.
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train, y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [105]:
# Re-print the train/test shapes (same statement as at the end of the split cell).
print("Train:", X_train.shape, "Test:", X_test.shape)

Train: (413, 12) Test: (104, 12)


In [106]:
# Seeded 200-tree forest; n_jobs=-1 asks scikit-learn to use all available cores.
rf_params = {"n_estimators": 200, "random_state": 42, "n_jobs": -1}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

0,1,2
,n_estimators,200
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [107]:
# Persist the fitted forest next to the notebook so it can be reloaded later.
model_path = "fire_risk_rf_1to10.pkl"
joblib.dump(rf, model_path)
print("Saved RF model to fire_risk_rf_1to10.pkl")

Saved RF model to fire_risk_rf_1to10.pkl


In [108]:
# Score the held-out split: overall accuracy, per-class report, confusion matrix.
y_pred = rf.predict(X_test)
acc = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred, digits=4)
conf_mat = confusion_matrix(y_test, y_pred)

print(f"Test Accuracy: {acc:.4f}\n")
print("Classification report (1..10):")
print(report_text)
print("Confusion matrix (rows=true labels, cols=predicted):")
print(conf_mat)

Test Accuracy: 0.9712

Classification report (1..10):
              precision    recall  f1-score   support

           3     1.0000    1.0000    1.0000        13
           4     0.9231    1.0000    0.9600        12
           5     1.0000    0.9643    0.9818        28
           6     0.9796    0.9796    0.9796        49
           7     0.5000    0.5000    0.5000         2

    accuracy                         0.9712       104
   macro avg     0.8805    0.8888    0.8843       104
weighted avg     0.9719    0.9712    0.9713       104

Confusion matrix (rows=true labels, cols=predicted):
[[13  0  0  0  0]
 [ 0 12  0  0  0]
 [ 0  0 27  1  0]
 [ 0  0  0 48  1]
 [ 0  1  0  0  1]]


In [109]:
def score_to_bucket(score):
    """Translate a numeric risk score into a (label, color) pair.

    The score is truncated with int() and matched against inclusive upper
    bounds: <=3 Low/green, <=6 Medium/yellow, <=8 High/orange; anything
    above 8 is Extreme/red.
    """
    level = int(score)
    # (inclusive upper bound, label, color), checked in ascending order.
    thresholds = (
        (3, "Low", "green"),
        (6, "Medium", "yellow"),
        (8, "High", "orange"),
    )
    for upper, label, color in thresholds:
        if level <= upper:
            return label, color
    return "Extreme", "red"

# Show the bucket for a handful of scores on both sides of each threshold.
print("\nExample buckets:")
for example_score in (1, 3, 4, 6, 7, 9, 10):
    print(example_score, "->", score_to_bucket(example_score))


Example buckets:
1 -> ('Low', 'green')
3 -> ('Low', 'green')
4 -> ('Medium', 'yellow')
6 -> ('Medium', 'yellow')
7 -> ('High', 'orange')
9 -> ('Extreme', 'red')
10 -> ('Extreme', 'red')


In [110]:
def predict_risk_from_dict(sample):
    """
    Predict the risk score for one observation with the fitted forest `rf`.

    sample: dict with keys:
      X, Y, month (e.g. 'aug' or int-encoded), day (e.g. 'sun' or int-encoded),
      FFMC, DMC, DC, ISI, temp, RH, wind, rain
      String month/day values are encoded with le_month / le_day; any other
      value is passed through int() (0 when the key is absent). Missing
      numeric keys fall back to the defaults listed in the row below.
    returns: dict { 'score': int(1..10), 'bucket': str, 'color': str }
    raises: whatever le_month / le_day .transform() raises for a string the
      encoder was not fitted on.
    """
    s = sample.copy()   # work on a copy so the caller's dict is not modified
    # encode month/day if strings
    if isinstance(s.get('month'), str):
        s['month_enc'] = le_month.transform([s['month']])[0]
    else:
        s['month_enc'] = int(s.get('month', 0))
    if isinstance(s.get('day'), str):
        s['day_enc'] = le_day.transform([s['day']])[0]
    else:
        s['day_enc'] = int(s.get('day', 0))

    row = {
        'X': s.get('X', 1), 'Y': s.get('Y', 2),
        'month_enc': s['month_enc'], 'day_enc': s['day_enc'],
        'FFMC': s.get('FFMC', 50), 'DMC': s.get('DMC', 50),
        'DC': s.get('DC', 50), 'ISI': s.get('ISI', 1),
        'temp': s.get('temp', 20), 'RH': s.get('RH', 40),
        'wind': s.get('wind', 1), 'rain': s.get('rain', 0),
    }
    # rf was fitted on a DataFrame, so predict from a one-row DataFrame with the
    # same column names and order (`features`). A bare nested list triggers
    # scikit-learn's "X does not have valid feature names" warning and relies
    # purely on positional order.
    frame = pd.DataFrame([row], columns=features)
    pred = rf.predict(frame)[0]
    bucket, color = score_to_bucket(pred)
    return {'score': int(pred), 'bucket': bucket, 'color': color}


In [111]:
# ----------------------------
# 10) Run the predictor on a few hand-written sample inputs
# ----------------------------
sample_inputs = [
    dict(X=3, Y=4, month="jan", day="mon", FFMC=20.0, DMC=2.0, DC=10, ISI=0.5,
         temp=5, RH=80, wind=1.0, rain=0.5),
    dict(X=5, Y=6, month="may", day="thu", FFMC=65, DMC=80, DC=300, ISI=5,
         temp=20, RH=40, wind=4, rain=0),
    dict(X=7, Y=8, month="aug", day="sun", FFMC=90, DMC=200, DC=700, ISI=20,
         temp=33, RH=20, wind=7, rain=0),
    dict(X=2, Y=9, month="sep", day="fri", FFMC=80, DMC=120, DC=550, ISI=30,
         temp=28, RH=35, wind=9, rain=0),
    dict(X=1, Y=3, month="nov", day="wed", FFMC=25, DMC=5, DC=20, ISI=0,
         temp=10, RH=90, wind=1, rain=3),
    dict(X=8, Y=5, month="jul", day="sat", FFMC=75, DMC=100, DC=400, ISI=10,
         temp=26, RH=30, wind=6, rain=0),
]

print("\nSample predictions:")
for sample in sample_inputs:
    result = predict_risk_from_dict(sample)
    print(sample['month'], sample['day'], "-> score:", result['score'], "bucket:", result['bucket'], "color:", result['color'])



Sample predictions:
jan mon -> score: 2 bucket: Low color: green
may thu -> score: 4 bucket: Medium color: yellow
aug sun -> score: 6 bucket: Medium color: yellow
sep fri -> score: 5 bucket: Medium color: yellow




nov wed -> score: 3 bucket: Low color: green
jul sat -> score: 5 bucket: Medium color: yellow


