In [51]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [52]:
df = pd.read_csv('Algerian_forest_fires_dataset_UPDATE.csv')

In [53]:
df.head()

Unnamed: 0,name,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI,Classes
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5,not fire
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4,not fire
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1,not fire
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0,not fire
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5,not fire


In [54]:
df.columns

Index(['name', 'month', 'year', 'Temperature', ' RH', ' Ws', 'Rain ', 'FFMC',
       'DMC', 'DC', 'ISI', 'BUI', 'FWI', 'Classes  '],
      dtype='object')

In [55]:
dfdropped = df.drop(['name','month','year',' RH','Rain ','DC','BUI','FWI'],axis=1)

In [56]:
dfdropped.head()

Unnamed: 0,Temperature,Ws,FFMC,DMC,ISI,Classes
0,29,18,65.7,3.4,1.3,not fire
1,29,13,64.4,4.1,1.0,not fire
2,26,22,47.1,2.5,0.3,not fire
3,25,13,28.6,1.3,0.0,not fire
4,27,16,64.8,3.0,1.2,not fire


In [57]:
dfdropped.rename(columns={' Ws':'windspeed','Classes  ':'class','ISI':'initialspreadindex','FFMC':'fuelmoisturecode','DMC':'duffmoisturecode'}, inplace=True)
dfdropped.head()

Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex,class
0,29,18,65.7,3.4,1.3,not fire
1,29,13,64.4,4.1,1.0,not fire
2,26,22,47.1,2.5,0.3,not fire
3,25,13,28.6,1.3,0.0,not fire
4,27,16,64.8,3.0,1.2,not fire


In [58]:
dfdropped['class'].unique()

array(['not fire   ', 'fire   ', 'fire', 'fire ', 'not fire', 'not fire '],
      dtype=object)

In [59]:
dfdropped['class'].replace({
    'not fire   ': 'notfire',
    'fire   ': 'fire',
    'fire ': 'fire',
    'not fire ':'notfire',
    'not fire':'notfire'},inplace=True   
)
dfdropped.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dfdropped['class'].replace({


Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex,class
0,29,18,65.7,3.4,1.3,notfire
1,29,13,64.4,4.1,1.0,notfire
2,26,22,47.1,2.5,0.3,notfire
3,25,13,28.6,1.3,0.0,notfire
4,27,16,64.8,3.0,1.2,notfire


In [60]:
dfdropped['class'] = dfdropped['class'].map({'fire':1,'notfire':0})
dfdropped.head(10)

Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex,class
0,29,18,65.7,3.4,1.3,0
1,29,13,64.4,4.1,1.0,0
2,26,22,47.1,2.5,0.3,0
3,25,13,28.6,1.3,0.0,0
4,27,16,64.8,3.0,1.2,0
5,31,14,82.6,5.8,3.1,1
6,33,13,88.2,9.9,6.4,1
7,30,15,86.6,12.1,5.6,1
8,25,13,52.9,7.9,0.4,0
9,28,12,73.2,9.5,1.3,0


In [61]:
dfdropped['class'].unique()

array([0, 1])

In [62]:
X = dfdropped.drop('class',axis=1)
y = dfdropped['class']

In [63]:
X

Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex
0,29,18,65.7,3.4,1.3
1,29,13,64.4,4.1,1.0
2,26,22,47.1,2.5,0.3
3,25,13,28.6,1.3,0.0
4,27,16,64.8,3.0,1.2
...,...,...,...,...,...
117,31,11,82.0,6.0,2.5
118,31,11,85.7,8.3,4.0
119,32,14,77.5,7.1,1.8
120,26,16,47.4,2.9,0.3


In [75]:
X

Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex
0,0.466667,0.466667,0.601297,0.050467,0.104
1,0.466667,0.133333,0.580227,0.063551,0.080
2,0.266667,0.733333,0.299838,0.033645,0.024
3,0.200000,0.133333,0.000000,0.011215,0.000
4,0.333333,0.333333,0.586710,0.042991,0.096
...,...,...,...,...,...
117,0.600000,0.000000,0.865478,0.099065,0.200
118,0.600000,0.000000,0.925446,0.142056,0.320
119,0.666667,0.200000,0.792545,0.119626,0.144
120,0.266667,0.333333,0.304700,0.041121,0.024


In [64]:
y
columnstoscale = ['Temperature','windspeed','fuelmoisturecode','duffmoisturecode','initialspreadindex']

In [65]:
scaler = MinMaxScaler()
scaler.fit(X[columnstoscale])
X[columnstoscale] = scaler.transform(X[columnstoscale])
X

Unnamed: 0,Temperature,windspeed,fuelmoisturecode,duffmoisturecode,initialspreadindex
0,0.466667,0.466667,0.601297,0.050467,0.104
1,0.466667,0.133333,0.580227,0.063551,0.080
2,0.266667,0.733333,0.299838,0.033645,0.024
3,0.200000,0.133333,0.000000,0.011215,0.000
4,0.333333,0.333333,0.586710,0.042991,0.096
...,...,...,...,...,...
117,0.600000,0.000000,0.865478,0.099065,0.200
118,0.600000,0.000000,0.925446,0.142056,0.320
119,0.666667,0.200000,0.792545,0.119626,0.144
120,0.266667,0.333333,0.304700,0.041121,0.024


In [66]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

In [67]:
model_rf = RandomForestRegressor(n_estimators=100, random_state=10)
model_rf.fit(X_train, y_train)

In [68]:
model_rf.score(X_test,y_test)

0.9849675324675324

In [69]:
bestmodel = model_rf

In [70]:
if hasattr(scaler, 'min_') and hasattr(scaler, 'scale_'):
    print("The scaler is fitted.")
else:
    print("The scaler is not fitted.")

The scaler is fitted.


In [71]:
from joblib import dump

dump(bestmodel, "artifacts/model.joblib")

scaler_with_columns = {
    'scalertest':scaler,
    'columnstoscale':columnstoscale
}
dump(scaler_with_columns, "artifacts/scaler.joblib")

['artifacts/scaler.joblib']

In [72]:
ypred = model_rf.predict(X_test)
ypred

array([0.14, 0.  , 0.  , 0.  , 1.  , 0.73, 0.  , 0.  , 1.  , 0.  , 0.  ,
       0.01, 0.  , 0.  , 1.  , 0.  , 1.  , 1.  , 1.  , 0.  , 1.  , 1.  ,
       1.  , 0.  , 1.  ])