### Easy Ensemble Classifier

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from imblearn.ensemble import EasyEnsembleClassifier

from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced


# Load the input dataset from CSV and preview it.
# Per the preview below, the columns are fire_id, state_no, discovery_month_no,
# three *_pre_7 weather readings, fire_size_bin_no and the binary medium_plus label.
data = Path('Resources_k/df7_k.csv')
df = pd.read_csv(data)
df

Unnamed: 0,fire_id,state_no,discovery_month_no,Temp_pre_7,Wind_pre_7,Hum_pre_7,fire_size_bin_no,medium_plus
0,3,29,6,18.142564,3.671282,35.353846,1,0
1,24,22,4,15.604790,1.775904,59.614458,3,0
2,31,25,4,-4.273834,6.658621,71.173116,1,0
3,35,8,10,18.188679,2.211429,60.328571,3,0
4,36,28,7,34.893333,3.695833,18.208333,5,1
...,...,...,...,...,...,...,...,...
13133,55336,39,8,28.719917,3.224274,55.426471,5,1
13134,55337,39,10,24.221869,1.563817,63.196819,5,1
13135,55341,33,9,10.734328,1.835821,67.266304,6,1
13136,55342,23,3,7.678571,3.803571,42.638384,5,1


In [2]:
# Drop the identifier and the size-bin column so only the five feature columns
# and the medium_plus label remain.
# NOTE(review): fire_size_bin_no presumably determines medium_plus (label leakage) — confirm.
# This rebinds `df`, so running the cell a second time raises KeyError.
columns_to_drop = ['fire_id', 'fire_size_bin_no']
df = df.drop(columns=columns_to_drop)

In [3]:
# Separate the binary target (medium_plus) from the feature matrix
y = target = df['medium_plus']
X = df.drop(columns=['medium_plus']).values

# Hold out a test set using train_test_split's default proportions and a fixed seed
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Fit the scaler on the training rows only, so test rows do not influence
# the learned mean/variance
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the training-set scaling to both partitions
X_train_scaled, X_test_scaled = (
    X_scaler.transform(part) for part in (X_train, X_test)
)

In [4]:
# Inspect the standardized test features (one column per input feature)
X_test_scaled

array([[ 1.49691652,  0.33288158,  1.81061152,  0.42225824, -2.42711523],
       [ 1.79469202, -0.79577714, -1.3073249 ,  0.76553512,  1.30590304],
       [-1.10861914,  0.70910116,  0.41341934, -0.78454843,  1.48544833],
       ...,
       [ 1.19914102,  2.21397946, -0.74605266, -0.17139494,  0.16396625],
       [-1.10861914,  0.33288158, -0.22001741,  0.53030658,  1.40623668],
       [-0.88528751,  1.46154031,  0.3632356 , -0.90650679,  0.68645195]])

In [5]:
# Build the Easy Ensemble classifier: 10 estimators, fixed seed for reproducibility
model = EasyEnsembleClassifier(random_state=78, n_estimators=10)

# Train on the standardized training split (fit returns the estimator itself,
# so `model` is the fitted classifier)
model.fit(X_train_scaled, y_train)

# Predict labels for the standardized test split
y_pred = model.predict(X_test_scaled)

In [6]:
# Calculate the balanced accuracy score (the average of per-class recall),
# which is not dominated by the majority class the way plain accuracy is
ee_acc_score = balanced_accuracy_score(y_test, y_pred)
ee_acc_score

0.7576531728665208

In [7]:
# Confusion matrix with rows = actual class and columns = predicted class.
# `labels` pins the class order (0 = less than medium, 1 = medium plus) so the
# matrix is always 2x2 and lines up with the row/column names below, even if
# one class happens to be missing from y_test or y_pred.
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
cm_df = pd.DataFrame(
    cm,
    index=["Actual less than medium", "Actual medium plus"],
    columns=["Predicted less than medium", "Predicted medium +"],
)

In [8]:
# Display the Easy Ensemble Classifier results
print("Easy Ensemble Classifier")
print("Confusion Matrix")
display(cm_df)
# ee_acc_score is computed with balanced_accuracy_score, so label it as balanced
# accuracy; plain accuracy derived from the confusion matrix is a different number
print(f"Balanced Accuracy Score : {ee_acc_score}")
print("Classification Report")
print(classification_report_imbalanced(y_test, y_pred))

Easy Ensemble Classifier
Confusion Matrix


Unnamed: 0,Predicted less than medium,Predicted medium +
Actual less than medium,1783,502
Actual medium plus,265,735


Accuracy Score : 0.7576531728665208
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

          0       0.87      0.78      0.73      0.82      0.76      0.58      2285
          1       0.59      0.73      0.78      0.66      0.76      0.57      1000

avg / total       0.79      0.77      0.75      0.77      0.76      0.57      3285



In [17]:
# Side-by-side table of predicted vs. actual labels for the test split.
# y_test still carries its row labels from df, so the index is reset to 0..n-1.
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
results = results.reset_index(drop=True)
results

Unnamed: 0,Prediction,Actual
0,1,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
3280,0,0
3281,0,0
3282,0,0
3283,0,1


In [21]:
# Persist the fitted classifier to disk, then load it back into `job` so the
# saved file can be checked against the in-memory model.
# NOTE(review): joblib files are pickle-based; only load files you created or trust.
import joblib
joblib.dump(model, 'model_joblib_ee.joblib')
job=joblib.load('model_joblib_ee.joblib')

In [26]:
# Scale one raw sample given in the training column order:
# [state_no, discovery_month_no, Temp_pre_7, Wind_pre_7, Hum_pre_7].
# The values are row 0 of the dataset preview, truncated to two decimals.
x1=X_scaler.transform([[29, 6, 18.14, 3.67, 35.35]])
x1

array([[ 0.67803388, -0.04333799,  0.12115669,  0.49750246, -1.2234262 ]])

In [14]:
# Predict the medium_plus class for the scaled sample x1 with the in-memory model
model.predict(x1)

array([1], dtype=int64)

In [15]:
# Scale a second raw sample (same column order as training:
# state_no, discovery_month_no, Temp_pre_7, Wind_pre_7, Hum_pre_7).
# The values are row 1 of the dataset preview, truncated to at most two decimals.
x2=X_scaler.transform([[22, 4, 15.6, 1.77, 59.61]])

In [16]:
# Predict the medium_plus class for the scaled sample x2 with the in-memory model
model.predict(x2)

array([0], dtype=int64)

In [22]:
# Scale a third raw sample (same column order as training:
# state_no, discovery_month_no, Temp_pre_7, Wind_pre_7, Hum_pre_7).
# The values are row 2 of the dataset preview, truncated to two decimals.
x3=X_scaler.transform([[25, 4, -4.27, 6.65, 71.17]])
x3

array([[ 0.38025838, -0.79577714, -2.58939372,  2.89094514,  0.96901776]])

In [20]:
# Predict the medium_plus class for the scaled sample x3 with the in-memory model
model.predict(x3)

array([0], dtype=int64)

In [24]:
# Check the model reloaded from disk on the scaled sample x3 directly, rather
# than on a hand-copied, rounded array that goes stale if the scaler or data change
job.predict(x3)

array([0], dtype=int64)

In [27]:
# Check the model reloaded from disk on the scaled sample x1 directly, rather
# than on a hand-copied, rounded array that goes stale if the scaler or data change
job.predict(x1)

array([1], dtype=int64)

-------------------------------------

# Create X-Scaler

In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from imblearn.ensemble import EasyEnsembleClassifier
import joblib

from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced


# Load the input dataset from CSV and preview it.
# Per the preview below, the columns are fire_id, state_no, discovery_month_no,
# three *_pre_7 weather readings, fire_size_bin_no and the binary medium_plus label.
data = Path('Resources_k/df7_k.csv')
df = pd.read_csv(data)
df

Unnamed: 0,fire_id,state_no,discovery_month_no,Temp_pre_7,Wind_pre_7,Hum_pre_7,fire_size_bin_no,medium_plus
0,3,29,6,18.142564,3.671282,35.353846,1,0
1,24,22,4,15.604790,1.775904,59.614458,3,0
2,31,25,4,-4.273834,6.658621,71.173116,1,0
3,35,8,10,18.188679,2.211429,60.328571,3,0
4,36,28,7,34.893333,3.695833,18.208333,5,1
...,...,...,...,...,...,...,...,...
13133,55336,39,8,28.719917,3.224274,55.426471,5,1
13134,55337,39,10,24.221869,1.563817,63.196819,5,1
13135,55341,33,9,10.734328,1.835821,67.266304,6,1
13136,55342,23,3,7.678571,3.803571,42.638384,5,1


In [None]:
# Drop the identifier and the size-bin column so only the five feature columns
# and the medium_plus label remain.
# This rebinds `df`, so running the cell a second time raises KeyError.
columns_to_drop = ['fire_id', 'fire_size_bin_no']
df = df.drop(columns=columns_to_drop)
df

Unnamed: 0,state_no,discovery_month_no,Temp_pre_7,Wind_pre_7,Hum_pre_7,medium_plus
0,29,6,18.142564,3.671282,35.353846,0
1,22,4,15.604790,1.775904,59.614458,0
2,25,4,-4.273834,6.658621,71.173116,0
3,8,10,18.188679,2.211429,60.328571,0
4,28,7,34.893333,3.695833,18.208333,1
...,...,...,...,...,...,...
13133,39,8,28.719917,3.224274,55.426471,1
13134,39,10,24.221869,1.563817,63.196819,1
13135,33,9,10.734328,1.835821,67.266304,1
13136,23,3,7.678571,3.803571,42.638384,1


In [None]:
# Separate the binary target (medium_plus) from the feature matrix
y = target = df['medium_plus']
X = df.drop(columns=['medium_plus']).values

# Split with the same seed used when training the classifier, so the scaler is
# fitted on the same training rows; only X_train is used in this cell
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Fit the scaler on the training rows
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

X_scaler


StandardScaler()

In [None]:


# Save the fitted scaler to disk so new inputs can later be scaled with the
# same statistics
joblib.dump(X_scaler, 'X_scaler.save')




['X_scaler.save']

In [None]:
state discovery_month  Temp_pre_7  Wind_pre_7  Hum_pre_7
0     1               3        12.0        14.0        1.0


In [None]:
# One hand-written observation, listed in the same column order as the
# training features
feature_names = [
    "state_no",
    "discovery_month_no",
    "Temp_pre_7",
    "Wind_pre_7",
    "Hum_pre_7",
]
input_data = [1, 3, 12.0, 14.0, 1.0]

# Wrap the single row in a list so it becomes one DataFrame row
input_df = pd.DataFrame(data=[input_data], columns=feature_names)
input_df


Unnamed: 0,state_no,discovery_month_no,Temp_pre_7,Wind_pre_7,Hum_pre_7
0,1,3,12.0,14.0,1.0


In [None]:
# Check the column dtypes of the one-row input frame
input_df.dtypes

state_no                int64
discovery_month_no      int64
Temp_pre_7            float64
Wind_pre_7            float64
Hum_pre_7             float64
dtype: object

In [None]:
# Convert the one-row input frame to a NumPy array.
# NOTE: this rebinds `X`, which previously held the training feature matrix.
X=input_df.values

# Load the scaler saved earlier (fitted on the training split) instead of
# fitting a new scaler on this single row
X_scaler = joblib.load("X_scaler.save")
# Scale the data
X_scaled = X_scaler.transform(X)


In [None]:
# Inspect the scaled input; a large magnitude means the raw value lies far
# from the training mean for that feature
X_scaled

array([[-1.40639464, -1.17199672, -0.621493  ,  8.79423497, -3.32589548]])