# ML - Fire, Weather and AVG PRCP Data 2008 - 2020

# Import and Cleaning Data

In [2]:
# Initial Import of Dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
import psycopg2 as pg
from collections import Counter

In [3]:
# External Database Connection
engine = pg.connect("dbname='d3r8dfuncb78iv' user='jrufhfiejfajri' host='ec2-52-200-155-213.compute-1.amazonaws.com' port='5432' password='9a7254d2151b5e3c280fe275dbba039acdc9190fbc167f64c564c449ca77af88'")
fire_df = pd.read_sql('select * from fire_data_with_avg_weather', con=engine)

In [4]:
# Check Data Types
fire_df.dtypes

fire_year          int64
district          object
unit              object
fire_number       object
fire_name         object
legal             object
latitude          object
longitude         object
fuel_model        object
county            object
report_date       object
general_cause     object
odf_acres        float64
total_acres      float64
prcp_avg         float64
snow_avg         float64
snwd_avg         float64
tmax_avg         float64
tmin_avg         float64
avg_prcp         float64
dtype: object

In [5]:
# Filter fire_year to match weather data 2008 - 2020
fire_df = fire_df[(fire_df['fire_year'] >= 2008) & (fire_df['fire_year'] <= 2020)]


In [6]:
# Confirm Count Matches Fire_weather dataset 
fire_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12577 entries, 0 to 12576
Data columns (total 20 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fire_year      12577 non-null  int64  
 1   district       12577 non-null  object 
 2   unit           12577 non-null  object 
 3   fire_number    12577 non-null  object 
 4   fire_name      12577 non-null  object 
 5   legal          12573 non-null  object 
 6   latitude       12569 non-null  object 
 7   longitude      12569 non-null  object 
 8   fuel_model     12511 non-null  object 
 9   county         12574 non-null  object 
 10  report_date    12577 non-null  object 
 11  general_cause  12577 non-null  object 
 12  odf_acres      12577 non-null  float64
 13  total_acres    12515 non-null  float64
 14  prcp_avg       10829 non-null  float64
 15  snow_avg       10582 non-null  float64
 16  snwd_avg       10598 non-null  float64
 17  tmax_avg       10540 non-null  float64
 18  tmin_a

In [7]:
#. Check for null values
fire_df.isna().sum()

fire_year           0
district            0
unit                0
fire_number         0
fire_name           0
legal               4
latitude            8
longitude           8
fuel_model         66
county              3
report_date         0
general_cause       0
odf_acres           0
total_acres        62
prcp_avg         1748
snow_avg         1995
snwd_avg         1979
tmax_avg         2037
tmin_avg         2037
avg_prcp         1748
dtype: int64

In [8]:
# Converting Lat/Long DMS to Decimal
pattern = r'(?P<d>[\d\.]+).*?(?P<m>[\d\.]+).*?(?P<s>[\d\.]+)'

#Lat
dms = fire_df['latitude'].str.extract(pattern).astype(float)
fire_df['Latitude'] = dms['d'] + dms['m'].div(60) + dms['s'].div(3600)

#Long  
dms = fire_df['longitude'].str.extract(pattern).astype(float)
fire_df['Longitude'] = dms['d'] + dms['m'].div(60) + dms['s'].div(3600)

# Convert all Longtiude to Negative
fire_df['Longitude'] *= -1

fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,latitude,longitude,fuel_model,county,...,odf_acres,total_acres,prcp_avg,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude
0,2008,97 - Northeast Oregon,Baker,42,Highway,T12S R36E S22 SWNW,44° 30' 42.01,-118° 16' 52,H,BAKER,...,0.5,0.5,0.0,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111
1,2008,97 - Northeast Oregon,Baker,52,Price Creek,T07S R42E S20 SENE,44° 56' 30.98,-117° 35' 3.01,C,BAKER,...,0.0,0.1,0.076364,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169
2,2008,97 - Northeast Oregon,Baker,53,Hunt Mountain # 179,T08S R38E S11 NWSW,44° 52' 40.01,-118° 1' 41.02,C,BAKER,...,0.1,0.1,0.076364,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061
3,2008,97 - Northeast Oregon,Baker,48,China,T11S R39E S01 NENE,44° 36' 56.02,-117° 58' 16,I,BAKER,...,0.04,0.04,0.0,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111
4,2008,97 - Northeast Oregon,Baker,45,Rookie Fire,T11S R37E S18 NWSE,44° 36' 20.02,-118° 13' 5.02,C,BAKER,...,0.02,0.02,0.023,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061


In [9]:
# Drop DMS Lat/Long/odf_acres
fire_df = fire_df.drop(['latitude', 'longitude', "odf_acres", "prcp_avg"], axis=1)
# Confirm Dropped Columns
fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,fuel_model,county,report_date,general_cause,total_acres,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude
0,2008,97 - Northeast Oregon,Baker,42,Highway,T12S R36E S22 SWNW,H,BAKER,2008-05-12,Debris Burning,0.5,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111
1,2008,97 - Northeast Oregon,Baker,52,Price Creek,T07S R42E S20 SENE,C,BAKER,2008-06-30,Lightning,0.1,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169
2,2008,97 - Northeast Oregon,Baker,53,Hunt Mountain # 179,T08S R38E S11 NWSW,C,BAKER,2008-06-30,Lightning,0.1,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061
3,2008,97 - Northeast Oregon,Baker,48,China,T11S R39E S01 NENE,I,BAKER,2008-06-27,Debris Burning,0.04,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111
4,2008,97 - Northeast Oregon,Baker,45,Rookie Fire,T11S R37E S18 NWSE,C,BAKER,2008-06-22,Lightning,0.02,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061


In [10]:
# Drop the null columns where all values are null
fire_df = fire_df.dropna(axis='columns', how='all')

# Drop the null rows
fire_df = fire_df.dropna()
fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,fuel_model,county,report_date,general_cause,total_acres,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude
0,2008,97 - Northeast Oregon,Baker,42,Highway,T12S R36E S22 SWNW,H,BAKER,2008-05-12,Debris Burning,0.5,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111
1,2008,97 - Northeast Oregon,Baker,52,Price Creek,T07S R42E S20 SENE,C,BAKER,2008-06-30,Lightning,0.1,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169
2,2008,97 - Northeast Oregon,Baker,53,Hunt Mountain # 179,T08S R38E S11 NWSW,C,BAKER,2008-06-30,Lightning,0.1,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061
3,2008,97 - Northeast Oregon,Baker,48,China,T11S R39E S01 NENE,I,BAKER,2008-06-27,Debris Burning,0.04,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111
4,2008,97 - Northeast Oregon,Baker,45,Rookie Fire,T11S R37E S18 NWSE,C,BAKER,2008-06-22,Lightning,0.02,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061


In [11]:
# Import LE to convert Fuel Model and General Cause to Numeric Values
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Converting Fuel Model to Numeric
label_encoder = LabelEncoder()
fire_df["fueltype_num"] = label_encoder.fit_transform(fire_df["fuel_model"])

In [12]:
# Alternative Model with 7 CLasses - Set and Classify Fire Sizes - https://www.nwcg.gov/term/glossary/size-class-of-fire
#fire_df.loc[fire_df['total_acres'] <= .25, 'fire_severity'] = 1
#fire_df.loc[(fire_df['total_acres'] > .25) & (fire_df['total_acres'] <= 10), 'fire_severity'] = 2
#fire_df.loc[(fire_df['total_acres'] > 10) & (fire_df['total_acres'] <= 100), 'fire_severity'] = 3
#fire_df.loc[(fire_df['total_acres'] > 100) & (fire_df['total_acres'] <= 300), 'fire_severity'] = 4
#fire_df.loc[(fire_df['total_acres'] > 300) & (fire_df['total_acres'] <= 1000), 'fire_severity'] = 5
#fire_df.loc[(fire_df['total_acres'] > 1000) & (fire_df['total_acres'] <= 5000), 'fire_severity'] = 6
#fire_df.loc[fire_df['total_acres'] > 5000, 'fire_severity'] = 7                                                   

In [13]:
# Set and Classify Fire Sizes - https://www.nwcg.gov/term/glossary/size-class-of-fire
fire_df.loc[fire_df['total_acres'] <= .25, 'fire_severity'] = 1
fire_df.loc[(fire_df['total_acres'] > .25) & (fire_df['total_acres'] <= 300), 'fire_severity'] = 2
fire_df.loc[fire_df['total_acres'] > 300, 'fire_severity'] = 3          

In [14]:
# Loss in data from additional null fields in weather data
print(fire_df['fire_severity'].value_counts())

1.0    7191
2.0    2715
3.0     142
Name: fire_severity, dtype: int64


In [15]:
fire_df.general_cause.unique()

array(['Debris Burning', 'Lightning', 'Miscellaneous', 'Equipment Use',
       'Recreationist', 'Juveniles', 'Under Invest', 'Smoking',
       'Railroad', 'Arson'], dtype=object)

In [16]:
# Change General Cause Human or Nature

fire_df['general_cause'] = fire_df['general_cause'].replace(['Recreationist','Equipment Use','Debris Burning', 'Smoking', 'Arson', 'Railroad', 'Juveniles'],'1')
fire_df['general_cause'] = fire_df['general_cause'].replace(['Lightning'],'2')
fire_df['general_cause'] = fire_df['general_cause'].replace(['Under Invest', 'Miscellaneous'],'3')


#  Random Forest Classifier

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [18]:
fire_binary_encoded = pd.get_dummies(fire_df, columns=["fuel_model", "general_cause"])
fire_binary_encoded.head()

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,county,report_date,total_acres,snow_avg,...,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
0,2008,97 - Northeast Oregon,Baker,42,Highway,T12S R36E S22 SWNW,BAKER,2008-05-12,0.5,0.0,...,0,0,0,0,0,0,0,1,0,0
1,2008,97 - Northeast Oregon,Baker,52,Price Creek,T07S R42E S20 SENE,BAKER,2008-06-30,0.1,0.0,...,0,0,0,0,0,0,0,0,1,0
2,2008,97 - Northeast Oregon,Baker,53,Hunt Mountain # 179,T08S R38E S11 NWSW,BAKER,2008-06-30,0.1,0.0,...,0,0,0,0,0,0,0,0,1,0
3,2008,97 - Northeast Oregon,Baker,48,China,T11S R39E S01 NENE,BAKER,2008-06-27,0.04,0.0,...,0,0,0,0,0,0,0,1,0,0
4,2008,97 - Northeast Oregon,Baker,45,Rookie Fire,T11S R37E S18 NWSE,BAKER,2008-06-22,0.02,0.0,...,0,0,0,0,0,0,0,0,1,0


In [19]:
fire_binary_encoded = fire_binary_encoded.drop(['fueltype_num','district', 'unit', "fire_name", "legal", "report_date","fire_number", "county", "total_acres" ], axis=1)

In [20]:
fire_binary_encoded.head()

Unnamed: 0,fire_year,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude,fire_severity,fuel_model_A,...,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
0,2008,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111,2.0,0,...,0,0,0,0,0,0,0,1,0,0
1,2008,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169,1.0,0,...,0,0,0,0,0,0,0,0,1,0
2,2008,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061,1.0,0,...,0,0,0,0,0,0,0,0,1,0
3,2008,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111,1.0,0,...,0,0,0,0,0,0,0,1,0,0
4,2008,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061,1.0,0,...,0,0,0,0,0,0,0,0,1,0


In [21]:
# Define the features set.
X = fire_binary_encoded
X = X.drop("fire_severity", axis=1)
X.head()

Unnamed: 0,fire_year,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude,fuel_model_A,fuel_model_B,...,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
0,2008,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111,0,0,...,0,0,0,0,0,0,0,1,0,0
1,2008,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169,0,0,...,0,0,0,0,0,0,0,0,1,0
2,2008,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2008,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2008,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061,0,0,...,0,0,0,0,0,0,0,0,1,0


In [22]:
# Define the target set.
y = fire_binary_encoded["fire_severity"].ravel()
y[:5]

array([2., 1., 1., 1., 1.])

In [23]:
# Splitting into Train and Test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [24]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [25]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=100, random_state=78) 

In [26]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [27]:
predictions = rf_model.predict(X_test_scaled)

In [28]:
predictions

array([1., 1., 1., ..., 1., 1., 1.])

In [29]:
# Calculate feature importance in the Random Forest model.
importances = rf_model.feature_importances_
importances

array([0.07661949, 0.00076848, 0.02173191, 0.13872591, 0.14438356,
       0.12299716, 0.1713546 , 0.18000929, 0.01418593, 0.0019267 ,
       0.01199539, 0.00906385, 0.00606268, 0.0126187 , 0.00546915,
       0.00827071, 0.00431591, 0.01152214, 0.00586788, 0.00490758,
       0.00360037, 0.01451709, 0.01180649, 0.00930682, 0.00797219])

In [30]:
# Features Sorted by Importance
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.18000929207925245, 'Longitude'),
 (0.17135459957422441, 'Latitude'),
 (0.14438355600973507, 'tmin_avg'),
 (0.13872591102449336, 'tmax_avg'),
 (0.1229971646672774, 'avg_prcp'),
 (0.07661949084849083, 'fire_year'),
 (0.02173190826690616, 'snwd_avg'),
 (0.0145170888439571, 'fuel_model_X'),
 (0.014185932012429634, 'fuel_model_A'),
 (0.012618702569853209, 'fuel_model_H'),
 (0.011995394550379176, 'fuel_model_C'),
 (0.011806494070161763, 'general_cause_1'),
 (0.011522138071865381, 'fuel_model_L'),
 (0.00930682282643251, 'general_cause_2'),
 (0.009063849448670743, 'fuel_model_F'),
 (0.008270711915305179, 'fuel_model_J'),
 (0.007972192151287966, 'general_cause_3'),
 (0.0060626786494037705, 'fuel_model_G'),
 (0.005867882959689063, 'fuel_model_R'),
 (0.005469151323783878, 'fuel_model_I'),
 (0.004907582884639095, 'fuel_model_T'),
 (0.004315908371000067, 'fuel_model_K'),
 (0.003600374384797934, 'fuel_model_U'),
 (0.0019266959600516902, 'fuel_model_B'),
 (0.0007684765359122537, 'snow_avg')]

In [31]:
# Legend For Fuel Models
#A	Annual grasses (cheat)
#B	Dense Chaparral
#C	Open pine, grass under
#F	Dense Brush (lighter than B)
#G	Conifer, Old growth
#H	Conifer, Second growth
#I	Slash, heavy
#J	Slash, medium
#K	Slash, thinning, P.C., Scattrd
#L	Grass Perennial
#R	Hardwood, summer
#T	Sagebrush, medium dense
#U	Closed canopy pine
#X	Non wildland fuel

In [32]:
# Legend For General Cause
# 1 = Human
# 2 = Nature
# 3 = Uncategorized

In [33]:
# Conusion Matrix to predict fire severity 
cm = confusion_matrix(y_test, predictions)
# DataFrame from the confusion matrix.
cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3"], columns=["Predicted 1", "Predicted 2","Predicted 3"])
cm_df

Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1666,154,2
Class 2,530,125,1
Class 3,23,11,0


In [34]:
#Class 1 - one-quarter acre or less;
#Class 2 - more than one-quarter acre, but less than 300 acres;
#Class 3 - 300 acres or more.

In [35]:
#  # Alternative Model with 7 CLasses Conusion Matrix to predict fire severity 
#cm = confusion_matrix(y_test, predictions)
# DataFrame from the confusion matrix.
#cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3","Class 4","Class 5", "Class 6", "Class 7"], columns=["Predicted 1", "Predicted 2","Predicted 3","Predicted 4","Predicted 5", "Predicted 6","Predicted 7"])
#cm_df

In [36]:
#Class 1 - one-fourth acre or less;
#Class 2 - more than one-fourth acre, but less than 10 acres;
#Class 3 - 10 acres or more, but less than 100 acres;
#Class 4 - 100 acres or more, but less than 300 acres;
#Class 5 - 300 acres or more, but less than 1,000 acres;
#Class 6 - 1,000 acres or more, but less than 5,000 acres;
#Class 7 - 5,000 acres or more.

In [37]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)

In [39]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1666,154,2
Class 2,530,125,1
Class 3,23,11,0


Accuracy Score : 0.7129777070063694
Classification Report
              precision    recall  f1-score   support

         1.0       0.75      0.91      0.82      1822
         2.0       0.43      0.19      0.26       656
         3.0       0.00      0.00      0.00        34

    accuracy                           0.71      2512
   macro avg       0.39      0.37      0.36      2512
weighted avg       0.66      0.71      0.67      2512



# Combination Sampling With SMOTEENN

In [40]:
fire_smoteen = fire_df.drop(['district','fuel_model', 'unit', "fire_name", "legal", "report_date","fire_number", "county", "total_acres" ], axis=1)
fire_smoteen.head(5)

Unnamed: 0,fire_year,general_cause,snow_avg,snwd_avg,tmax_avg,tmin_avg,avg_prcp,Latitude,Longitude,fueltype_num,fire_severity
0,2008,1,0.0,11.375,54.285714,30.714286,0.085899,44.511669,-118.281111,5,2.0
1,2008,2,0.0,0.0,91.714286,59.285714,0.034884,44.941939,-117.584169,2,1.0
2,2008,2,0.0,0.0,91.714286,59.285714,0.034884,44.877781,-118.028061,2,1.0
3,2008,1,0.0,0.0,80.5,48.5,0.034884,44.615561,-117.971111,6,1.0
4,2008,2,0.0,0.0,76.428571,47.571429,0.034884,44.605561,-118.218061,2,1.0


In [41]:
x_cols = [i for i in fire_smoteen.columns if i not in ('fire_severity')]
X = fire_smoteen[x_cols]
y = fire_smoteen['fire_severity']

In [42]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [43]:
from imblearn.combine import SMOTEENN
smoteenn = SMOTEENN(random_state=1)
X_resampled, y_resampled = smoteenn.fit_resample(X_train, y_train)
Counter(y_resampled)

Counter({1.0: 2009, 2.0: 3273, 3.0: 5275})

In [44]:
from sklearn.linear_model import LogisticRegression
smoteen_model = LogisticRegression(solver='lbfgs',max_iter=100)
smoteen_model.fit(X_resampled, y_resampled)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


LogisticRegression()

In [45]:
from sklearn.metrics import confusion_matrix
y_pred = smoteen_model.predict(X_test)
confusion_matrix(y_test, y_pred)

array([[336, 659, 803],
       [ 66, 290, 323],
       [  2,   3,  30]])

In [46]:
#from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import balanced_accuracy_score
y_pred = smoteen_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.49037194548243485

In [47]:
# Display the confusion matrix
smoteen_cm = confusion_matrix(y_test, y_pred)
# DataFrame from the confusion matrix.
smoteen_cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3"], columns=["Predicted 1", "Predicted 2","Predicted 3"])
smoteen_cm_df

Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1666,154,2
Class 2,530,125,1
Class 3,23,11,0


In [48]:
# # Alternative Model with 7 CLasses - Display the confusion matrix
#smoteen_cm = confusion_matrix(y_test, y_pred)
# DataFrame from the confusion matrix.
#smoteen_cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3","Class 4","Class 5", "Class 6", "Class 7"], columns=["Predicted 1", "Predicted 2","Predicted 3","Predicted 4","Predicted 5", "Predicted 6","Predicted 7"])
#smoteen_cm_df

In [49]:
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        1.0       0.83      0.19      0.90      0.31      0.41      0.16      1798
        2.0       0.30      0.43      0.64      0.36      0.52      0.27       679
        3.0       0.03      0.86      0.55      0.05      0.68      0.48        35

avg / total       0.68      0.26      0.83      0.32      0.45      0.19      2512

