# ML - Fire Data 2008 - 2020

# Import and Cleaning Data

In [1]:
# Initial Import of Dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
import psycopg2 as pg
from collections import Counter

In [2]:
# External Database Connection
engine = pg.connect("dbname='d3r8dfuncb78iv' user='jrufhfiejfajri' host='ec2-52-200-155-213.compute-1.amazonaws.com' port='5432' password='9a7254d2151b5e3c280fe275dbba039acdc9190fbc167f64c564c449ca77af88'")
fire_df = pd.read_sql('select * from wildfire_data', con=engine)

In [3]:
# Check Data Types
fire_df.dtypes

fire_year          int64
district          object
unit              object
fire_number       object
fire_name         object
legal             object
latitude          object
longitude         object
fuel_model        object
county            object
report_date       object
general_cause     object
odf_acres        float64
total_acres      float64
dtype: object

In [4]:
# Filter fire_year to match weather data 2008 - 2020
fire_df = fire_df[(fire_df['fire_year'] >= 2008) & (fire_df['fire_year'] <= 2020)]


In [5]:
# Confirm Count Matches Fire_weather dataset 
fire_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 12577 entries, 3 to 13715
Data columns (total 14 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   fire_year      12577 non-null  int64  
 1   district       12577 non-null  object 
 2   unit           12577 non-null  object 
 3   fire_number    12577 non-null  object 
 4   fire_name      12577 non-null  object 
 5   legal          12573 non-null  object 
 6   latitude       12569 non-null  object 
 7   longitude      12569 non-null  object 
 8   fuel_model     12511 non-null  object 
 9   county         12574 non-null  object 
 10  report_date    12577 non-null  object 
 11  general_cause  12577 non-null  object 
 12  odf_acres      12577 non-null  float64
 13  total_acres    12515 non-null  float64
dtypes: float64(2), int64(1), object(11)
memory usage: 1.4+ MB


In [6]:
#. Check for null values
fire_df.isna().sum()

fire_year         0
district          0
unit              0
fire_number       0
fire_name         0
legal             4
latitude          8
longitude         8
fuel_model       66
county            3
report_date       0
general_cause     0
odf_acres         0
total_acres      62
dtype: int64

In [7]:
# Converting Lat/Long DMS to Decimal
pattern = r'(?P<d>[\d\.]+).*?(?P<m>[\d\.]+).*?(?P<s>[\d\.]+)'

#Lat
dms = fire_df['latitude'].str.extract(pattern).astype(float)
fire_df['Latitude'] = dms['d'] + dms['m'].div(60) + dms['s'].div(3600)

#Long  
dms = fire_df['longitude'].str.extract(pattern).astype(float)
fire_df['Longitude'] = dms['d'] + dms['m'].div(60) + dms['s'].div(3600)

# Convert all Longtiude to Negative
fire_df['Longitude'] *= -1

fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,latitude,longitude,fuel_model,county,report_date,general_cause,odf_acres,total_acres,Latitude,Longitude
3,2016,55 - West Oregon,Toledo,10,OAR Fire,T7S R11W S23 SWNW,44° 57' 13,-124° 0' 26,J,LINCOLN,2016-08-23,Smoking,0.1,0.1,44.953611,-124.007222
4,2016,72 - Coos,Bridge,178,Ashur Flat,T31S R9W S30 SENW,42° 50' 46,-123° 49' 9,H,DOUGLAS,2016-06-13,Equipment Use,0.01,0.01,42.846111,-123.819167
5,2015,53 - Forest Grove,Columbia City,107,Nick Thomas Rd,T6N R2W S10,,,,COLUMBIA,2015-03-08,Debris Burning,0.25,,,
6,2014,58 - North Cascade,Molalla,37,CAZADERO,T2S R3E S24 NESW,45° 23' 14,-122° 24' 11,G,CLACKAMAS,2014-08-21,Arson,0.02,0.02,45.387222,-122.403056
7,2012,71 - Southwest,Medford,64,Buck Flat,T34S R02W S04 NENE,42° 38' 55,-122° 56' 9,C,JACKSON,2012-07-17,Lightning,0.01,0.01,42.648611,-122.935833


In [8]:
# Drop DMS Lat/Long/odf_acres
fire_df = fire_df.drop(['latitude', 'longitude', "odf_acres"], axis=1)
# Confirm Dropped Columns
fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,fuel_model,county,report_date,general_cause,total_acres,Latitude,Longitude
3,2016,55 - West Oregon,Toledo,10,OAR Fire,T7S R11W S23 SWNW,J,LINCOLN,2016-08-23,Smoking,0.1,44.953611,-124.007222
4,2016,72 - Coos,Bridge,178,Ashur Flat,T31S R9W S30 SENW,H,DOUGLAS,2016-06-13,Equipment Use,0.01,42.846111,-123.819167
5,2015,53 - Forest Grove,Columbia City,107,Nick Thomas Rd,T6N R2W S10,,COLUMBIA,2015-03-08,Debris Burning,,,
6,2014,58 - North Cascade,Molalla,37,CAZADERO,T2S R3E S24 NESW,G,CLACKAMAS,2014-08-21,Arson,0.02,45.387222,-122.403056
7,2012,71 - Southwest,Medford,64,Buck Flat,T34S R02W S04 NENE,C,JACKSON,2012-07-17,Lightning,0.01,42.648611,-122.935833


In [9]:
# Drop the null columns where all values are null
fire_df = fire_df.dropna(axis='columns', how='all')

# Drop the null rows
fire_df = fire_df.dropna()
fire_df.head(5)

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,fuel_model,county,report_date,general_cause,total_acres,Latitude,Longitude
3,2016,55 - West Oregon,Toledo,10,OAR Fire,T7S R11W S23 SWNW,J,LINCOLN,2016-08-23,Smoking,0.1,44.953611,-124.007222
4,2016,72 - Coos,Bridge,178,Ashur Flat,T31S R9W S30 SENW,H,DOUGLAS,2016-06-13,Equipment Use,0.01,42.846111,-123.819167
6,2014,58 - North Cascade,Molalla,37,CAZADERO,T2S R3E S24 NESW,G,CLACKAMAS,2014-08-21,Arson,0.02,45.387222,-122.403056
7,2012,71 - Southwest,Medford,64,Buck Flat,T34S R02W S04 NENE,C,JACKSON,2012-07-17,Lightning,0.01,42.648611,-122.935833
8,2008,72 - Coos,Coos FPA,15,Hedge Lane,T25S R13W S30 SWSW,F,COOS,2008-07-28,Smoking,0.01,43.368331,-124.295


In [10]:
# Import LE to convert Fuel Model and General Cause to Numeric Values
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

# Converting Fuel Model to Numeric
label_encoder = LabelEncoder()
fire_df["fueltype_num"] = label_encoder.fit_transform(fire_df["fuel_model"])

In [11]:
# Alternative Model with 7 CLasses - Set and Classify Fire Sizes - https://www.nwcg.gov/term/glossary/size-class-of-fire
#fire_df.loc[fire_df['total_acres'] <= .25, 'fire_severity'] = 1
#fire_df.loc[(fire_df['total_acres'] > .25) & (fire_df['total_acres'] <= 10), 'fire_severity'] = 2
#fire_df.loc[(fire_df['total_acres'] > 10) & (fire_df['total_acres'] <= 100), 'fire_severity'] = 3
#fire_df.loc[(fire_df['total_acres'] > 100) & (fire_df['total_acres'] <= 300), 'fire_severity'] = 4
#fire_df.loc[(fire_df['total_acres'] > 300) & (fire_df['total_acres'] <= 1000), 'fire_severity'] = 5
#fire_df.loc[(fire_df['total_acres'] > 1000) & (fire_df['total_acres'] <= 5000), 'fire_severity'] = 6
#fire_df.loc[fire_df['total_acres'] > 5000, 'fire_severity'] = 7                                                   

In [12]:
# Set and Classify Fire Sizes - https://www.nwcg.gov/term/glossary/size-class-of-fire
# Smaller buckets were used to better handle the skew in small fires vs large fires
fire_df.loc[fire_df['total_acres'] <= .25, 'fire_severity'] = 1
fire_df.loc[(fire_df['total_acres'] > .25) & (fire_df['total_acres'] <= 300), 'fire_severity'] = 2
fire_df.loc[fire_df['total_acres'] > 300, 'fire_severity'] = 3          

In [13]:
print(fire_df['fire_severity'].value_counts())

1.0    8947
2.0    3384
3.0     173
Name: fire_severity, dtype: int64


In [14]:
fire_df.general_cause.unique()

array(['Smoking', 'Equipment Use', 'Arson', 'Lightning', 'Miscellaneous',
       'Debris Burning', 'Juveniles', 'Recreationist', 'Under Invest',
       'Railroad'], dtype=object)

In [15]:
# Change General Cause Human or Nature

fire_df['general_cause'] = fire_df['general_cause'].replace(['Recreationist','Equipment Use','Debris Burning', 'Smoking', 'Arson', 'Railroad', 'Juveniles'],'1')
fire_df['general_cause'] = fire_df['general_cause'].replace(['Lightning'],'2')
fire_df['general_cause'] = fire_df['general_cause'].replace(['Under Invest', 'Miscellaneous'],'3')


In [16]:
# Change Fuel Type to BINS - Potentinal to improve the model by binning fuel modesl, ex: All Slash types as one instead of 3
#fire_df['fuel_model'] = fire_df['fuel_model'].replace(['I', 'J', 'K'],'1')
#fire_df.head()

#  Random Forest Classifier

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [18]:
fire_binary_encoded = pd.get_dummies(fire_df, columns=["fuel_model", "general_cause"])
fire_binary_encoded.head()

Unnamed: 0,fire_year,district,unit,fire_number,fire_name,legal,county,report_date,total_acres,Latitude,...,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
3,2016,55 - West Oregon,Toledo,10,OAR Fire,T7S R11W S23 SWNW,LINCOLN,2016-08-23,0.1,44.953611,...,1,0,0,0,0,0,0,1,0,0
4,2016,72 - Coos,Bridge,178,Ashur Flat,T31S R9W S30 SENW,DOUGLAS,2016-06-13,0.01,42.846111,...,0,0,0,0,0,0,0,1,0,0
6,2014,58 - North Cascade,Molalla,37,CAZADERO,T2S R3E S24 NESW,CLACKAMAS,2014-08-21,0.02,45.387222,...,0,0,0,0,0,0,0,1,0,0
7,2012,71 - Southwest,Medford,64,Buck Flat,T34S R02W S04 NENE,JACKSON,2012-07-17,0.01,42.648611,...,0,0,0,0,0,0,0,0,1,0
8,2008,72 - Coos,Coos FPA,15,Hedge Lane,T25S R13W S30 SWSW,COOS,2008-07-28,0.01,43.368331,...,0,0,0,0,0,0,0,1,0,0


In [19]:
fire_binary_encoded = fire_binary_encoded.drop(['fueltype_num','district', 'unit', "fire_name", "legal", "report_date","fire_number", "county", "total_acres" ], axis=1)

In [20]:
fire_binary_encoded.head()

Unnamed: 0,fire_year,Latitude,Longitude,fire_severity,fuel_model_A,fuel_model_B,fuel_model_C,fuel_model_F,fuel_model_G,fuel_model_H,...,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
3,2016,44.953611,-124.007222,1.0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
4,2016,42.846111,-123.819167,1.0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
6,2014,45.387222,-122.403056,1.0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
7,2012,42.648611,-122.935833,1.0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,1,0
8,2008,43.368331,-124.295,1.0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0


In [21]:
# Define the features set.
X = fire_binary_encoded
X = X.drop("fire_severity", axis=1)
X.head()

Unnamed: 0,fire_year,Latitude,Longitude,fuel_model_A,fuel_model_B,fuel_model_C,fuel_model_F,fuel_model_G,fuel_model_H,fuel_model_I,fuel_model_J,fuel_model_K,fuel_model_L,fuel_model_R,fuel_model_T,fuel_model_U,fuel_model_X,general_cause_1,general_cause_2,general_cause_3
3,2016,44.953611,-124.007222,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0
4,2016,42.846111,-123.819167,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
6,2014,45.387222,-122.403056,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0
7,2012,42.648611,-122.935833,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0
8,2008,43.368331,-124.295,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0


In [22]:
# Define the target set.
y = fire_binary_encoded["fire_severity"].ravel()
y[:5]

array([1., 1., 1., 1., 1.])

In [23]:
# Splitting into Train and Test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [24]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [25]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=100, random_state=78) 

In [26]:
# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

In [27]:
predictions = rf_model.predict(X_test_scaled)

In [28]:
predictions

array([1., 1., 1., ..., 1., 1., 1.])

In [29]:
# Calculate feature importance in the Random Forest model.
importances = rf_model.feature_importances_
importances

array([0.14482542, 0.37160452, 0.38158204, 0.00836932, 0.00190733,
       0.00789437, 0.00615864, 0.00485301, 0.00671278, 0.00452144,
       0.00608264, 0.00413454, 0.007421  , 0.00373713, 0.00335087,
       0.00302688, 0.014739  , 0.00731579, 0.006444  , 0.00531928])

In [30]:
# Features Sorted by Importance
sorted(zip(rf_model.feature_importances_, X.columns), reverse=True)

[(0.3815820448823148, 'Longitude'),
 (0.37160451800122446, 'Latitude'),
 (0.14482541891658368, 'fire_year'),
 (0.01473900034074117, 'fuel_model_X'),
 (0.008369322252505713, 'fuel_model_A'),
 (0.0078943732973427, 'fuel_model_C'),
 (0.00742099775198451, 'fuel_model_L'),
 (0.007315791730552583, 'general_cause_1'),
 (0.006712778493125965, 'fuel_model_H'),
 (0.0064440039134755715, 'general_cause_2'),
 (0.006158642031039745, 'fuel_model_F'),
 (0.006082635799401574, 'fuel_model_J'),
 (0.005319280125837919, 'general_cause_3'),
 (0.004853008345479589, 'fuel_model_G'),
 (0.004521437736397455, 'fuel_model_I'),
 (0.004134535279168569, 'fuel_model_K'),
 (0.0037371275132057243, 'fuel_model_R'),
 (0.0033508693130262887, 'fuel_model_T'),
 (0.0030268815425131984, 'fuel_model_U'),
 (0.0019073327340788204, 'fuel_model_B')]

In [31]:
importances_df = pd.DataFrame(
    [['Longitude', '0.3815820448823148'], ['Latitude','0.37160451800122446'], ['fire_year','0.14482541891658368'], ['fuel_model_X', '0.01473900034074117']
    , ['fuel_model_A', '0.008369322252505713'], ['fuel_model_C', '0.0078943732973427'], ['fuel_model_L', '0.00742099775198451'], ['general_cause_1', '0.007315791730552583'],
     ['fuel_model_H', '0.006712778493125965'], ['general_cause_2', '0.0064440039134755715'], ['fuel_model_F', '0.006158642031039745'], ['fuel_model_J', '0.006082635799401574'],
     ['general_cause_3', '0.005319280125837919'], ['fuel_model_G', '0.004853008345479589'], ['fuel_model_I', '0.004521437736397455'], ['fuel_model_K', '0.004134535279168569'], 
     ['fuel_model_R', '0.0037371275132057243'], ['fuel_model_T', '0.0033508693130262887'], ['fuel_model_U', '0.0030268815425131984'], ['fuel_model_B', '.0019073327340788204']
    ],
    columns=['Feature', 'Feature Importance'])


importances_df.head(30)

Unnamed: 0,Feature,Feature Importance
0,Longitude,0.3815820448823148
1,Latitude,0.3716045180012244
2,fire_year,0.1448254189165836
3,fuel_model_X,0.0147390003407411
4,fuel_model_A,0.0083693222525057
5,fuel_model_C,0.0078943732973427
6,fuel_model_L,0.0074209977519845
7,general_cause_1,0.0073157917305525
8,fuel_model_H,0.0067127784931259
9,general_cause_2,0.0064440039134755


In [32]:
importances_df['Feature Importance'] = importances_df['Feature Importance'].astype(float)

In [33]:
importances_df['Feature Importance'] = importances_df['Feature Importance'].round(2)

In [34]:
importances_df

Unnamed: 0,Feature,Feature Importance
0,Longitude,0.38
1,Latitude,0.37
2,fire_year,0.14
3,fuel_model_X,0.01
4,fuel_model_A,0.01
5,fuel_model_C,0.01
6,fuel_model_L,0.01
7,general_cause_1,0.01
8,fuel_model_H,0.01
9,general_cause_2,0.01


In [35]:
importances_df["Feature Importance"] = importances_df["Feature Importance"] * 100

In [36]:
importances_df

Unnamed: 0,Feature,Feature Importance
0,Longitude,38.0
1,Latitude,37.0
2,fire_year,14.0
3,fuel_model_X,1.0
4,fuel_model_A,1.0
5,fuel_model_C,1.0
6,fuel_model_L,1.0
7,general_cause_1,1.0
8,fuel_model_H,1.0
9,general_cause_2,1.0


In [37]:
data = importances_df.to_json('./export.json', orient='records')
print(data)

None


In [38]:
# Legend For Fuel Models
#A	Annual grasses (cheat)
#B	Dense Chaparral
#C	Open pine, grass under
#F	Dense Brush (lighter than B)
#G	Conifer, Old growth
#H	Conifer, Second growth
#I	Slash, heavy
#J	Slash, medium
#K	Slash, thinning, P.C., Scattrd
#L	Grass Perennial
#R	Hardwood, summer
#T	Sagebrush, medium dense
#U	Closed canopy pine
#X	Non wildland fuel

In [39]:
# Legend For General Cause
# 1 = Human
# 2 = Nature
# 3 = Uncategorized

In [40]:
# Conusion Matrix to predict fire severity 
cm = confusion_matrix(y_test, predictions)
# DataFrame from the confusion matrix.
cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3"], columns=["Predicted 1", "Predicted 2","Predicted 3"])
cm_df

Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1996,244,5
Class 2,632,199,3
Class 3,33,12,2


In [41]:
#Class 1 - one-quarter acre or less;
#Class 2 - more than one-quarter acre, but less than 300 acres;
#Class 3 - 300 acres or more.

In [42]:
#  # Alternative Model with 7 CLasses Conusion Matrix to predict fire severity 
#cm = confusion_matrix(y_test, predictions)
# DataFrame from the confusion matrix.
#cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3","Class 4","Class 5", "Class 6", "Class 7"], columns=["Predicted 1", "Predicted 2","Predicted 3","Predicted 4","Predicted 5", "Predicted 6","Predicted 7"])
#cm_df

In [43]:
#Class 1 - one-fourth acre or less;
#Class 2 - more than one-fourth acre, but less than 10 acres;
#Class 3 - 10 acres or more, but less than 100 acres;
#Class 4 - 100 acres or more, but less than 300 acres;
#Class 5 - 300 acres or more, but less than 1,000 acres;
#Class 6 - 1,000 acres or more, but less than 5,000 acres;
#Class 7 - 5,000 acres or more.

In [44]:
# Calculating the accuracy score.
acc_score = accuracy_score(y_test, predictions)

In [45]:
# Displaying results
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix


Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1996,244,5
Class 2,632,199,3
Class 3,33,12,2


Accuracy Score : 0.7028150991682661
Classification Report
              precision    recall  f1-score   support

         1.0       0.75      0.89      0.81      2245
         2.0       0.44      0.24      0.31       834
         3.0       0.20      0.04      0.07        47

    accuracy                           0.70      3126
   macro avg       0.46      0.39      0.40      3126
weighted avg       0.66      0.70      0.67      3126



# Combination Sampling With SMOTEENN

In [46]:
fire_smoteen = fire_df.drop(['district','fuel_model', 'unit', "fire_name", "legal", "report_date","fire_number", "county", "total_acres" ], axis=1)
fire_smoteen.head(5)

Unnamed: 0,fire_year,general_cause,Latitude,Longitude,fueltype_num,fire_severity
3,2016,1,44.953611,-124.007222,7,1.0
4,2016,1,42.846111,-123.819167,5,1.0
6,2014,1,45.387222,-122.403056,4,1.0
7,2012,2,42.648611,-122.935833,2,1.0
8,2008,1,43.368331,-124.295,3,1.0


In [47]:
x_cols = [i for i in fire_smoteen.columns if i not in ('fire_severity')]
X = fire_smoteen[x_cols]
y = fire_smoteen['fire_severity']

In [48]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1, stratify=y)

In [49]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [50]:
from imblearn.combine import SMOTEENN
smoteenn = SMOTEENN(random_state=1)
X_resampled, y_resampled = smoteenn.fit_resample(X_train_scaled, y_train)
Counter(y_resampled)

Counter({1.0: 2625, 2.0: 3312, 3.0: 6264})

In [51]:
from sklearn.linear_model import LogisticRegression
smoteen_model = LogisticRegression(solver='lbfgs',max_iter=100)
smoteen_model.fit(X_resampled, y_resampled)

LogisticRegression()

In [52]:
from sklearn.metrics import confusion_matrix
y_pred = smoteen_model.predict(X_test_scaled)
confusion_matrix(y_test, y_pred)

array([[ 462,  697, 1078],
       [ 140,  274,  432],
       [   4,    5,   34]])

In [53]:
#from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import balanced_accuracy_score
y_pred = smoteen_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.3333333333333333

In [54]:
# Display the confusion matrix
smoteen_cm = confusion_matrix(y_test, y_pred)
# DataFrame from the confusion matrix.
smoteen_cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3"], columns=["Predicted 1", "Predicted 2","Predicted 3"])
smoteen_cm_df

Unnamed: 0,Predicted 1,Predicted 2,Predicted 3
Class 1,1996,244,5
Class 2,632,199,3
Class 3,33,12,2


In [55]:
# # Alternative Model with 7 CLasses - Display the confusion matrix
#smoteen_cm = confusion_matrix(y_test, y_pred)
# DataFrame from the confusion matrix.
#smoteen_cm_df = pd.DataFrame(cm, index=["Class 1", "Class 2","Class 3","Class 4","Class 5", "Class 6", "Class 7"], columns=["Predicted 1", "Predicted 2","Predicted 3","Predicted 4","Predicted 5", "Predicted 6","Predicted 7"])
#smoteen_cm_df

In [56]:
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

        1.0       0.00      0.00      1.00      0.00      0.00      0.00      2237
        2.0       0.00      0.00      1.00      0.00      0.00      0.00       846
        3.0       0.01      1.00      0.00      0.03      0.00      0.00        43

avg / total       0.00      0.01      0.99      0.00      0.00      0.00      3126



  _warn_prf(average, modifier, msg_start, len(result))
