-------------------------------------

### Neural Networking

In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf
from pathlib import Path



In [3]:
# Import our input dataset
data = Path('Resources_k/fire_size_bins.csv')
df = pd.read_csv(data)
df.head()

Unnamed: 0.1,Unnamed: 0,fire_id,fire_size,fire_cause,latitude,longitude,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,putout_time,fire_size_bin
0,0,3,1.0,Debris Burning,39.6414,-119.3083,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,0,Teacup
1,1,24,40.0,Arson,31.435181,-88.999489,MS,Apr,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,0,Mini
2,3,31,1.2,Debris Burning,48.833,-99.7836,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,0,Teacup
3,4,35,30.18,Debris Burning,31.259,-84.8956,GA,Oct,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,2000,0,Mini
4,5,36,1420.0,Lightning,33.2418,-104.9122,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,1,Large


In [4]:
df.drop(columns=["Unnamed: 0", "fire_id", "fire_size", "latitude", "longitude", "year", "putout_time"],inplace=True)

In [5]:
df

Unnamed: 0,fire_cause,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,fire_size_bin
0,Debris Burning,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,Teacup
1,Arson,MS,Apr,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,Mini
2,Debris Burning,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,Teacup
3,Debris Burning,GA,Oct,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,Mini
4,Lightning,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,Utilities,TX,Aug,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,Large
13134,Utilities,TX,Oct,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,Large
13135,Accidental,OR,Sep,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,XL
13136,Debris Burning,MT,Mar,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,Large


In [6]:
# Generate our categorical variable list
#cat = df.dtypes[df.dtypes == "object"].index.tolist()
cat=['fire_cause', 'discovery_month', 'state']

# Check the number of unique values in each column
df[cat].nunique()



fire_cause          5
discovery_month    12
state              45
dtype: int64

In [7]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(df[cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(cat)
encode_df.head()

Unnamed: 0,fire_cause_Accidental,fire_cause_Arson,fire_cause_Debris Burning,fire_cause_Lightning,fire_cause_Utilities,discovery_month_Apr,discovery_month_Aug,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,...,state_SC,state_SD,state_TN,state_TX,state_UT,state_VA,state_WA,state_WI,state_WV,state_WY
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
# Merge one-hot encoded features and drop the originals
df = df.merge(encode_df,left_index=True, right_index=True)
df = df.drop(cat,1)
df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,fire_size_bin,...,state_SC,state_SD,state_TN,state_TX,state_UT,state_VA,state_WA,state_WI,state_WV,state_WY
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,Large,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [78]:
df.columns

Index(['Temp_pre_30', 'Temp_pre_15', 'Temp_pre_7', 'Wind_pre_30',
       'Wind_pre_15', 'Wind_pre_7', 'Hum_pre_30', 'Hum_pre_15', 'Hum_pre_7',
       'putout_time', 'fire_size_bin', 'fire_cause_Accidental',
       'fire_cause_Arson', 'fire_cause_Debris Burning', 'fire_cause_Lightning',
       'fire_cause_Utilities', 'discovery_month_Apr', 'discovery_month_Aug',
       'discovery_month_Dec', 'discovery_month_Feb', 'discovery_month_Jan',
       'discovery_month_Jul', 'discovery_month_Jun', 'discovery_month_Mar',
       'discovery_month_May', 'discovery_month_Nov', 'discovery_month_Oct',
       'discovery_month_Sep'],
      dtype='object')

In [79]:
# # Import label encoder
# from sklearn import preprocessing
 
# # label_encoder object knows how to understand word labels.
# label_encoder = preprocessing.LabelEncoder()
 
# # Encode labels in column 'fire_size_bin.
# df['fire_size_bin']= label_encoder.fit_transform(df['fire_size_bin'])
 
# df['fire_size_bin'].unique()

In [9]:
df["size_bin_no"]=''
df

Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,fire_size_bin,...,state_SD,state_TN,state_TX,state_UT,state_VA,state_WA,state_WI,state_WV,state_WY,size_bin_no
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
1,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
3,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,Large,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,Large,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,
13134,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,Large,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,
13135,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,XL,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,
13136,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,Large,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,


In [10]:
df.loc[(df['fire_size_bin']=="Teacup"),"size_bin_no"] = 0
df.loc[(df['fire_size_bin']=="Toy"),"size_bin_no"] = 1
df.loc[(df['fire_size_bin']=="Mini"),"size_bin_no"] = 2
df.loc[(df['fire_size_bin']=="Medium"),"size_bin_no"] = 3
df.loc[(df['fire_size_bin']=="Large"),"size_bin_no"] = 4
df.loc[(df['fire_size_bin']=="XL"),"size_bin_no"] = 5
df


Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,fire_size_bin,...,state_SD,state_TN,state_TX,state_UT,state_VA,state_WA,state_WI,state_WV,state_WY,size_bin_no
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,Teacup,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,Mini,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,Large,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,Large,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4
13134,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,Large,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,4
13135,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,XL,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
13136,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,Large,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [11]:
df['size_bin_no']=df.size_bin_no.astype(int)

In [12]:
df.drop(['fire_size_bin'], axis=1, inplace=True)


In [13]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13138 entries, 0 to 13137
Data columns (total 72 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Temp_pre_30                13138 non-null  float64
 1   Temp_pre_15                13138 non-null  float64
 2   Temp_pre_7                 13138 non-null  float64
 3   Wind_pre_30                13138 non-null  float64
 4   Wind_pre_15                13138 non-null  float64
 5   Wind_pre_7                 13138 non-null  float64
 6   Hum_pre_30                 13138 non-null  float64
 7   Hum_pre_15                 13138 non-null  float64
 8   Hum_pre_7                  13138 non-null  float64
 9   fire_cause_Accidental      13138 non-null  float64
 10  fire_cause_Arson           13138 non-null  float64
 11  fire_cause_Debris Burning  13138 non-null  float64
 12  fire_cause_Lightning       13138 non-null  float64
 13  fire_cause_Utilities       13138 non-null  flo

In [14]:
df.columns

Index(['Temp_pre_30', 'Temp_pre_15', 'Temp_pre_7', 'Wind_pre_30',
       'Wind_pre_15', 'Wind_pre_7', 'Hum_pre_30', 'Hum_pre_15', 'Hum_pre_7',
       'fire_cause_Accidental', 'fire_cause_Arson',
       'fire_cause_Debris Burning', 'fire_cause_Lightning',
       'fire_cause_Utilities', 'discovery_month_Apr', 'discovery_month_Aug',
       'discovery_month_Dec', 'discovery_month_Feb', 'discovery_month_Jan',
       'discovery_month_Jul', 'discovery_month_Jun', 'discovery_month_Mar',
       'discovery_month_May', 'discovery_month_Nov', 'discovery_month_Oct',
       'discovery_month_Sep', 'state_AK', 'state_AL', 'state_AR', 'state_AZ',
       'state_CA', 'state_CO', 'state_FL', 'state_GA', 'state_IA', 'state_ID',
       'state_IL', 'state_IN', 'state_KS', 'state_KY', 'state_LA', 'state_MA',
       'state_MD', 'state_ME', 'state_MI', 'state_MN', 'state_MO', 'state_MS',
       'state_MT', 'state_NC', 'state_ND', 'state_NE', 'state_NJ', 'state_NM',
       'state_NV', 'state_NY', 'state_OH', 'st

In [15]:
# Remove loan status target from features data
target=df.size_bin_no
y=target
# X = df.loc[:, df.columns !=target]
X=df.drop(columns=["size_bin_no", ]).values

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [17]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = len(X_train[0])
hidden_nodes_layer1 = 210
hidden_nodes_layer2 = 140
hidden_nodes_layer3 = 70


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_2 (Dense)             (None, 210)               15120     
                                                                 
 dense_3 (Dense)             (None, 140)               29540     
                                                                 
 dense_4 (Dense)             (None, 70)                9870      
                                                                 
 dense_5 (Dense)             (None, 1)                 71        
                                                                 
Total params: 54,601
Trainable params: 54,601
Non-trainable params: 0
_________________________________________________________________


In [18]:
# Compile the model
nn.compile(loss='categorical_crossentropy', optimizer="adam", metrics=["accuracy"])

In [19]:
## Import checkpoint dependencies
# import os
# from tensorflow.keras.callbacks import ModelCheckpoint

# # Define the checkpoint path and filenames
# os.makedirs("checkpoints/",exist_ok=True)
# checkpoint_path = "checkpoints/weights.{epoch:02d}.hdf5"

# # Create a callback that saves the model's weights every 5 epoch
# cp_callback = ModelCheckpoint(
#     filepath=checkpoint_path,
#     verbose=5,
#     save_weights_only=True,
#     save_freq='epoch')

# Train the model
# fit_model = nn.fit(X_train_scaled,y_train,epochs=100,callbacks=[cp_callback])
fit_model = nn.fit(X_train_scaled,y_train,epochs=50)

# # Restore the model weights
# nn.load_weights("checkpoints/weights.100.hdf5")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

103/103 - 0s - loss: 0.0000e+00 - accuracy: 0.3248 - 267ms/epoch - 3ms/step
Loss: 0.0, Accuracy: 0.324809730052948


---------------------

------------------------------

### Random Forest Classifier

In [62]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from pathlib import Path

# Import our input dataset
data = Path('Resources_k/fire_size_bins.csv')
df = pd.read_csv(data)
df.head()

Unnamed: 0.1,Unnamed: 0,fire_id,fire_size,fire_cause,latitude,longitude,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,putout_time,fire_size_bin
0,0,3,1.0,Debris Burning,39.6414,-119.3083,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,0,Teacup
1,1,24,40.0,Arson,31.435181,-88.999489,MS,Apr,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,0,Mini
2,3,31,1.2,Debris Burning,48.833,-99.7836,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,0,Teacup
3,4,35,30.18,Debris Burning,31.259,-84.8956,GA,Oct,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,2000,0,Mini
4,5,36,1420.0,Lightning,33.2418,-104.9122,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,1,Large


In [63]:
df.drop(columns=["Unnamed: 0", "fire_id", "fire_size", "latitude", "longitude"],inplace=True)
df

Unnamed: 0,fire_cause,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,putout_time,fire_size_bin
0,Debris Burning,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,0,Teacup
1,Arson,MS,Apr,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,0,Mini
2,Debris Burning,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,0,Teacup
3,Debris Burning,GA,Oct,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,2000,0,Mini
4,Lightning,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,1,Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,Utilities,TX,Aug,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,2015,3,Large
13134,Utilities,TX,Oct,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,2015,11,Large
13135,Accidental,OR,Sep,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,2015,39,XL
13136,Debris Burning,MT,Mar,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,2015,5,Large


In [67]:
# Generate our categorical variable list
#cat = df.dtypes[df.dtypes == "object"].index.tolist()
cat=['fire_cause', "state", "discovery_month"]

# Check the number of unique values in each column
df[cat].nunique()

fire_cause          5
state              45
discovery_month    12
dtype: int64

In [68]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(df[cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(cat)
encode_df.head()

Unnamed: 0,fire_cause_Accidental,fire_cause_Arson,fire_cause_Debris Burning,fire_cause_Lightning,fire_cause_Utilities,state_AK,state_AL,state_AR,state_AZ,state_CA,...,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,discovery_month_Jul,discovery_month_Jun,discovery_month_Mar,discovery_month_May,discovery_month_Nov,discovery_month_Oct,discovery_month_Sep
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [69]:
# Merge one-hot encoded features and drop the originals
df = df.merge(encode_df,left_index=True, right_index=True)
df = df.drop(cat,1)
df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,...,discovery_month_Dec_y,discovery_month_Feb_y,discovery_month_Jan_y,discovery_month_Jul_y,discovery_month_Jun_y,discovery_month_Mar_y,discovery_month_May_y,discovery_month_Nov_y,discovery_month_Oct_y,discovery_month_Sep_y
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,2000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [70]:
# Remove loan status target from features data
target=df.fire_size_bin
y=target
#X = df.loc[:, df.columns !=target]
X=df.copy()
X=df.drop(columns=['fire_size_bin'])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [71]:
X

Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,...,discovery_month_Dec_y,discovery_month_Feb_y,discovery_month_Jan_y,discovery_month_Jul_y,discovery_month_Jun_y,discovery_month_Mar_y,discovery_month_May_y,discovery_month_Nov_y,discovery_month_Oct_y,discovery_month_Sep_y
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,2000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,2015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13134,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,2015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
13135,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,2015,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
13136,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,2015,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [74]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=400, random_state=78) 

#Fitting the model
model = model.fit(X_train, y_train)

# Making predictions using the testing data.
y_pred = model.predict(X_test)

In [75]:
# Calculating the accuracy score.
from sklearn.metrics import balanced_accuracy_score
acc_score = balanced_accuracy_score(y_test, y_pred)
acc_score

0.3624631676794367

In [76]:
from sklearn.metrics import confusion_matrix
# Calculating the confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(
    cm, index=["Actual Teacup", "Actual Toy", "Actual Mini", "Actual Medium", "Actual Large", "Actual XL"], columns=["Predicted Teacup", "Predicted Toy", "Predicted Mini", "Predicted Medium", "Predicted Large", "Predicted XL"]
)
cm_df

Unnamed: 0,Predicted Teacup,Predicted Toy,Predicted Mini,Predicted Medium,Predicted Large,Predicted XL
Actual Teacup,236,8,5,56,2,102
Actual Toy,39,18,27,176,5,5
Actual Mini,35,10,43,340,10,7
Actual Medium,44,11,83,1365,18,5
Actual Large,20,12,22,260,11,4
Actual XL,135,0,1,14,1,155


In [77]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))


                   pre       rec       spe        f1       geo       iba       sup

      Large       0.46      0.58      0.91      0.51      0.72      0.51       409
     Medium       0.31      0.07      0.99      0.11      0.26      0.06       270
       Mini       0.24      0.10      0.95      0.14      0.30      0.08       445
     Teacup       0.62      0.89      0.52      0.73      0.68      0.48      1526
        Toy       0.23      0.03      0.99      0.06      0.18      0.03       329
         XL       0.56      0.51      0.96      0.53      0.70      0.46       306

avg / total       0.48      0.56      0.75      0.49      0.55      0.35      3285



In [27]:
# Displaying Easy Ensemble Classifier
print("Random Forest Clasifier")
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report_imbalanced(y_test, y_pred))

Random Forest Clasifier
Confusion Matrix


Unnamed: 0,Predicted Teacup,Predicted Toy,Predicted Mini,Predicted Medium,Predicted Large,Predicted XL
Actual Teacup,139,9,8,159,5,89
Actual Toy,36,9,13,196,4,12
Actual Mini,42,11,23,358,1,10
Actual Medium,97,5,53,1315,12,44
Actual Large,21,3,18,266,10,11
Actual XL,84,1,3,87,3,128


Accuracy Score : 0.28921630513405056
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

      Large       0.33      0.34      0.90      0.34      0.55      0.29       409
     Medium       0.24      0.03      0.99      0.06      0.18      0.03       270
       Mini       0.19      0.05      0.97      0.08      0.22      0.05       445
     Teacup       0.55      0.86      0.39      0.67      0.58      0.36      1526
        Toy       0.29      0.03      0.99      0.05      0.17      0.03       329
         XL       0.44      0.42      0.94      0.43      0.63      0.37       306

avg / total       0.41      0.49      0.69      0.42      0.46      0.25      3285



In [78]:
## CCaallccuullaattee ffeeaattuurree iimmppoorrttaannccee iinn tthhee RRaannddoomm FFoorreesstt mmooddeell..
importances = model.feature_importances_
importances

array([6.31293633e-02, 6.25837833e-02, 6.31637163e-02, 6.18938930e-02,
       6.12218974e-02, 6.08595851e-02, 6.83096504e-02, 6.71820828e-02,
       6.62086368e-02, 4.71317145e-02, 1.00229395e-01, 3.29068005e-03,
       4.62985080e-03, 5.35206153e-03, 1.29257428e-02, 3.62612765e-03,
       7.26025525e-03, 2.41185231e-03, 1.65880059e-03, 2.34453585e-03,
       3.18069273e-03, 1.40883852e-03, 1.72445984e-03, 3.13388556e-03,
       4.24485386e-05, 3.39597977e-03, 3.62321960e-04, 2.76408178e-04,
       7.69575110e-04, 1.64098798e-03, 2.97133337e-04, 3.40318959e-06,
       1.59548896e-04, 2.21065839e-04, 5.94191892e-04, 1.29221513e-03,
       8.80718814e-04, 2.63531983e-03, 2.50660348e-03, 1.28185598e-03,
       1.01239552e-03, 1.32270981e-03, 1.07571418e-04, 2.35052755e-03,
       2.28668708e-03, 3.29927322e-03, 3.47433772e-04, 3.21624756e-03,
       1.77541282e-03, 4.13883481e-04, 2.53663838e-05, 1.09238717e-03,
       1.56189340e-03, 1.39171755e-03, 2.62721894e-03, 1.69981445e-03,
      

In [79]:
# We can sort the features by their importance.
sorted(zip(model.feature_importances_, X.columns), reverse=True)

[(0.10022939479334933, 'putout_time'),
 (0.06830965040098498, 'Hum_pre_30'),
 (0.0671820828329059, 'Hum_pre_15'),
 (0.06620863681710037, 'Hum_pre_7'),
 (0.06316371628019732, 'Temp_pre_7'),
 (0.0631293632768553, 'Temp_pre_30'),
 (0.06258378328696873, 'Temp_pre_15'),
 (0.06189389295001725, 'Wind_pre_30'),
 (0.06122189737557873, 'Wind_pre_15'),
 (0.060859585061935235, 'Wind_pre_7'),
 (0.04713171450333413, 'year'),
 (0.013126542609198927, 'fire_cause_Lightning_y'),
 (0.012925742834042967, 'fire_cause_Lightning_x'),
 (0.0072602552452220555, 'state_AK_x'),
 (0.0066404643607144015, 'state_AK_y'),
 (0.005431894910133094, 'fire_cause_Debris Burning_y'),
 (0.005352061525959901, 'fire_cause_Debris Burning_x'),
 (0.004869451462361008, 'discovery_month_Jul_x'),
 (0.00467329293251056, 'discovery_month_Jun_y'),
 (0.004655931587823601, 'discovery_month_Jul_y'),
 (0.0046298507958445045, 'fire_cause_Arson_x'),
 (0.004612291216752795, 'discovery_month_Jun_x'),
 (0.004611292513697992, 'fire_cause_Arson_y'

-------------------------

----------------------------------

### Easy Ensemble Classifier

In [47]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from pathlib import Path

# Import our input dataset
data = Path('Resources_k/fire_size_bins.csv')
df = pd.read_csv(data)
df.head()

Unnamed: 0.1,Unnamed: 0,fire_id,fire_size,fire_cause,latitude,longitude,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,putout_time,fire_size_bin
0,0,3,1.0,Debris Burning,39.6414,-119.3083,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,0,Teacup
1,1,24,40.0,Arson,31.435181,-88.999489,MS,Apr,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,0,Mini
2,3,31,1.2,Debris Burning,48.833,-99.7836,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,0,Teacup
3,4,35,30.18,Debris Burning,31.259,-84.8956,GA,Oct,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,2000,0,Mini
4,5,36,1420.0,Lightning,33.2418,-104.9122,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,1,Large


In [48]:
df.drop(columns=["Unnamed: 0", "fire_id", "fire_size", "latitude", "longitude"],inplace=True)
df

Unnamed: 0,fire_cause,state,discovery_month,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,putout_time,fire_size_bin
0,Debris Burning,NV,Jun,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,0,Teacup
1,Arson,MS,Apr,13.468619,15.067227,15.604790,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,0,Mini
2,Debris Burning,ND,Apr,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,0,Teacup
3,Debris Burning,GA,Oct,20.079480,17.722714,18.188679,3.659840,3.366443,2.211429,67.551783,61.733788,60.328571,2000,0,Mini
4,Lightning,NM,Jul,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,1,Large
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13133,Utilities,TX,Aug,30.731860,30.545367,28.719917,3.136761,3.238803,3.224274,50.089157,49.416505,55.426471,2015,3,Large
13134,Utilities,TX,Oct,24.642268,23.713390,24.221869,1.529850,1.576828,1.563817,62.848172,60.641975,63.196819,2015,11,Large
13135,Accidental,OR,Sep,15.546194,12.890633,10.734328,2.608150,2.486802,1.835821,55.009259,62.368700,67.266304,2015,39,XL
13136,Debris Burning,MT,Mar,2.275974,7.360185,7.678571,4.428757,4.197593,3.803571,51.676681,46.933399,42.638384,2015,5,Large


In [49]:
# Generate our categorical variable list
#cat = df.dtypes[df.dtypes == "object"].index.tolist()
cat=['fire_cause', "state", "discovery_month"]

# Check the number of unique values in each column
df[cat].nunique()

fire_cause          5
state              45
discovery_month    12
dtype: int64

In [50]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(df[cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(cat)
encode_df.head()

Unnamed: 0,fire_cause_Accidental,fire_cause_Arson,fire_cause_Debris Burning,fire_cause_Lightning,fire_cause_Utilities,state_AK,state_AL,state_AR,state_AZ,state_CA,...,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,discovery_month_Jul,discovery_month_Jun,discovery_month_Mar,discovery_month_May,discovery_month_Nov,discovery_month_Oct,discovery_month_Sep
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [51]:
# Merge one-hot encoded features and drop the originals
df = df.merge(encode_df,left_index=True, right_index=True)
df = df.drop(cat,1)
df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Temp_pre_30,Temp_pre_15,Temp_pre_7,Wind_pre_30,Wind_pre_15,Wind_pre_7,Hum_pre_30,Hum_pre_15,Hum_pre_7,year,...,discovery_month_Dec,discovery_month_Feb,discovery_month_Jan,discovery_month_Jul,discovery_month_Jun,discovery_month_Mar,discovery_month_May,discovery_month_Nov,discovery_month_Oct,discovery_month_Sep
0,16.275967,18.996181,18.142564,4.054982,3.398329,3.671282,44.778429,37.140811,35.353846,2005,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,13.468619,15.067227,15.60479,2.038268,1.737921,1.775904,57.997207,56.747191,59.614458,1999,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,-0.891635,0.372659,-4.273834,5.800667,6.012852,6.658621,77.575012,75.963981,71.173116,2007,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,20.07948,17.722714,18.188679,3.65984,3.366443,2.211429,67.551783,61.733788,60.328571,2000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,31.055859,32.523438,34.893333,4.026367,3.844922,3.695833,28.783203,25.789062,18.208333,1994,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [52]:
# Remove loan status target from features data
target=df.fire_size_bin
y=target
# X = df.loc[:, df.columns !=target]
X=df.drop(columns=['fire_size_bin']).values

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [57]:
# Train the EasyEnsembleClassifier
from imblearn.ensemble import EasyEnsembleClassifier
model = EasyEnsembleClassifier(n_estimators=280, random_state=1) 

#Fitting the model
model = model.fit(X_train, y_train)

# Making predictions using the testing data.
y_pred = model.predict(X_test)

In [58]:
# Calculated the balanced accuracy score
from sklearn.metrics import accuracy_score
acc_score = accuracy_score(y_test, y_pred)

In [59]:
from sklearn.metrics import confusion_matrix
# Calculating the confusion matrix
cm = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(
cm, index=["Actual Teacup", "Actual Toy", "Actual Mini", "Actual Medium", "Actual Large", "Actual XL"], columns=["Predicted Teacup", "Predicted Toy", "Predicted Mini", "Predicted Medium", "Predicted Large", "Predicted XL"]
)
cm_df

Unnamed: 0,Predicted Teacup,Predicted Toy,Predicted Mini,Predicted Medium,Predicted Large,Predicted XL
Actual Teacup,172,46,2,13,5,164
Actual Toy,48,76,54,48,38,19
Actual Mini,35,78,123,143,85,13
Actual Medium,43,142,272,752,265,18
Actual Large,9,53,72,119,58,5
Actual XL,99,12,2,3,4,195


In [60]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))

                   pre       rec       spe        f1       geo       iba       sup

      Large       0.42      0.43      0.92      0.43      0.63      0.37       402
     Medium       0.19      0.27      0.89      0.22      0.49      0.22       283
       Mini       0.23      0.26      0.86      0.25      0.47      0.21       477
     Teacup       0.70      0.50      0.82      0.59      0.64      0.40      1492
        Toy       0.13      0.18      0.87      0.15      0.40      0.15       316
         XL       0.47      0.62      0.93      0.53      0.76      0.56       315

avg / total       0.48      0.42      0.86      0.44      0.59      0.34      3285



In [61]:
# Displaying Easy Ensemble Classifier
print("Balanced Random Forest Clasifier")
print("Confusion Matrix")
display(cm_df)
print(f"Accuracy Score : {acc_score}")
print("Classification Report")
print(classification_report_imbalanced(y_test, y_pred))

Balanced Random Forest Clasifier
Confusion Matrix


Unnamed: 0,Predicted Teacup,Predicted Toy,Predicted Mini,Predicted Medium,Predicted Large,Predicted XL
Actual Teacup,172,46,2,13,5,164
Actual Toy,48,76,54,48,38,19
Actual Mini,35,78,123,143,85,13
Actual Medium,43,142,272,752,265,18
Actual Large,9,53,72,119,58,5
Actual XL,99,12,2,3,4,195


Accuracy Score : 0.41887366818873667
Classification Report
                   pre       rec       spe        f1       geo       iba       sup

      Large       0.42      0.43      0.92      0.43      0.63      0.37       402
     Medium       0.19      0.27      0.89      0.22      0.49      0.22       283
       Mini       0.23      0.26      0.86      0.25      0.47      0.21       477
     Teacup       0.70      0.50      0.82      0.59      0.64      0.40      1492
        Toy       0.13      0.18      0.87      0.15      0.40      0.15       316
         XL       0.47      0.62      0.93      0.53      0.76      0.56       315

avg / total       0.48      0.42      0.86      0.44      0.59      0.34      3285

