In [1]:
# Load EDA pkgs
import pandas as pd 
import numpy as np

# Load Data Vis Pkg
import matplotlib.pyplot as plt 
import seaborn as sns

# Load ML Pkgs
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# For Neural network (MultiLayerPerceptron)
from sklearn.neural_network import MLPClassifier

In [2]:
col_names = ['buying','maint','doors' ,'persons','lug_boot','safety','class']

# Load dataset
df = pd.read_csv("data/carData.csv",names=col_names)

df.head()

Unnamed: 0,buying,maint,doors,persons,lug_boot,safety,class
0,vhigh,vhigh,2,2,small,low,unacc
1,vhigh,vhigh,2,2,small,med,unacc
2,vhigh,vhigh,2,2,small,high,unacc
3,vhigh,vhigh,2,2,med,low,unacc
4,vhigh,vhigh,2,2,med,med,unacc


We will then label-encode our data set using either of these methods:

1. Custom Function
2. Label Encoder from Sklearn
3. OneHot Encoding
4. Pandas Get Dummies

In our Case we will be using a custom function to help us encode our data set and then map them to our values for each column respectively. We will then save these labels as dictionaries and use it for building the options sections of our ML app.

In [3]:
# Custom Function
buying_label = { ni: n for n,ni in enumerate(set(df['buying']))}
maint_label = { ni: n for n,ni in enumerate(set(df['maint']))}
doors_label = { ni: n for n,ni in enumerate(set(df['doors']))}
persons_label = { ni: n for n,ni in enumerate(set(df['persons']))}
lug_boot_label = { ni: n for n,ni in enumerate(set(df['lug_boot']))}
safety_label = { ni: n for n,ni in enumerate(set(df['safety']))}
class_label = { ni: n for n,ni in enumerate(set(df['class']))}

In [4]:
df1 = df

In [5]:
df1['buying'] = df1['buying'].map(buying_label)
df1['maint'] = df1['maint'].map(maint_label)
df1['doors'] = df1['doors'].map(doors_label)
df1['persons'] = df1['persons'].map(persons_label)
df1['lug_boot'] = df1['lug_boot'].map(lug_boot_label)
df1['safety'] = df1['safety'].map(safety_label)
df1['class'] = df1['class'].map(class_label)

We can also use the label encoder option

###### Using LabelEncoder

from sklearn.preprocessing import LabelEncoder

lb=LabelEncoder()

df2 = df

for i in df2.columns:

df2[i]=lb.fit_transform(df2[i])

### Building the Model

To summarize we will be using 3 different ML algorithms

1. LogisticRegression
2. Naive Bayes
3. Multi-Layer Perceptron Classifier

In [6]:
# Split our dataset into training and test dataset
Xfeatures = df1[['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety']]
ylabels = df1['class']

X_train, X_test, y_train, y_test = train_test_split(Xfeatures, ylabels, test_size=0.30, random_state=7)

In [7]:
# Using LogisticRegression

logit = LogisticRegression()
logit.fit(X_train, y_train)

LogisticRegression()

In [8]:
# We can then check for the accuracy of our model using accuracy_score from sklearn.metrics.
print("Accuracy Score:",accuracy_score(y_test, logit.predict(X_test)))

Accuracy Score: 0.7148362235067437


In [9]:
# Using Neural Network

nn_clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
nn_clf.fit(X_train,y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1,
              solver='lbfgs')

In [10]:
MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(5, 2), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=1, shuffle=True, solver='lbfgs',
              tol=0.0001, validation_fraction=0.1, verbose=False,
              warm_start=False)

MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1,
              solver='lbfgs')

In [11]:
print("Accuracy Score:",accuracy_score(y_test, nn_clf.predict(X_test)))

Accuracy Score: 0.8497109826589595


### Saving the Model For Our App

To build our ML app we will need to save or serialized our ML models and use them for the next section. You can use pickle or joblib, but you should make sure to use the same when loading/de-serializing your model.

In [12]:
# Save Models
import joblib

In [13]:
logit_model = open("logit_car_model.pkl","wb")
joblib.dump(logit,logit_model)
logit_model.close()

In [14]:
nn_clf_model = open("nn_clf_car_model.pkl","wb")
joblib.dump(nn_clf,nn_clf_model)
nn_clf_model.close()