# Machine Learning Models

In [1]:
import pandas as pd

# sklearn 10FCV
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict

# Classification report
from sklearn.metrics import classification_report


## Naive Bayes

In [3]:
from sklearn.naive_bayes import GaussianNB

filename = 'Delivery_cleaned_attributes_space.csv'

colNames = [
'PN',
'TSLED',
'noDelExp',
'price',
'class',
]

data = pd.read_csv(filename, delim_whitespace=True, names=colNames)
array = data.values

X = array[:,0:4]
Y = array[:,4]

num_folds = 10
seed = 1

print("Naive Bayes:\n------------------------------------")
kfold = KFold(n_splits=num_folds)
model = GaussianNB()

print("")
# over all classification report
y_pred = cross_val_predict(model, X, Y, cv=10)
report = classification_report(Y, y_pred)
print(report)

Naive Bayes:
------------------------------------

              precision    recall  f1-score   support

         0.0       0.69      0.83      0.75      9562
         1.0       0.46      0.29      0.35      4908

    accuracy                           0.64     14470
   macro avg       0.58      0.56      0.55     14470
weighted avg       0.61      0.64      0.62     14470



## Decision Tree

In [4]:
from pandas import read_csv

from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import matplotlib.pyplot as plt

filename = 'Delivery_cleaned_attributes.csv'

colNames = [
'PN',
'TSLED',
'noDelExp',
'price',
'class',
]

dataframe = read_csv(filename, names=colNames)
array = dataframe.values

X = array[:,0:4]
Y = array[:,4]

# Folds and seed`23
num_folds = 10
seed = 1

# Initial test
print("Decision Tree:\n------------------------------------")
kfold = KFold(n_splits=num_folds)
model = DecisionTreeClassifier()

print("")
# over all classification report
y_pred = cross_val_predict(model, X, Y, cv=10)
report = classification_report(Y, y_pred)
print(report)


Decision Tree:
------------------------------------

              precision    recall  f1-score   support

         0.0       0.35      0.21      0.26      9562
         1.0       0.13      0.22      0.16      4908

    accuracy                           0.22     14470
   macro avg       0.24      0.22      0.21     14470
weighted avg       0.27      0.22      0.23     14470



## Support Vector Machines

In [9]:
from sklearn.model_selection import cross_val_score
from sklearn import svm
from sklearn.model_selection import train_test_split

filename = 'Delivery_cleaned_attributes.csv'

colNames = [
'PN',
'TSLED',
'noDelExp',
'price',
'class',
]

dataframe = read_csv(filename, names=colNames)
array = dataframe.values

X = array[:,0:4]
Y = array[:,4]

# Normalize the attributes
norX = X / X.max(axis=0)

# Folds and seed
num_folds = 10
seed = 1


print("SVM:\n------------------------------------")
kfold = KFold(n_splits=num_folds)
model = svm.LinearSVR()
results = cross_val_score(model, norX, Y, cv=kfold)

print("")
# over all classification report
y_pred = cross_val_predict(model, norX, Y)
report = classification_report(Y, y_pred.round())
print(report)

SVM:
------------------------------------





              precision    recall  f1-score   support

         0.0       0.67      0.97      0.79      9562
         1.0       0.49      0.05      0.09      4908

    accuracy                           0.66     14470
   macro avg       0.58      0.51      0.44     14470
weighted avg       0.61      0.66      0.55     14470

