In [1]:
import pandas as pd 
import seaborn as sns
import numpy as np 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split   
from sklearn.metrics import confusion_matrix  
from sklearn.metrics import accuracy_score
import pickle
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report
from collections import Counter
from imblearn.over_sampling import SMOTE

### data prep

In [22]:
# Load the dataset from a CSV path that is relative to the working directory.
data=pd.read_csv("data/cleaned_data.csv")
# Bare last expression: renders the DataFrame as the cell output.
data

In [19]:
# Count how often each distinct value occurs in the Degree_rotation column.
data['Degree_rotation'].value_counts()

In [4]:
def grouping_(x):
    """Map a numeric value onto one of three coarse class labels.

    Returns:
        0   when ``x`` is less than or equal to 30,
        50  when ``x`` lies strictly between 30 and 300,
        100 otherwise (300 and above; NaN also lands here because every
            comparison against NaN is false).
    """
    if x <= 30:
        return 0
    if x < 300:  # the lower bound was already ruled out by the check above
        return 50
    return 100
    
# Replace every Degree_rotation value with its class label (0 / 50 / 100).
# NOTE: the column is overwritten in place, so this cell is not idempotent:
# running it a second time re-bins the label 100 to 50 (because 30 < 100 < 300).
# Reload `data` before re-running it.
data['Degree_rotation'] = data['Degree_rotation'].apply(grouping_)

In [20]:
# Count the Degree_rotation values again to inspect the column after the binning cell above.
data['Degree_rotation'].value_counts()

In [21]:
# Bare expression: render the DataFrame so its current contents can be inspected.
data

In [7]:
# Feature matrix: every column except the last one, as a NumPy array.
x= data.iloc[:,:-1].values  
# Target vector: the last column only.
# NOTE(review): this relies on column position - presumably the last column is the label; confirm against the CSV.
y= data.iloc[:,-1].values  

### data preprocessing

In [18]:
# transform the dataset
oversample = SMOTE()
x, y = oversample.fit_resample(x, y)
# summarize distribution
counter = Counter(y)
for k,v in counter.items():
    per = v / len(y) * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))
# plot the distribution
plt.bar(counter.keys(), counter.values())
plt.show()

In [9]:
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.25, random_state=0, shuffle=True,stratify=y)

### Model training

In [23]:
# Gaussian Naive Bayes classifier created with its default settings.
NV=GaussianNB()
# Fit on the training split; as the last expression, the fitted estimator is shown as the cell output.
NV.fit(x_train,y_train)

### Model evaluation

In [24]:
# Predict a class label for every row of the held-out test features.
y_pred_NV= NV.predict(x_test)  
# Bare expression: show the predicted labels as the cell output.
y_pred_NV

In [12]:
# Accuracy of the predictions against the true test labels.
# The value is not displayed here; it is interpolated into the heatmap title further down.
accuracy = accuracy_score(y_test, y_pred_NV)

In [13]:
# Text report with per-class precision, recall, f1-score and support (see the output below).
print(classification_report(y_test, y_pred_NV))

              precision    recall  f1-score   support

           0       0.52      0.76      0.62     15797
          50       0.47      0.33      0.39     15796
         100       0.79      0.65      0.72     15796

    accuracy                           0.58     47389
   macro avg       0.59      0.58      0.57     47389
weighted avg       0.59      0.58      0.57     47389



In [25]:
import numpy as np
from sklearn.metrics import confusion_matrix

def accuracy_for_each_element(confusion_matrix):
    """Row-normalise a confusion matrix and round the result to two decimals.

    Each entry ``[i, j]`` of the result is ``confusion_matrix[i, j]`` divided
    by the sum of row ``i``. When rows are the actual classes, that is the
    fraction of class-``i`` samples predicted as class ``j``, and the diagonal
    holds the per-class recall.

    Args:
        confusion_matrix: square array-like of counts (NumPy array or nested
            lists). Note that inside this function the parameter name shadows
            the scikit-learn function of the same name.

    Returns:
        Float ``numpy.ndarray`` with the same shape as the input. Rows whose
        sum is zero are returned as all zeros instead of NaN.
    """
    # Use the argument itself; the previous body read the notebook-level
    # variable `cm`, so any other matrix passed in was silently ignored.
    counts = np.asarray(confusion_matrix, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    rates = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,  # leave rows without samples at 0.0, avoiding 0/0
    )
    return np.round(rates, 2)

# Confusion matrix of raw counts for the test predictions.
cm = confusion_matrix(y_test, y_pred_NV)

# Row-normalised version of the counts, used as the text shown in each cell.
accuracy_matrix = accuracy_for_each_element(cm)

# Flatten the rates into a plain list, then rebuild the 3x3 annotation grid.
group_counts = list(accuracy_matrix.flatten())
labels = np.asarray(group_counts).reshape(3, 3)

# Cell colour encodes the raw counts in `cm`; the annotation text is the rate.
class_names = ['0', '50', '100']
ht = sns.heatmap(cm, annot=labels, fmt='', cmap='Blues')
ht.set_xticklabels(class_names)
ht.set_yticklabels(class_names)

plt.title(f'Naive Bayes Model Size - 990 Bytes\n{round(accuracy*100, 2)} accuracy')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

### save model

In [15]:
# Destination of the serialized model, relative to the working directory.
f='model_zoo/naive_bayes.pkl'
# The context manager flushes and closes the file even if pickling raises;
# the previous bare open() left the handle open.
with open(f, 'wb') as model_file:
    pickle.dump(NV, model_file)

### model testing

In [16]:
# Reload the pickled model from disk. The context manager closes the file
# handle, which the previous bare open() never did.
# NOTE: pickle.load can execute arbitrary code - only load files you trust.
with open('model_zoo/naive_bayes.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# A single hand-written sample with six feature values.
# NOTE(review): presumably these must be in the same column order as the training features - confirm.
input_data = np.array([-722,-1918,-194,-15,-8,-1])
# Wrap the sample in a list so the model receives one row rather than a flat vector.
result = model.predict([input_data])

In [17]:
# Only one sample was passed to predict(), so take the first (and only) predicted label.
result[0]

100