In [1]:
import pandas as pd 
from sklearn.model_selection import train_test_split  
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score , classification_report, confusion_matrix
import pickle
import numpy as np 
from imblearn.over_sampling import SMOTE
import matplotlib.pyplot as plt
from collections import Counter
import seaborn as sns

In [2]:
# Load the cleaned dataset; the bare `data` expression renders it as the cell output.
csv_path = 'data/cleaned_data.csv'
data = pd.read_csv(csv_path)
data

Unnamed: 0,Acc_x,Acc_y,Acc_z,Gyro_x,Gyro_y,Gyro_z,Degree_rotation
0,-131,-2039,-126,-16,-4,-6,0
1,-135,-2038,-119,-18,-4,-7,0
2,-131,-2039,-127,-17,-3,-7,0
3,-132,-2035,-118,-18,-4,-7,0
4,-131,-2039,-128,-18,-4,-7,0
...,...,...,...,...,...,...,...
72770,1,-2043,-85,-20,-2,-1,250
72771,10,-2046,-91,-21,-1,0,250
72772,11,-2045,-95,-19,-3,1,250
72773,9,-2046,-89,-19,-4,-1,250


### Data preparation

In [3]:
def grouping_(x):
    """Map a rotation angle to one of three class labels.

    Bins (upper bounds inclusive, matching the ``x <= 30`` first bin):

        x <= 30         -> 0
        30 < x <= 280   -> 1
        280 < x <= 360  -> 2

    The earlier version used strict ``<`` on both sides of 280 and on 360, so
    exactly 280 (and anything >= 360) fell through every branch and returned
    ``None``, which becomes a missing label once applied to a column.

    NOTE(review): putting 280 in class 1 and 360 in class 2 follows the
    upper-inclusive convention of the first bin -- confirm this is intended.

    Args:
        x: Rotation value to classify.

    Returns:
        int: 0, 1 or 2.

    Raises:
        ValueError: if ``x`` is greater than 360 or is NaN (all comparisons
            fail), instead of silently returning ``None``.
    """
    if x <= 30:
        return 0
    if x <= 280:
        return 1
    if x <= 360:
        return 2
    raise ValueError(f"Degree_rotation value out of range: {x!r}")
    
# Overwrite the angle column with its class label from grouping_.
# The column is replaced in place, so this cell must run exactly once per load.
data['Degree_rotation'] = data['Degree_rotation'].apply(grouping_)

In [4]:
# Every column except the last is a feature; the last column is the target.
feature_frame = data.iloc[:, :-1]
target_series = data.iloc[:, -1]
x = feature_frame.values
y = target_series.values

### Data preprocessing: class balancing with SMOTE

In [13]:
# Oversample with SMOTE. A fixed seed makes the synthetic samples (and every
# downstream metric) reproducible across kernel restarts.
# NOTE(review): resampling happens before the train/test split in the next
# cell, so synthetic points interpolated from held-out rows can land in the
# training set and the test set itself contains synthetic rows. The reported
# scores are therefore optimistic; consider splitting first and resampling
# only the training portion.
oversample = SMOTE(random_state=0)
x, y = oversample.fit_resample(x, y)

# Summarize the class distribution after resampling.
counter = Counter(y)
total = len(y)  # loop-invariant, computed once
for k, v in counter.items():
    per = v / total * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))

# Plot the distribution (dict views materialized so matplotlib gets sequences).
plt.bar(list(counter.keys()), list(counter.values()))
plt.show()

In [6]:
# Hold out 20% of the rows for evaluation; stratify keeps class proportions
# equal in both parts and the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
    shuffle=True,
    stratify=y,
)

### Model training

In [14]:
# Fit an XGBoost classifier with its default hyperparameters on the training split.
# The fit call is left as the cell's last expression so the notebook displays its return value.
XG_model = XGBClassifier()
XG_model.fit(x_train, y_train)

### Model Evaluation

In [8]:
# Predict class labels for the held-out test split; reused by the metric and
# confusion-matrix cells below.
y_pred_XG = XG_model.predict(x_test)

In [15]:
# Overall accuracy, a blank separator line, then the per-class report.
# `accuracy` is kept as a global because the heatmap title reads it later.
accuracy = accuracy_score(y_test, y_pred_XG)
print("Accuracy Score:", accuracy, end="\n\n")
print(classification_report(y_test, y_pred_XG))

Accuracy Score: 0.8633641950884967

              precision    recall  f1-score   support

           0       0.80      0.93      0.86     12637
           1       0.88      0.69      0.77     12637
           2       0.92      0.97      0.95     12637

    accuracy                           0.86     37911
   macro avg       0.87      0.86      0.86     37911
weighted avg       0.87      0.86      0.86     37911



In [25]:
import numpy as np
from sklearn.metrics import confusion_matrix

def accuracy_for_each_element(confusion_matrix):
    """Row-normalize a confusion matrix and round to two decimals.

    Entry ``[i, j]`` of the result is the fraction of samples whose true class
    is ``i`` that were predicted as class ``j`` (so the diagonal holds the
    per-class recall).

    The earlier version ignored its argument and read the notebook-global
    ``cm``, so it only worked when ``cm`` happened to be defined first and
    always described that matrix regardless of what was passed in. It also
    recomputed the row sums for every cell.

    Args:
        confusion_matrix: Square array-like of counts, rows = true class,
            columns = predicted class.

    Returns:
        numpy.ndarray: float array of the same shape with each row divided by
        its row total, rounded to 2 decimals. Rows whose total is zero are
        returned as all zeros rather than NaN.
    """
    counts = np.asarray(confusion_matrix, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # Divide only where the row has support; untouched cells keep the 0.0 from `out`.
    fractions = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )
    return np.round(fractions, 2)

# Raw counts drive the colour scale; row-normalized fractions are the cell annotations.
cm = confusion_matrix(y_test, y_pred_XG)
accuracy_matrix = accuracy_for_each_element(cm)

# Shape the annotations after cm itself instead of a hard-coded (3, 3), and
# skip the flatten -> list -> array round trip the values used to take.
labels = np.asarray(accuracy_matrix).reshape(cm.shape)

ht = sns.heatmap(cm, annot=labels, fmt='', cmap='Blues')
# Class indices 0/1/2 are shown as 0/50/100, the same values the
# "Model testing" cell prints for each predicted class.
ht.set_xticklabels(['0', '50', '100'])
ht.set_yticklabels(['0', '50', '100'])

plt.title(f'XGBoost Model Size - 1.1MB\n{round(accuracy*100, 2)} accuracy')
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

### Save model

In [11]:
# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling fails; the bare open() used before left
# the handle to the garbage collector.
f = 'model_zoo/xg_classifier.pkl'
with open(f, 'wb') as model_file:
    pickle.dump(XG_model, model_file)

### Model testing

In [12]:
# Reload the saved classifier and run it on one hand-written sample.
# NOTE: pickle.load can execute arbitrary code from the file -- only load
# model files produced by this notebook or another trusted source.
with open('model_zoo/xg_classifier.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# One sample with six feature values.
# NOTE(review): presumably in the same column order as the training features -- confirm.
input_data = np.array([-648, -1938, -126, -19, -4, 1])
result = model.predict(input_data.reshape(1, -1))  # explicit (1, n_features) batch

# Class index -> displayed value; any class other than 0 or 1 prints 100,
# exactly as the previous if/elif/else chain did.
display_value = {0: 0, 1: 50}
print(display_value.get(result[0], 100))

50
