In [1]:
import pandas as pd
import pickle
import numpy as np 
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score , classification_report, confusion_matrix
import seaborn as sns
from sklearn.model_selection import train_test_split  
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import train_test_split  


### data prep

In [2]:
# Read the cleaned sensor table (Acc_*/Gyro_* channels plus Degree_rotation)
# into a DataFrame; the bare name on the last line renders it as cell output.
data = pd.read_csv(filepath_or_buffer="data/cleaned_data.csv")
data

Unnamed: 0,Acc_x,Acc_y,Acc_z,Gyro_x,Gyro_y,Gyro_z,Degree_rotation
0,-131,-2039,-126,-16,-4,-6,0
1,-135,-2038,-119,-18,-4,-7,0
2,-131,-2039,-127,-17,-3,-7,0
3,-132,-2035,-118,-18,-4,-7,0
4,-131,-2039,-128,-18,-4,-7,0
...,...,...,...,...,...,...,...
72770,1,-2043,-85,-20,-2,-1,250
72771,10,-2046,-91,-21,-1,0,250
72772,11,-2045,-95,-19,-3,1,250
72773,9,-2046,-89,-19,-4,-1,250


In [3]:
def grouping_(x):
    """Bucket a rotation value into one of three class labels.

    Returns 0 when ``x <= 30``, 50 when ``30 < x < 300``, and 100 for
    everything else (``x >= 300``, and values such as NaN for which both
    comparisons are false).
    """
    if x <= 30:
        return 0
    # Reaching here means the first comparison failed, so a plain upper-bound
    # check is enough to identify the middle bucket.
    if x < 300:
        return 50
    return 100

In [19]:
# Replace the raw rotation values with their class labels (0 / 50 / 100).
# NOTE: the column is overwritten in place, so run this cell once per load of
# `data` -- on a second pass grouping_ sends an already-bucketed 100 to 50.
data['Degree_rotation'] = data['Degree_rotation'].map(grouping_)
data

### data preprocessing

In [18]:
# Oversample a multi-class dataset so every rotation class is equally represented.

# Select the six sensor channels by name. The displayed frame also carries an
# "Unnamed: 0" row-counter column, which positional slicing (iloc[:, :-1])
# would hand to the model as a seventh feature even though the inference cell
# later in this notebook supplies only six values per sample.
feature_columns = ['Acc_x', 'Acc_y', 'Acc_z', 'Gyro_x', 'Gyro_y', 'Gyro_z']
x = data[feature_columns].values
y = data['Degree_rotation'].values

# Transform the dataset. SMOTE picks neighbours at random, so the seed is
# pinned to keep the resampled set (and any model trained on it) reproducible.
# NOTE(review): resampling happens before the train/test split in a later
# cell, so synthetic points interpolated from held-out rows can end up in the
# training partition -- reported scores may be optimistic.
oversample = SMOTE(random_state=0)
x, y = oversample.fit_resample(x, y)

# Summarize the class distribution after resampling.
counter = Counter(y)
for k, v in counter.items():
    per = v / len(y) * 100
    print('Class=%d, n=%d (%.3f%%)' % (k, v, per))

# Plot the distribution (dict views are materialised into lists for matplotlib).
plt.bar(list(counter.keys()), list(counter.values()))
plt.show()

### model training

In [6]:
# Hold out 25% of the samples for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions; the fixed seed makes the shuffled
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    shuffle=True,
    stratify=y,
    random_state=0,
)

In [20]:
# Fit an RBF-kernel support vector classifier on the training partition.
# fit() returns the estimator itself; the trailing bare name renders it as the
# cell output.
# NOTE(review): the features reach the RBF kernel unscaled -- consider
# standardising them and re-tuning gamma; confirm against validation results.
svm = SVC(C=1.0, kernel='rbf', gamma=0.5).fit(x_train, y_train)
svm

### model evaluation

In [21]:
# Predict a class label for every held-out sample; the bare name displays the
# resulting array.
y_pred_svm = svm.predict(x_test)
y_pred_svm

In [22]:
# Overall fraction of held-out samples whose predicted label equals the true one.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print("Accuracy Score:", accuracy)

In [23]:
import numpy as np
from sklearn.metrics import confusion_matrix

def accuracy_for_each_element(confusion_matrix):
    """Row-normalise a confusion matrix.

    Every entry is divided by the sum of its row. With actual classes on the
    rows, entry (i, j) is the fraction of class-i samples that were predicted
    as class j, so the diagonal holds the per-class recall.

    The computation uses only the ``confusion_matrix`` argument; it does not
    depend on any notebook-level variable.

    Parameters
    ----------
    confusion_matrix : array-like of shape (n_classes, n_classes)
        Raw counts. The parameter name shadows sklearn's ``confusion_matrix``
        function inside this function body only.

    Returns
    -------
    numpy.ndarray
        Float matrix of the same shape, rounded to two decimals. A row whose
        total is zero is returned as all zeros instead of NaN.
    """
    counts = np.asarray(confusion_matrix, dtype=float)
    row_totals = counts.sum(axis=1, keepdims=True)
    # `where` skips the division for empty rows; those cells keep the 0.0
    # pre-filled through `out`.
    rates = np.divide(
        counts,
        row_totals,
        out=np.zeros_like(counts),
        where=row_totals != 0,
    )
    return np.round(rates, 2)

# Raw confusion counts for the held-out split.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)

# Row-normalised rates; these become the text written inside each heatmap cell.
accuracy_matrix = accuracy_for_each_element(cm)

# Flatten to a list and rebuild the 3x3 grid expected by `annot`.
group_counts = list(accuracy_matrix.flatten())
labels = np.array(group_counts).reshape(3, 3)

# Cell colour encodes the raw counts, the annotation shows the per-row rate.
ht = sns.heatmap(data=cm, annot=labels, fmt='', cmap='Blues')
ht.set_xticklabels(['0', '50', '100'])
ht.set_yticklabels(['0', '50', '100'])

ht.set_title(f'SVM Model Size - 6.2MB\n{round(accuracy*100, 2)} accuracy')
ht.set_xlabel("Predicted")
ht.set_ylabel("Actual")
plt.show()

In [11]:
# Per-class precision / recall / F1 and support on the held-out split.
print(classification_report(y_true=y_test, y_pred=y_pred_svm))

              precision    recall  f1-score   support

           0       0.87      0.95      0.91     15797
          50       0.93      0.76      0.84     15796
         100       0.90      1.00      0.95     15796

    accuracy                           0.90     47389
   macro avg       0.90      0.90      0.90     47389
weighted avg       0.90      0.90      0.90     47389



### save model

In [12]:
# Persist the fitted classifier. The context manager closes the file handle
# (flushing the pickle to disk) even if serialisation raises, instead of
# leaving an open handle for the garbage collector to clean up.
f = 'model_zoo/svm.pkl'
with open(f, 'wb') as model_file:
    pickle.dump(svm, model_file)

In [13]:
# Reload the pickled classifier; the context manager closes the file handle as
# soon as the object has been read.
# Unpickling can execute arbitrary code -- only load files you created yourself.
with open('model_zoo/svm.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

### model testing

In [14]:
# A single hand-written sample used to smoke-test the reloaded model.
# presumably ordered Acc_x, Acc_y, Acc_z, Gyro_x, Gyro_y, Gyro_z -- confirm
# against the columns the model was trained on.
input_data = np.array(
    [
        -131, -2037, -127,
        -17, -6, -7,
    ]
)

In [15]:
# predict() works on a 2-D batch, so present the single sample as one row.
result = model.predict(input_data.reshape(1, -1))

In [16]:
# Predicted class label for the one sample in the batch (first entry of the
# prediction array), shown as the cell output.
result[0]

50