In [41]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns 
import pickle

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
# from sklearn.linear_model import LogisticRegression 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, confusion_matrix, f1_score, accuracy_score, roc_curve, auc

In [42]:
data = pd.read_csv('hapticObjectClassifier.csv')
print(data)

# 1002 we can modify the dataset to have actual labels (i.e. ball, water bottle, cube) 
# and then use sklearn preprocessing.LabelEncoder to convert string labels into numbers
#data.head()
X = data.iloc[:, :6].values
y = data['object '].values

    sensor1   sensor2   sensor3  sensor4   sensor5   sensor6   object 
0        224       232      233       238       240       266        1
1        240       230      230       231       233       223        0
2        267       228      227       221       220       221        2
3        229       237      231       238       230       262        1
4        240       233      232       240       236       230        0
5        263       224      230       222       223       227        2
6        221       239      236       236       230       250        1
7        231       234      234       236       234       223        0
8        268       229      223       220       229       230        2
9        228       230      239       230       236       260        1
10       232       233      234       235       232       222        0
11       260       223      221       229       220       226        2
12       225       240      232       240       236       256        1
13    

In [43]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(x_train)
X_test = scaler.transform(x_test)




In [44]:
lr_list = [0.05, 0.075, 0.1, 0.25, 0.5, 0.75, 1]

for learning_rate in lr_list:
    gb_clf = GradientBoostingClassifier(n_estimators=20, learning_rate=learning_rate, max_features=2, max_depth=2, random_state=0)
    gb_clf.fit(X_train, y_train)

    print("Learning rate: ", learning_rate)
    print("\tAccuracy score (training): {0:.8f}".format(gb_clf.score(X_train, y_train)))
    print("\tAccuracy score (validation): {0:.8f}".format(gb_clf.score(X_test, y_test)))
    scores = cross_val_score(gb_clf, X_train, y_train, cv=5, scoring='f1_weighted')
    print("\tCross Validation score: ", scores)


Learning rate:  0.05
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  0.075
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  0.1
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  0.25
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  0.5
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  0.75
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 1.]
Learning rate:  1
	Accuracy score (training): 1.00000000
	Accuracy score (validation): 0.92857143
	Cross Validation score:  [1. 1. 1. 1. 

In [45]:
gb_clf2 = GradientBoostingClassifier(n_estimators=20, learning_rate=0.5, max_features=2, max_depth=2, random_state=0)
gb_clf2.fit(X_train, y_train)
predictions = gb_clf2.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, predictions))

print("Classification Report")
print(classification_report(y_test, predictions))

Confusion Matrix:
[[3 1 0]
 [0 4 0]
 [0 0 6]]
Classification Report
              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.80      1.00      0.89         4
           2       1.00      1.00      1.00         6

    accuracy                           0.93        14
   macro avg       0.93      0.92      0.92        14
weighted avg       0.94      0.93      0.93        14



## K-Nearest Neighbors Classifier

In [46]:
knn = KNeighborsClassifier(n_neighbors=3)

knn.fit(x_train, y_train)
x_train.shape

(40, 6)

In [47]:
y_test.shape

(14,)

In [48]:

y_pred_train = knn.predict(X_train)
y_pred_test = knn.predict(X_test)
# clf = LogisticRegression()
print(f'y_pred_test\t\t: {y_pred_test}')
print(f'y_test     \t\t: {y_train}')


print(f'Accuracy_train: {accuracy_score(y_test, y_pred_test):.4f}')
print(f'KNN accuracy_train: {knn.score(X_test, y_test)}')

print(f'Accuracy_train: {accuracy_score(y_train, y_pred_train):.4f}')
print(f'KNN accuracy_train: {knn.score(X_train, y_train)}')

print(f"F1 score: {f1_score(y_test, y_pred_test, average='micro')}")

y_pred_test		: [2 2 2 2 2 2 2 2 2 2 2 2 2 2]
y_test     		: [0 0 0 1 1 1 2 1 1 0 1 1 0 2 1 2 1 0 0 2 0 0 1 2 2 1 2 2 0 0 0 1 2 2 0 1 2
 0 1 2]
Accuracy_train: 0.4286
KNN accuracy_train: 0.42857142857142855
Accuracy_train: 0.3000
KNN accuracy_train: 0.3
F1 score: 0.42857142857142855


In [49]:
fpr, tpr, thresholds = roc_curve(y_test, y_test, pos_label=2)
auc(fpr, tpr)

1.0

In [50]:
prediction = []
for i in range(11):
    p = knn.predict(x_test[i].reshape(1,-1))
    prediction.append(p[0])

print(prediction)

[0, 0, 1, 1, 2, 2, 2, 0, 1, 2, 0]


In [51]:
with open('model.pkl', 'wb') as model_file:
  pickle.dump(gb_clf2, model_file)

In [52]:
loaded_model = pickle.load(open("model.pkl", 'rb'))
result = loaded_model.score(x_train, y_train)
print(result)

0.35


In [53]:
'''x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
knn = KNeighborsClassifier(n_neighbors=3)

knn.fit(x_train, y_train)
clf = LogisticRegression()
clf.fit(x_train, y_train)

print(knn.score(x_test, y_test))
print(clf.score(x_test, y_test))'''

'x_train, x_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)\nknn = KNeighborsClassifier(n_neighbors=3)\n\nknn.fit(x_train, y_train)\nclf = LogisticRegression()\nclf.fit(x_train, y_train)\n\nprint(knn.score(x_test, y_test))\nprint(clf.score(x_test, y_test))'

In [54]:
'''prediction = []
for i in range(11):
    p = knn.predict(x_test[i].reshape(1,-1))
    prediction.append(p[0])

print(prediction)'''

'prediction = []\nfor i in range(11):\n    p = knn.predict(x_test[i].reshape(1,-1))\n    prediction.append(p[0])\n\nprint(prediction)'

In [55]:
# with open('model.pkl', 'wb') as model_file:
#   pickle.dump(knn, model_file)