In [47]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.svm import SVC
import pandas as pd
import os

In [3]:
data = {}
for file in os.listdir("./features"):
    df= pd.read_csv(os.path.join("features", file), index_col = 0)
    df["activity"] = file.split("_")[0]
    data[file.split(".")[0]] = df
    

In [4]:
train, test = data["bike_2"], data["bike_1"]
for key in data:
    if "bike" not in key:
        train = train.append(data[key][:int(len(data[key])*.7)], ignore_index = True)
        test = test.append(data[key][int(len(data[key])*.7):], ignore_index = True)
train_y, train_x = train["activity"], train.drop(columns = "activity")
test_y, test_x = test["activity"], test.drop(columns = "activity")

In [5]:
model = RandomForestClassifier()
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(model.score(test_x, test_y))
confusion_matrix(pred, test_y, labels = ["bike", "walk", "car", "idle"])

0.970917225950783


array([[115,   5,   1,   2],
       [  3,  98,   2,   0],
       [  0,   0,  69,   0],
       [  0,   0,   0, 152]], dtype=int64)

In [5]:
model = GaussianNB()
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(model.score(test_x, test_y))
confusion_matrix(pred, test_y, labels = ["bike", "walk", "car", "idle"])

0.9395973154362416


array([[115,  11,   2,   3],
       [  3,  92,   1,   0],
       [  0,   0,  69,   7],
       [  0,   0,   0, 144]], dtype=int64)

In [6]:
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter = 1000))
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(model.score(test_x, test_y))
confusion_matrix(pred, test_y, labels = ["bike", "walk", "car", "idle"])

0.9798657718120806


array([[116,   3,   0,   1],
       [  2, 100,   1,   0],
       [  0,   0,  71,   2],
       [  0,   0,   0, 151]], dtype=int64)

In [6]:
model = make_pipeline(StandardScaler(), SVC(kernel = "linear"))
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(model.score(test_x, test_y))
confusion_matrix(pred, test_y, labels = ["bike", "walk", "car", "idle"])

0.9753914988814317


array([[116,   3,   0,   2],
       [  2, 100,   1,   0],
       [  0,   0,  71,   3],
       [  0,   0,   0, 149]], dtype=int64)

In [8]:
model = make_pipeline(StandardScaler(), KNeighborsClassifier())
model.fit(train_x, train_y)
pred = model.predict(test_x)
print(model.score(test_x, test_y))
confusion_matrix(pred, test_y, labels = ["bike", "walk", "car", "idle"])

0.9642058165548099


array([[117,   7,   2,   3],
       [  1,  96,   1,   0],
       [  0,   0,  68,   1],
       [  0,   0,   1, 150]], dtype=int64)

In [35]:
import NN 
import pickle
with open("nn_train.pickle", "rb") as f:
    nn_train = pickle.load(f)
with open("nn_test.pickle", "rb") as f:
    nn_test = pickle.load(f)
    

In [None]:
model = NN.train(nn_train)

  loss = criterion(outputs, torch.tensor(labels))


[1,   100] loss: 0.029
[1,   200] loss: 0.014
[2,   100] loss: 0.007
[2,   200] loss: 0.009
[3,   100] loss: 0.004
[3,   200] loss: 0.003
[4,   100] loss: 0.003
[4,   200] loss: 0.004
[5,   100] loss: 0.002
[5,   200] loss: 0.001
[6,   100] loss: 0.001
[6,   200] loss: 0.000
[7,   100] loss: 0.000
[7,   200] loss: 0.001


In [48]:
imp.reload(NN)
y_pred, label = NN.evaluate(model, nn_test)
print(accuracy_score(label, y_pred))
confusion_matrix(label, y_pred)

0.9523809523809523


array([[ 91,  10,   0,   0],
       [  4, 114,   0,   0],
       [  0,   2, 147,   3],
       [  1,   1,   0,  68]], dtype=int64)