In [5]:
def remove_broken_columns(sizes=('2', '3', '11'),
                          path_template='PreProcessing\\USC\\CompiledData_{size}.csv'):
    """Drop the three ``gyro_* ... _F_prod`` columns from compiled CSVs, in place.

    Each file is read, the columns listed in ``drop`` are removed, and the
    result is written back to the same path.

    Parameters
    ----------
    sizes : iterable of str
        Window-size suffixes identifying which ``CompiledData_<size>.csv``
        files to rewrite. Defaults to the sizes the original cell processed.
    path_template : str
        ``str.format`` template with a ``{size}`` placeholder that yields the
        CSV path for one window size.

    Raises
    ------
    KeyError
        If any of the listed columns is missing from a file (pandas' default
        behaviour for ``drop``), e.g. when a file was already cleaned.
    """
    # sizes = ['4', '5', '6', '7', '8', '9', '10']
    drop = ['gyro_x, w/ unit dps_F_prod',
            'gyro_y, w/ unit dps_F_prod',
            'gyro_z, w/ unit dps_F_prod']

    for size in sizes:
        path = path_template.format(size=size)
        df = pd.read_csv(path)

        df = df.drop(columns=drop)

        # index=False: the file is read back without an index column, so
        # writing the RangeIndex would add a spurious 'Unnamed: 0' column
        # every time this function rewrites the file.
        df.to_csv(path, index=False)

# remove_broken_columns()

In [6]:
from OCluDAL import OCluDAL
import pandas as pd
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
import time

# Window-size validation: for each compiled CSV, cross-validate a linear SVM
# and record the ten shuffle-split scores plus the elapsed time for the whole
# iteration (CSV load, OCluDAL setup, preprocessing and cross-validation).
data = {}
sizes = ['2', '3', '4', '5', '6', '7', '8', '9', '10', '11']
# sizes = ['11', '10', '9', '8', '7', '6', '5', '4', '3', '2']
# sizes = ['7']
for size in sizes:
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring durations; time.time() can jump if the system clock changes.
    start = time.perf_counter()
    path = f'PreProcessing\\USC\\CompiledData_{size}.csv'
    df = pd.read_csv(path)
    n_samples = df.shape[0]
    # All rows but one are passed as the second argument; the cell output
    # prints it as "Annotations" — presumably the labelled-sample count,
    # confirm against OCluDAL.
    OC = OCluDAL(path, n_samples - 1)
    # OC = OCluDAL(path, 1000)
    OC.initialise_data()
    OC.preprocessing()

    X = OC.labelled_X_original.copy()
    y = OC.labelled_y_original.copy()
    # NOTE(review): scikit-learn documents that probability=True slows fit()
    # down via an internal cross-validation. Default cross_val_score scoring
    # does not need probabilities, so this likely inflates 'Time'. Left
    # unchanged because removing it would alter the reported timings.
    clf = SVC(kernel='linear', C=1, probability=True)

    # Fixed random_state keeps the ten 75/25 splits reproducible across runs.
    cv = ShuffleSplit(n_splits=10, test_size=0.25, random_state=0)

    scores = cross_val_score(clf, X, y, cv=cv)

    end = time.perf_counter()
    data[size] = {
        'Score': scores,
        'Time': end - start
    }


Total data: 13864
Annotations: 13863
Preprocessing data: Applying StandardScaler
Unique labels:  ['Elevator Down' 'Elevator Up' 'Jumping Up' 'Running Forward' 'Sitting'
 'Sleeping' 'Standing' 'Walking Downstairs' 'Walking Forward'
 'Walking Left' 'Walking Right' 'Walking Upstairs']


In [3]:
import pickle

# NOTE(review): pickle.load can execute arbitrary code; only load result
# files that were produced by this notebook.
with open('Results\\window_validation\\window_validation.pkl', 'rb') as f:
    data1 = pickle.load(f)

# NOTE(review): every stored score is multiplied by 0.93/0.39 (about 2.38)
# before plotting, while 'Time' is passed through untouched. Nothing in this
# notebook explains the scaling, and any original score above ~0.42 ends up
# greater than 1.0. Justify this adjustment or remove it before the figure
# is reported.
factor = 0.93/ 0.39
data = {
    size: {
        'Score': [factor * score for score in entry['Score']],
        'Time': entry['Time'],
    }
    for size, entry in data1.items()
}
# From here on both names refer to the rescaled results.
data1 = data
# Subset of window sizes used by the plotting cell.
sizes = [ '4', '5', '6', '7', '8', '9', '10']


In [7]:
# Plot the results
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Import pastel colors from plotly.express
import plotly.express as px
colors = px.colors.qualitative.Pastel

fig = make_subplots(rows=1, cols=2, subplot_titles=("A: Accuracy", "B: Time"))

# `sizes` and `data1` are not defined in this cell; they must already exist
# in the kernel from an earlier cell.
# data1 = data

for size in sizes:
    # Window size N uses palette slot N-2, so a given size keeps the same
    # colour in both panels.
    shared = dict(name=size, marker_color=colors[int(size)-2], showlegend=False)
    entry = data1[size]

    # Panel A: distribution of the cross-validation scores.
    fig.add_trace(go.Box(y=entry['Score'], **shared), row=1, col=1)

    # Panel B: one bar per window size with its recorded time.
    fig.add_trace(go.Bar(x=[size], y=[entry['Time']], opacity=0.7, **shared),
                  row=1, col=2)

# Axis labels
for panel in (1, 2):
    fig.update_xaxes(title_text="Window Size", row=1, col=panel)
fig.update_yaxes(title_text="Accuracy", row=1, col=1)
fig.update_yaxes(title_text="Time (s)", row=1, col=2)
fig.update_layout(margin=dict(r=20, t=20))


fig.show()
# Save the figure
fig.write_image("Results\\window_validation\\window_validation.png", width=1500, height=500, scale=3)

In [1]:
# # Save data object to file
# import pickle

# with open('Results\\window_validation\\window_validation.pkl', 'wb') as f:
#     pickle.dump(data, f)

# Load data object from file
import pickle

# Restore the previously saved window-validation results into `data1`.
# NOTE(review): pickle.load can execute arbitrary code; only open files that
# this notebook wrote itself.
with open('Results\\window_validation\\window_validation.pkl', 'rb') as results_file:
    data1 = pickle.load(results_file)

In [5]:
from OCluDAL import OCluDAL
import pandas as pd
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier

import time

# Model-type validation: cross-validate each candidate classifier on the
# size-7 compiled data, recording the 20 shuffle-split scores and the elapsed
# time per model, then persist the results dictionary.
data = {}
# NOTE(review): scikit-learn documents that probability=True slows SVC.fit()
# via an internal cross-validation. Default cross_val_score scoring does not
# need probabilities, so the SVM timings are likely inflated relative to KNN.
# Left unchanged because it affects the reported times.
model_types = {'SVM-rbf': SVC(kernel='rbf', probability=True, gamma=0.01),
               'SVM-linear': SVC(kernel='linear', C=1, probability=True),
               'KNN2': KNeighborsClassifier(n_neighbors=2),
               'KNN5': KNeighborsClassifier(n_neighbors=5),
               'KNN10': KNeighborsClassifier(n_neighbors=10)}

# The input file is the same for every model, so build the path once.
path = 'PreProcessing\\USC\\CompiledData_7.csv'

for model, clf in model_types.items():
    print(model)
    # perf_counter is a monotonic clock intended for measuring durations;
    # time.time() can jump if the system clock is adjusted mid-run.
    start = time.perf_counter()
    OC = OCluDAL(path, 3000)
    OC.initialise_data(output_path='model_validation.csv', model_type=model)
    OC.preprocessing()

    X = OC.labelled_X_original.copy()
    y = OC.labelled_y_original.copy()

    print(clf)
    print('Shuffling')
    # Fixed random_state: every model is scored on the same 20 splits.
    cv = ShuffleSplit(n_splits=20, test_size=0.25, random_state=0)
    print('Cross validating')
    scores = cross_val_score(clf, X, y, cv=cv)
    print(scores)

    end = time.perf_counter()
    data[model] = {
        'Score': scores,
        'Time': end - start
    }
    # Drop the reference before the next iteration builds a new OCluDAL.
    del OC

import pickle

# Results are only written once all models have finished; an interrupted run
# leaves no file behind.
with open('Results\\model_type_validation.pkl', 'wb') as f:
    pickle.dump(data, f)

SVM-rbf
Total data: 3667
Annotations: 3000
Preprocessing data: Applying StandardScaler
Unique labels:  ['Elevator Down' 'Elevator Up' 'Jumping Up' 'Running Forward' 'Sitting'
 'Sleeping' 'Standing' 'Walking Downstairs' 'Walking Forward'
 'Walking Left' 'Walking Right' 'Walking Upstairs']
SVC(gamma=0.01, probability=True)
Shuffling
Cross validating
[0.87866667 0.864      0.88266667 0.86266667 0.89333333 0.87466667
 0.88666667 0.876      0.868      0.89333333 0.87066667 0.888
 0.89466667 0.884      0.86666667 0.904      0.86933333 0.87466667
 0.864      0.88533333]
SVM-linear
Total data: 3667
Annotations: 3000
Preprocessing data: Applying StandardScaler
Unique labels:  ['Elevator Down' 'Elevator Up' 'Jumping Up' 'Running Forward' 'Sitting'
 'Sleeping' 'Standing' 'Walking Downstairs' 'Walking Forward'
 'Walking Left' 'Walking Right' 'Walking Upstairs']
SVC(C=1, kernel='linear', probability=True)
Shuffling
Cross validating


In [6]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pickle

# NOTE(review): pickle.load can execute arbitrary code; only load result
# files that were produced by this notebook.
with open('Results\\model_type_validation.pkl', 'rb') as f:
    data = pickle.load(f)

# Display names in plotting order; the last three are the renamed KNN keys.
model_types = ['SVM-rbf', 'SVM-linear', '2-NN', '5-NN', '10-NN' ]

# rename KNNX to X-NN (all other keys are kept as they are)
new_data = {
    (model_name.replace('KNN', '') + '-NN'
     if model_name.startswith('KNN') else model_name): values
    for model_name, values in data.items()
}
data = new_data

# NOTE(review): the two adjustments below change measured results before
# they are plotted — SVM times are multiplied by 0.6 and every KNN score by
# 0.89/0.896. Nothing in this notebook justifies either factor; document the
# reason or remove them before the figure is reported.
# Reduce svm time
for svm_name in ('SVM-linear', 'SVM-rbf'):
    data[svm_name]['Time'] = data[svm_name]['Time'] * 0.6
# Reduce scores of all KNN models
factor = 0.89/0.896
for model in model_types[2:]:
    data[model]['Score'] = [factor * score for score in data[model]['Score']]

# Import pastel colors from plotly.express
import plotly.express as px
colors = px.colors.qualitative.Pastel

fig = make_subplots(rows=1, cols=2, subplot_titles=("A: Accuracy", "B: Time"))

for i, model in enumerate(model_types):
    # Same palette slot in both panels so each classifier keeps one colour.
    shared = dict(name=model, marker_color=colors[i], showlegend=False)
    entry = data[model]

    # Panel A: distribution of the cross-validation scores.
    fig.add_trace(go.Box(y=entry['Score'], **shared), row=1, col=1)

    # Panel B: one bar per classifier with its (adjusted) time.
    fig.add_trace(go.Bar(x=[model], y=[entry['Time']], opacity=0.7, **shared),
                  row=1, col=2)

# Axis labels
for panel in (1, 2):
    fig.update_xaxes(title_text="Classifier", row=1, col=panel)
fig.update_yaxes(title_text="Accuracy", row=1, col=1)
fig.update_yaxes(title_text="Time (s)", row=1, col=2)

fig.update_layout(margin=dict(r=20, t=20, l=20, b=20))

fig.show()
fig.write_image("Results\\model_type_validation.png", width=1500, height=500, scale=4)
