In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import numpy as np
import plotly.express as px

In [14]:
metrics_df = pd.read_csv("data/metrics.csv")
metrics_df.head()

Unnamed: 0,Nom,Fichier source,Longueur de l'enregistrement (s),Nombre de pas,Fréquence des pas par fft (Hz),Fréquence des pas par comptage (Hz),Ecart-type de la durée des pas (s),Amplitude de l'accélération (g)
0,Corentin,accelerometer4_data.csv,8.98,13,1.670379,1.687764,0.086563,1.035185
1,Leo,accelerometer1_data.csv,8.99,14,1.779755,1.763908,0.032245,1.259968
2,Serge,accelerometer4_data.csv,8.99,14,1.668521,1.651842,0.119277,0.852011
3,Antoine,accelerometer6_data.csv,8.99,14,1.668521,1.690507,0.051615,1.268072
4,Antoine,accelerometer9_data.csv,9.98,13,1.603206,1.606426,0.056105,1.00965


In [15]:
members = list(metrics_df["Nom"].values)

In [16]:
def prepare_df(df):
    df = df.copy()
    df = df[[
        "Nom",
        "Fréquence des pas par fft (Hz)",
        "Fréquence des pas par comptage (Hz)",
        "Ecart-type de la durée des pas (s)",
        "Amplitude de l'accélération (g)"
        ]]
    X = df.drop(columns=["Nom"])
    X = StandardScaler().fit_transform(X)
    y = df["Nom"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    return X_train, X_test, y_train, y_test

member_or_other = lambda x, member : member if x == member else "Autre"

def specific_member(y_train, y_test, member):
    y_train = y_train.apply(lambda x: member_or_other(x, member))
    y_test = y_test.apply(lambda x: member_or_other(x, member))
    return y_train, y_test

In [17]:

X_train, X_test, y_train, y_test = prepare_df(metrics_df)
model = RandomForestClassifier()
scores = cross_val_score(model, X_train, y_train, cv=5)
print("Cross validation scores : ", scores)
print("Mean cross validation score : ", np.mean(scores))

Cross validation scores :  [0.6        0.6        0.55555556 0.55555556 0.77777778]
Mean cross validation score :  0.6177777777777778


In [18]:

model.fit(X_train, y_train)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)
print("Train accuracy:", accuracy_score(y_train, y_train_pred))
print("Test accuracy:", accuracy_score(y_test, y_test_pred))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_test_pred))


Train accuracy: 1.0
Test accuracy: 0.9166666666666666
Confusion matrix:
[[2 0 0 0 0 0]
 [0 1 0 0 0 0]
 [0 0 3 0 0 0]
 [0 0 0 2 0 0]
 [0 0 0 0 1 0]
 [0 1 0 0 0 2]]
