In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import numpy as np
import plotly.express as px

In [6]:
# Load the per-recording gait metrics.
# The CSV's first column is a saved row index (it shows up as "Unnamed: 0"
# when read naively), so use it as the index instead of keeping it as a
# meaningless data column.
metrics_df = pd.read_csv("data/metrics.csv", index_col=0)
metrics_df.head()

Unnamed: 0,Nom,Fichier source,Longueur de l'enregistrement (s),Nombre de pas,Fréquence des pas par fft (Hz),Fréquence des pas par comptage (Hz),Ecart-type de la durée des pas (s),Amplitude de l'accélération (g)
0,Corentin,accelerometer4_data.csv,8.98,13,1.670379,1.687764,0.086563,1.035185
1,Leo,accelerometer1_data.csv,8.99,14,1.779755,1.763908,0.032245,1.259968
2,Serge,accelerometer4_data.csv,8.99,14,1.668521,1.651842,0.119277,0.852011
3,Antoine,accelerometer6_data.csv,8.99,14,1.668521,1.690507,0.051615,1.268072
4,Antoine,accelerometer9_data.csv,9.98,13,1.603206,1.606426,0.056105,1.00965


In [7]:
# Every value of the "Nom" column, one entry per recording (duplicates kept).
members = metrics_df["Nom"].tolist()

In [None]:
def filter_members(df, member1, member2):
    """Return the rows of ``df`` whose "Nom" is ``member1`` or ``member2``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a "Nom" column holding the member name of each row.
    member1, member2 : str
        Names to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame with only the matching rows, in their original order.

    Raises
    ------
    ValueError
        If either name does not appear in ``df["Nom"]``.
    """
    # Validate against the frame that was actually passed in, so the function
    # does not depend on a notebook-level variable having been defined first.
    known_names = set(df["Nom"])
    for name in (member1, member2):
        if name not in known_names:
            raise ValueError(f"Member \"{name}\" not found")
    # Boolean mask instead of a string-built query: names containing quotes
    # (e.g. "O'Neil") would break the query expression.
    return df[df["Nom"].isin([member1, member2])]

def make_scatter(df):
    """Scatter plot of acceleration amplitude against counted step frequency.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "Fréquence des pas par comptage (Hz)",
        "Amplitude de l'accélération (g)", "Nom" and "Fichier source".

    Returns
    -------
    plotly figure
        One marker per row, coloured by "Nom", with the source file shown
        on hover.
    """
    x_column = "Fréquence des pas par comptage (Hz)"
    y_column = "Amplitude de l'accélération (g)"
    fig = px.scatter(
        df,
        x=x_column,
        y=y_column,
        color="Nom",
        hover_data={"Fichier source": True},
        color_discrete_sequence=px.colors.qualitative.Plotly,
    )
    fig.update_traces(marker=dict(size=15))
    fig.update_layout(
        # "Y en fonction de X": amplitude is on the y axis and frequency on
        # the x axis, so the title must name them in that order.
        title="Amplitude de l'accélération en fonction de la fréquence des pas par comptage",
        xaxis_title=x_column,
        yaxis_title=y_column,
        hovermode="closest",
    )
    return fig
  
# Keep only the two members being compared, then display their scatter plot
# as the cell's output.
filtered_df = filter_members(metrics_df, member1="Matthieu", member2="Leo")
fig = make_scatter(df=filtered_df)
fig

ValueError: 
    Invalid value of type 'builtins.str' received for the 'textposition' property of scatter
        Received value: 'to  p center'

    The 'textposition' property is an enumeration that may be specified as:
      - One of the following enumeration values:
            ['top left', 'top center', 'top right', 'middle left',
            'middle center', 'middle right', 'bottom left', 'bottom
            center', 'bottom right']
      - A tuple, list, or one-dimensional numpy array of the above

In [None]:
def prepare_df(df, test_size=0.2, random_state=None):
    """Split the metrics table into standardized train/test features and labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "Nom" (the label) and the four feature columns listed
        in ``feature_columns`` below.
    test_size : float, default 0.2
        Passed to ``train_test_split``.
    random_state : int or None, default None
        Passed to ``train_test_split``; set it to get the same split on
        every run.

    Returns
    -------
    X_train, X_test : numpy.ndarray
        Standardized feature matrices.
    y_train, y_test : pandas.Series
        The matching "Nom" labels.
    """
    feature_columns = [
        "Fréquence des pas par fft (Hz)",
        "Fréquence des pas par comptage (Hz)",
        "Ecart-type de la durée des pas (s)",
        "Amplitude de l'accélération (g)",
    ]
    X = df[feature_columns]
    y = df["Nom"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Fit the scaler on the training rows only: fitting it on the full table
    # would let the test rows influence the mean/variance used for training
    # (data leakage) and make the test score optimistic.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    return X_train, X_test, y_train, y_test



In [None]:

# Cross-validate a random forest on the two selected members.
filtered_df = filter_members(metrics_df, "Matthieu", "Leo")
# Use the filtered rows: the filter above was previously computed and then
# ignored, because the full table was passed to prepare_df instead.
X_train, X_test, y_train, y_test = prepare_df(filtered_df)
# Seed the forest so its own randomness does not change the scores between
# runs (the train/test split made by prepare_df is a separate source of
# variation).
model = RandomForestClassifier(random_state=42)
scores = cross_val_score(model, X_train, y_train, cv=5)
print("Cross validation scores : ", scores)
print("Mean cross validation score : ", np.mean(scores))

In [None]:

# Fit the model created in the previous cell on the training split, then
# report accuracy on both splits and the confusion matrix of the test split.
model.fit(X_train, y_train)
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Train accuracy:", train_accuracy)
print("Test accuracy:", test_accuracy)
# sep="\n" puts the matrix on the line after the heading.
print("Confusion matrix:", confusion_matrix(y_test, y_test_pred), sep="\n")
