In [28]:
import pandas as pd
import numpy as np
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt
import itertools
import math
from datetime import datetime

In [29]:
data_path = '..//DATA//DatasetExos.csv'

In [30]:
def load_data(path):
    try:
        data = pd.read_csv(path , sep=',')
        return data
    except Exception as e:
      print(f"Une erreur s'est produite : {e}")

In [31]:
data = load_data(data_path)

In [32]:
data.head()

Unnamed: 0,ep (ms),Acc_x,Acc_y,Acc_z,Gyro_x,Gyro_y,Gyro_z,ID,Exercise,Category,Set
0,2019-01-11 15:08:05.200,0.0135,0.977,-0.071,-1.8904,2.4392,0.9388,B,bench,heavy,30.0
1,2019-01-11 15:08:05.400,-0.0015,0.9705,-0.0795,-1.6826,-0.8904,2.1708,B,bench,heavy,30.0
2,2019-01-11 15:08:05.600,0.001333,0.971667,-0.064333,2.5608,-0.256,-1.4146,B,bench,heavy,30.0
3,2019-01-11 15:08:05.800,-0.024,0.957,-0.0735,8.061,-4.5244,-2.073,B,bench,heavy,30.0
4,2019-01-11 15:08:06.000,-0.028,0.957667,-0.115,2.439,-1.5486,-3.6098,B,bench,heavy,30.0


In [33]:
data.columns

Index(['ep (ms)', 'Acc_x', 'Acc_y', 'Acc_z', 'Gyro_x', 'Gyro_y', 'Gyro_z',
       'ID', 'Exercise', 'Category', 'Set'],
      dtype='object')

In [34]:
data = data.dropna()

In [35]:
data = data.replace({'heav': 'heavy', 'raw': 'row' , 'reste':'rest'})

In [None]:
data = data.drop(columns=['ep (ms)','ID'])	

In [37]:
def is_numeric(value):
    return isinstance(value, (int, float, np.number))


In [38]:
def combined_distance(instance1, instance2):
    """
    Calcule la distance combinée (Manhattan + Hamming) entre deux instances.
    """
    manhattan_distance = 0
    hamming_distance = 0

    for val1, val2 in zip(instance1, instance2):
        # Convertir les dates en timestamps si nécessaire
        if isinstance(val1, str) and isinstance(val2, str):
            try:
                val1 = datetime.strptime(val1, '%Y-%m-%d %H:%M:%S.%f').timestamp()
                val2 = datetime.strptime(val2, '%Y-%m-%d %H:%M:%S.%f').timestamp()
            except ValueError:
                pass  # Pas une date valide, traiter comme catégorique

        if is_numeric(val1) and is_numeric(val2):
            manhattan_distance += abs(val1 - val2)
        else:
            hamming_distance += val1 != val2

    return manhattan_distance + hamming_distance

In [39]:
# combined_distance(instance1, instance2)

In [40]:
def sort_data(distances):
    return sorted(distances, key=lambda x: x[0])

In [70]:
def class_maj(sorted_data, k):
    
    # Obtenir les classes des k plus proches voisins
    k_neighbors = sorted_data[:k]
    classes = [neighbor[1] for neighbor in k_neighbors]
    print(f" la liste de k_neighbors{classes}")
    # Retourner la classe majoritaire
    predicted_class = Counter(classes).most_common(1)[0][0]
    
    return predicted_class
    

In [71]:
def KNN(data, instance, k):
    # Initialiser une liste pour stocker les distances
    distances = []
    
    # Exclure la colonne de classe du dataset
    class_column = 'Exercise'
    feature_columns = data.columns.difference([class_column])

    # Calcul des distances
    for index, row in data.iterrows():
        dist = combined_distance(instance, row[feature_columns])
        distances.append((dist, row[class_column]))  # Inclure la classe réelle dans la liste des distances
    print(f" la liste des distances {distances}")
    # Trier par distance
    sorted_data = sort_data(distances)
    print(f"sorted data : {sorted_data}")

    predicted_class = class_maj(sorted_data, k)
    
    return predicted_class

In [72]:
instance1 = ['2024-11-20 18:09:51.000', -0.137, 1.066, 0.8215, -6.597, 0.808, 1.985, 'B', 'medium', 30]
k = 3

predicted_class = KNN(data, instance1, k)
print(f"Classe prédite : {predicted_class}")

 la liste des distances [(13.635, 'bench'), (13.0516, 'bench'), (19.8604, 'bench'), (30.2819, 'bench'), (23.263066666666667, 'bench'), (26.206400000000002, 'bench'), (71.99773333333333, 'bench'), (50.8341, 'bench'), (11.498333333333331, 'bench'), (57.9718, 'bench'), (24.64126666666667, 'bench'), (78.4154, 'bench'), (21.009933333333336, 'bench'), (24.2661, 'bench'), (65.3916, 'bench'), (33.3792, 'bench'), (9.4969, 'bench'), (21.098466666666667, 'bench'), (20.602600000000002, 'bench'), (19.544200000000004, 'bench'), (22.735133333333334, 'bench'), (74.7755, 'bench'), (59.993, 'bench'), (19.2285, 'bench'), (45.2479, 'bench'), (51.10726666666667, 'bench'), (24.3932, 'bench'), (20.1048, 'bench'), (55.906, 'bench'), (72.47093333333333, 'bench'), (42.6779, 'bench'), (16.082733333333334, 'bench'), (16.3814, 'bench'), (19.085733333333334, 'bench'), (19.3264, 'bench'), (20.791, 'bench'), (20.0723, 'bench'), (22.578266666666668, 'bench'), (46.004400000000004, 'bench'), (88.41686666666666, 'bench')

In [73]:
instance1 = ['2024-11-20 18:09:51.000', -0.137, 1.066, 0.8215, -6.597, 0.808, 1.985, 'B', 'medium', 30]
k = 10

predicted_class = KNN(data, instance1, k)
print(f"Classe prédite : {predicted_class}")

 la liste des distances [(13.635, 'bench'), (13.0516, 'bench'), (19.8604, 'bench'), (30.2819, 'bench'), (23.263066666666667, 'bench'), (26.206400000000002, 'bench'), (71.99773333333333, 'bench'), (50.8341, 'bench'), (11.498333333333331, 'bench'), (57.9718, 'bench'), (24.64126666666667, 'bench'), (78.4154, 'bench'), (21.009933333333336, 'bench'), (24.2661, 'bench'), (65.3916, 'bench'), (33.3792, 'bench'), (9.4969, 'bench'), (21.098466666666667, 'bench'), (20.602600000000002, 'bench'), (19.544200000000004, 'bench'), (22.735133333333334, 'bench'), (74.7755, 'bench'), (59.993, 'bench'), (19.2285, 'bench'), (45.2479, 'bench'), (51.10726666666667, 'bench'), (24.3932, 'bench'), (20.1048, 'bench'), (55.906, 'bench'), (72.47093333333333, 'bench'), (42.6779, 'bench'), (16.082733333333334, 'bench'), (16.3814, 'bench'), (19.085733333333334, 'bench'), (19.3264, 'bench'), (20.791, 'bench'), (20.0723, 'bench'), (22.578266666666668, 'bench'), (46.004400000000004, 'bench'), (88.41686666666666, 'bench')