<a href="https://colab.research.google.com/github/ibtoubtech/ibtoubtech/blob/main/Devoir1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Chargement des données

In [None]:
# Directory holding the train/test CSV splits. Change this single value when
# running outside Colab instead of editing every path below.
DATA_DIR = '/content/dataset'

# Load both splits into DataFrames.
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')
test_df = pd.read_csv(f'{DATA_DIR}/test.csv')

# Exploration initiale

In [None]:
def explore_data(df):
    """Print a quick overview of a DataFrame.

    Shows, in order: the first rows (``df.head()``), the structural summary
    (``df.info()``) and the descriptive statistics (``df.describe()``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It is only read, never modified.

    Returns
    -------
    None
    """
    print("\nAperçu des données:")
    print(df.head())
    print("\nInformations générales:")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    df.info()
    print("\nStatistiques descriptives:")
    print(df.describe())

# Print the overview (first rows, info, descriptive statistics) of the training split.
explore_data(train_df)


Aperçu des données:
   tBodyAcc-mean()-X  tBodyAcc-mean()-Y  tBodyAcc-mean()-Z  tBodyAcc-std()-X  \
0           0.288585          -0.020294          -0.132905         -0.995279   
1           0.278419          -0.016411          -0.123520         -0.998245   
2           0.279653          -0.019467          -0.113462         -0.995380   
3           0.279174          -0.026201          -0.123283         -0.996091   
4           0.276629          -0.016570          -0.115362         -0.998139   

   tBodyAcc-std()-Y  tBodyAcc-std()-Z  tBodyAcc-mad()-X  tBodyAcc-mad()-Y  \
0         -0.983111         -0.913526         -0.995112         -0.983185   
1         -0.975300         -0.960322         -0.998807         -0.974914   
2         -0.967187         -0.978944         -0.996520         -0.963668   
3         -0.983403         -0.990675         -0.997099         -0.982750   
4         -0.980817         -0.990482         -0.998321         -0.979672   

   tBodyAcc-mad()-Z  tBodyAcc-max()

# Suppression de la colonne 'subject' si non nécessaire

In [None]:
# Drop the 'subject' column from each split independently.
# errors='ignore' turns the drop into a no-op when the column is absent, so
# the test split is no longer assumed to contain it just because the training
# split does, and the cell can be re-run safely. Rebinding the names (rather
# than inplace=True) keeps the operation explicit.
train_df = train_df.drop(columns=['subject'], errors='ignore')
test_df = test_df.drop(columns=['subject'], errors='ignore')

# Encodage de la variable cible

In [None]:
# Fit the encoder on the training labels only, then apply that single fitted
# mapping to both splits so their integer codes agree.
label_encoder = LabelEncoder()
label_encoder.fit(train_df['Activity'])
train_df['Activity'] = label_encoder.transform(train_df['Activity'])
test_df['Activity'] = label_encoder.transform(test_df['Activity'])

# Séparation des features et des labels

In [None]:
# Split each frame into its features (every column except the target) and
# its labels (the 'Activity' column).
X_train, y_train = train_df.drop(columns='Activity'), train_df['Activity']
X_test, y_test = test_df.drop(columns='Activity'), test_df['Activity']

# Normalisation des données

In [None]:
# Fit the scaler on the training features only, then standardise both splits
# with those training statistics so the test split does not influence scaling.
# NOTE: X_train / X_test are overwritten with their scaled versions, so the
# feature/label split cell must be re-run before re-running this one.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Initialisation des modèles

In [None]:
# Seed shared by every estimator so results are reproducible between runs.
SEED = 42

# Candidate classifiers, keyed by the display name used in the reports.
# NOTE(review): `use_label_encoder` was removed from the XGBClassifier call;
# recent XGBoost releases no longer use that argument — confirm against the
# installed version.
# NOTE(review): "'super' object has no attribute '__sklearn_tags__'" raised by
# the XGBoost model looks like an xgboost / scikit-learn version mismatch
# (older xgboost with scikit-learn >= 1.6); upgrading xgboost or pinning
# scikit-learn is the usual remedy — verify the installed versions.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'Random Forest': RandomForestClassifier(random_state=SEED),
    'XGBoost': XGBClassifier(eval_metric='mlogloss', random_state=SEED)
}

# Entraînement et validation croisée

In [None]:
# Report with the original activity names (in encoder order) instead of the
# opaque integer codes. str() keeps this working even if the encoder was
# refitted on already-encoded integers.
class_names = [str(label) for label in label_encoder.classes_]
# Passing the codes explicitly fixes the row/column order and keeps the
# reports well-formed even if a class is missing from a split.
class_ids = np.arange(len(class_names))

for name, model in models.items():
    # 3-fold cross-validated accuracy, computed on the training split only.
    scores = cross_val_score(model, X_train, y_train, cv=3, scoring='accuracy')
    print(f"\n{name} - Précision moyenne en validation croisée: {scores.mean():.4f}")

    # Refit on the whole training split, then evaluate on the test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print(f"\nRapport de classification pour {name}:")
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
    print("\nMatrice de confusion:")
    # Rows are true classes, columns are predicted classes, both in class_names order.
    print(confusion_matrix(y_test, y_pred, labels=class_ids))




Decision Tree - Précision moyenne en validation croisée: 0.8466

Rapport de classification pour Decision Tree:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.82      0.80      0.81       491
           2       0.82      0.83      0.83       532
           3       0.79      0.89      0.84       496
           4       0.85      0.82      0.83       420
           5       0.79      0.71      0.75       471

    accuracy                           0.85      2947
   macro avg       0.85      0.84      0.84      2947
weighted avg       0.85      0.85      0.85      2947


Matrice de confusion:
[[537   0   0   0   0   0]
 [  0 394  97   0   0   0]
 [  0  89 443   0   0   0]
 [  0   0   0 443  27  26]
 [  0   0   0  15 344  61]
 [  0   0   0 102  34 335]]





Random Forest - Précision moyenne en validation croisée: 0.8939

Rapport de classification pour Random Forest:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       537
           1       0.87      0.87      0.87       491
           2       0.88      0.88      0.88       532
           3       0.90      0.97      0.93       496
           4       0.95      0.84      0.89       420
           5       0.89      0.92      0.91       471

    accuracy                           0.92      2947
   macro avg       0.92      0.91      0.91      2947
weighted avg       0.92      0.92      0.92      2947


Matrice de confusion:
[[537   0   0   0   0   0]
 [  0 425  66   0   0   0]
 [  0  61 470   1   0   0]
 [  0   0   0 480  10   6]
 [  0   0   0  22 353  45]
 [  0   0   0  30   7 434]]


AttributeError: 'super' object has no attribute '__sklearn_tags__'