Random Forest

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

# Charger les données
data = pd.read_csv('finaldataa1.csv')

# Séparer les features et la target
# On exclut segment_id, start_time, end_time, num_activities, dominant_activity
features = data.drop(['segment_id', 'dominant_activity'], axis=1)
target = data['dominant_activity']

# Encoder la target (les labels) en valeurs numériques
label_encoder = LabelEncoder()
target_encoded = label_encoder.fit_transform(target)

In [2]:
# Séparer en ensembles d'entraînement et de test (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(features, target_encoded, 
                                                    test_size=0.2, 
                                                    random_state=42)

In [3]:
# Créer le modèle Random Forest
rf_model = RandomForestClassifier(n_estimators=100,  # nombre d'arbres
                                 random_state=42,
                                 class_weight='balanced',  # pour gérer les déséquilibres de classes
                                 n_jobs=-1)  # utiliser tous les cœurs CPU

# Entraîner le modèle
rf_model.fit(X_train, y_train)

ValueError: could not convert string to float: '2009-12-19 16:42:47.118495407'

In [13]:
# Prédictions sur l'ensemble de test
y_pred = rf_model.predict(X_test)

# Calcul de l'accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Rapport de classification détaillé
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

Accuracy: 0.47

Classification Report:
                         precision    recall  f1-score   support

          Bed_to_Toilet       0.66      0.54      0.60       275
                 Chores       0.61      0.12      0.20      4426
          Desk_Activity       0.74      0.54      0.62       276
     Dining_Rm_Activity       0.52      0.09      0.15       753
               Eve_Meds       0.14      0.03      0.05        33
         Guest_Bathroom       0.66      0.08      0.14       930
       Kitchen_Activity       0.69      0.77      0.73       602
             Leave_Home       0.74      0.22      0.34       903
Master_Bedroom_Activity       0.36      0.44      0.40       219
               Meditate       0.50      0.20      0.29        15
           Morning_Meds       0.06      0.01      0.02        90
                   Read       0.05      0.54      0.09        89
                  Sleep       0.48      0.96      0.64      4783
               Watch_TV       0.42      0.46      

SVM

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score

# Charger les données
data = pd.read_csv('finaldataa1.csv')

# Séparer les features et la target
X = data.drop(['segment_id', 'start_time', 'end_time', 'dominant_activity', 'num_activities'], axis=1)
y = data['dominant_activity']

# Diviser en train et test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [3]:
# Standardisation des features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Création du modèle SVM
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42)

# Entraînement du modèle
svm_model.fit(X_train_scaled, y_train)

In [None]:
from sklearn.model_selection import GridSearchCV

# Définition des paramètres à tester
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['rbf', 'linear', 'poly']
}

# Recherche par grille
grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train_scaled, y_train)

# Meilleurs paramètres
print("Meilleurs paramètres:", grid_search.best_params_)

# Meilleur modèle
best_svm = grid_search.best_estimator_
y_pred_best = best_svm.predict(X_test_scaled)
print("\nMeilleur accuracy:", accuracy_score(y_test, y_pred_best))