In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Chargement des données
df = pd.read_csv("donnees_migratoires_mbujimayi.csv")

# Encodage des colonnes catégorielles
cat_cols = ['sexe', 'pays_origine', 'raison_voyage', 'provenance', 'destination']
encoders = {}
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

# Encodage de la cible
target_encoder = LabelEncoder()
df['niveau_risque'] = target_encoder.fit_transform(df['niveau_risque'])

# Séparation
X = df.drop(columns='niveau_risque')
y = df['niveau_risque']

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Modèle
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prédictions et métriques
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=target_encoder.classes_))

# Sauvegarde du modèle et des encoders
joblib.dump(model, "model_risque.pkl")
joblib.dump(encoders, "encoders.pkl")
joblib.dump(target_encoder, "target_encoder.pkl")

Accuracy: 0.999
              precision    recall  f1-score   support

      Faible       1.00      1.00      1.00       729
       Moyen       1.00      1.00      1.00      1481
       Élevé       1.00      1.00      1.00       790

    accuracy                           1.00      3000
   macro avg       1.00      1.00      1.00      3000
weighted avg       1.00      1.00      1.00      3000



['target_encoder.pkl']