In [9]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
import joblib

# 1. تحميل البيانات
df = pd.read_csv("child_behavior_dataset_balanced_noise.csv")

# 2. ترميز top_spending_category (categorical feature)
top_category_encoder = LabelEncoder()
df["top_spending_category"] = top_category_encoder.fit_transform(df["top_spending_category"])

# 3. ترميز label (القيم المستهدفة)
label_encoder = LabelEncoder()
df["label_encoded"] = label_encoder.fit_transform(df["label"])

# 4. تقسيم البيانات: المدخلات X والمخرجات y
X = df.drop(["label", "label_encoded"], axis=1)
y = df["label_encoded"]

# 5. تقسيم التدريب والاختبار
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 6. تدريب الموديل
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 7. التقييم
y_pred = model.predict(X_test)
print("🎯 Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))

# 8. حفظ الموديل والمشفرات
joblib.dump(model, "child_behavior_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(top_category_encoder, "top_category_encoder.pkl")

print("✅ تم حفظ الموديل والمشفرات بنجاح!")


🎯 Accuracy: 0.88

📊 Classification Report:
               precision    recall  f1-score   support

    balanced       0.91      0.88      0.89       152
    investor       0.86      0.86      0.86       156
       saver       0.83      0.89      0.86       130
     spender       0.91      0.90      0.90       162

    accuracy                           0.88       600
   macro avg       0.88      0.88      0.88       600
weighted avg       0.88      0.88      0.88       600

✅ تم حفظ الموديل والمشفرات بنجاح!
