In [4]:
# Load libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Columns dropped before training (non-numeric or unnecessary for the model).
DROPPED_COLUMNS = ['user', 'first_open', 'screen_list', 'enrolled_date']

# Boolean-like columns that are converted to integers.
FLAG_COLUMNS = ['minigame', 'used_premium_feature', 'liked']


def clean_app_data(df):
    """Return a cleaned copy of the raw app-usage frame.

    The input frame is not modified. Steps, each applied only when the
    relevant column is present:

    * drop the columns listed in ``DROPPED_COLUMNS``;
    * convert a textual ``hour`` column (``HH:MM:SS``, surrounding whitespace
      allowed) to the hour of day;
    * cast the columns listed in ``FLAG_COLUMNS`` to ``int``.

    Args:
        df: Raw data as a ``pandas.DataFrame``.

    Returns:
        A new ``pandas.DataFrame`` with the transformations applied.
    """
    # ``drop`` returns a new frame, so the assignments below never touch the
    # caller's data.
    df = df.drop(columns=DROPPED_COLUMNS, errors='ignore')

    # An ``hour`` column that is already numeric must be left alone: pushing
    # it through the string/time parser would fail to match ``%H:%M:%S`` and
    # every value would silently become 0.
    if 'hour' in df.columns and not pd.api.types.is_numeric_dtype(df['hour']):
        raw_hours = df['hour'].astype(str).str.strip()
        hours = pd.to_datetime(
            raw_hours, format='%H:%M:%S', errors='coerce'
        ).dt.hour
        unparsed = int(hours.isna().sum())
        if unparsed:
            # Best-effort default is kept, but it is no longer silent.
            print(f"Warning: {unparsed} unparseable 'hour' value(s) set to 0")
        df['hour'] = hours.fillna(0)

    for col in FLAG_COLUMNS:
        if col in df.columns:
            df[col] = df[col].astype(int)

    return df


def split_features_target(df, target_column):
    """Split *df* into a feature matrix and a target vector.

    Args:
        df: Cleaned data containing ``target_column``.
        target_column: Name of the label column.

    Returns:
        A ``(X, y)`` tuple: ``X`` is ``df`` without ``target_column`` and
        ``y`` is that column.

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
        ValueError: If any feature column is neither numeric nor boolean.
    """
    X = df.drop(columns=[target_column])
    y = df[target_column]

    # An explicit raise survives ``python -O`` (unlike ``assert``), and
    # excluding number/bool also catches category and datetime columns,
    # not just ``object`` ones.
    non_numeric = X.select_dtypes(exclude=['number', 'bool']).columns.tolist()
    if non_numeric:
        raise ValueError(
            f"Non-numeric data remains in features: {non_numeric}"
        )
    return X, y


# The guard keeps the helpers importable without reading the CSV or training
# a model. The body stays at module level (no ``main()``), so every variable
# below is still a global when this runs as a script or notebook cell.
if __name__ == "__main__":
    # Load dataset
    df = pd.read_csv('/content/appdata10.csv')

    # Show columns
    print("📌 Columns:", df.columns.tolist())

    # Drop unused columns, convert the time column, cast flag columns
    df = clean_app_data(df)

    # Set target variable and make sure all remaining features are numeric
    target_column = 'enrolled'
    X, y = split_features_target(df, target_column)

    # Train/test split (fixed seed for reproducible metrics)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Train model
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate model on the held-out 20%
    y_pred = model.predict(X_test)
    print("\n✅ Model Performance:\n", classification_report(y_test, y_pred))

    # Save model
    # NOTE(review): the model predicts `enrolled`, yet the artifact is named
    # "churn_model" -- filename left unchanged; confirm with its consumers.
    joblib.dump(model, 'churn_model.pkl')
    print("✅ churn_model.pkl saved successfully!")


📌 Columns: ['user', 'first_open', 'dayofweek', 'hour', 'age', 'screen_list', 'numscreens', 'minigame', 'used_premium_feature', 'enrolled', 'enrolled_date', 'liked']

✅ Model Performance:
               precision    recall  f1-score   support

           0       0.52      0.41      0.46      3863
           1       0.67      0.76      0.72      6137

    accuracy                           0.63     10000
   macro avg       0.60      0.59      0.59     10000
weighted avg       0.61      0.63      0.62     10000

✅ churn_model.pkl saved successfully!
