# 📊 01 - EDA + Predictive Modeling (Resolution Time & Churn Risk)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.metrics import mean_absolute_error, classification_report

## 📥 Load Merged Dataset

In [None]:
# Read the merged log table; the path is relative, so it resolves against the
# working directory the notebook is run from.
merged_logs_path = "../data/processed/merged_logs.csv"
df = pd.read_csv(merged_logs_path)

# Trailing expression: the notebook shows the first rows as the cell output.
df.head()

## 📈 Regression: Predict Resolution Time

In [None]:
# Build the regression inputs: dummy-encode the three predictor columns
# (drop_first=True drops the first level of every encoded column) and use
# 'resolved_in_hours' as the target.
reg_feature_cols = ['priority', 'issue_category', 'region']
X_reg = pd.get_dummies(data=df[reg_feature_cols], drop_first=True)
y_reg = df['resolved_in_hours']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

# Random forest with default hyperparameters, seeded for repeatable results.
reg_model = RandomForestRegressor(random_state=42)
reg_model.fit(X=X_train, y=y_train)

# Evaluate on the held-out rows and report the mean absolute error.
y_pred = reg_model.predict(X=X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"MAE: {mae:.2f} hours")

## 🔁 Classification: Predict Churn Risk

In [None]:
# Build the classifier inputs: three columns taken from df as-is (no encoding
# step is applied) and 'churn_risk' as the label.
cls_feature_cols = ['avg_session_length_min', 'feature_clicks', 'days_active_last_30']
X_cls = df[cls_feature_cols]
y_cls = df['churn_risk']

# NOTE: this rebinds X_train / X_test / y_train / y_test (and y_pred below),
# replacing the values left behind by the regression cell.
# NOTE(review): the split is not stratified on y_cls; if the churn classes are
# imbalanced, stratify=y_cls may be worth considering — confirm against the
# label distribution.
X_train, X_test, y_train, y_test = train_test_split(
    X_cls,
    y_cls,
    test_size=0.2,
    random_state=42,
)

# Random forest with default hyperparameters, seeded for repeatable results.
clf = RandomForestClassifier(random_state=42)
clf.fit(X=X_train, y=y_train)

# Predict on the held-out rows and print the per-class report.
y_pred = clf.predict(X=X_test)
print(classification_report(y_true=y_test, y_pred=y_pred))