In [5]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Load the course catalogue and the raw transaction log.
courses = pd.read_csv("data/courses.csv")
transactions = pd.read_csv("data/transactions.csv")

# Collapse the transactions to one row per CourseID: the number of
# (non-null) TransactionID values and the summed Amount.
course_summary = transactions.groupby("CourseID").agg(
    Enrollment_Count=("TransactionID", "count"),
    Total_Revenue=("Amount", "sum")
).reset_index()

# The left merge keeps every course; a course with no transactions gets NaN
# in the two aggregated columns.
df = courses.merge(course_summary, on="CourseID", how="left")

# Zero-fill numeric columns only. A blanket fillna(0) would also write the
# integer 0 into any text column, mixing ints with strings in columns that
# are label-encoded further down.
numeric_cols = df.select_dtypes(include="number").columns
df[numeric_cols] = df[numeric_cols].fillna(0)

df.head()

# One encoder per categorical column; each is kept under its own name so it
# can be persisted separately after training.
le_category, le_level, le_type = LabelEncoder(), LabelEncoder(), LabelEncoder()

# Replace each categorical column in place with its integer codes.
for column, encoder in (
    ("CourseCategory", le_category),
    ("CourseLevel", le_level),
    ("CourseType", le_type),
):
    df[column] = encoder.fit_transform(df[column])

# Predictor columns used for both targets.
features = [
    "CourseCategory", "CourseType", "CourseLevel",
    "CoursePrice", "CourseDuration", "CourseRating",
]
X = df[features]

# Target for the first model: the per-course transaction count.
y_enroll = df["Enrollment_Count"]

# 80/20 hold-out split with a fixed seed so the partition is repeatable.
enroll_split = train_test_split(X, y_enroll, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = enroll_split
# Fit a 100-tree random forest on the enrollment training split; fit()
# returns the estimator itself, so the fitted model is bound directly.
enrollment_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)
y_pred = enrollment_model.predict(X_test)

# Hold-out metrics for the enrollment model.
enroll_mae = mean_absolute_error(y_test, y_pred)
enroll_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
enroll_r2 = r2_score(y_test, y_pred)

print("Enrollment Model Results")
print("MAE:", enroll_mae)
print("RMSE:", enroll_rmse)
print("R2:", enroll_r2)
# Second target: summed transaction amount per course.
y_revenue = df["Total_Revenue"]

# Same features, test fraction and seed as the enrollment split.
revenue_split = train_test_split(X, y_revenue, test_size=0.2, random_state=42)
X_train_rev, X_test_rev, y_train_rev, y_test_rev = revenue_split

# Fit a 100-tree random forest on the revenue training split.
revenue_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train_rev, y_train_rev
)
y_pred_rev = revenue_model.predict(X_test_rev)

# Hold-out metrics for the revenue model.
revenue_mae = mean_absolute_error(y_test_rev, y_pred_rev)
revenue_rmse = np.sqrt(mean_squared_error(y_test_rev, y_pred_rev))
revenue_r2 = r2_score(y_test_rev, y_pred_rev)

print("Revenue Model Results")
print("MAE:", revenue_mae)
print("RMSE:", revenue_rmse)
print("R2:", revenue_r2)
# Make sure the output directory exists before writing anything into it.
os.makedirs("models", exist_ok=True)

# Persist both regressors and the three fitted encoders, one file each.
for artifact, destination in (
    (enrollment_model, "models/enrollment_model.pkl"),
    (revenue_model, "models/revenue_model.pkl"),
    (le_category, "models/le_category.pkl"),
    (le_level, "models/le_level.pkl"),
    (le_type, "models/le_type.pkl"),
):
    joblib.dump(artifact, destination)

print("✅ All models and encoders saved successfully!")

Enrollment Model Results
MAE: 0.7675000000000001
RMSE: 0.9761275531404694
R2: -0.9056499999999998
Revenue Model Results
MAE: 2716.4575
RMSE: 4038.2498790658065
R2: 0.1526390406809811
✅ All models and encoders saved successfully!
