<a href="https://colab.research.google.com/github/divya-hile/week1/blob/main/student_segmentation_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Student Segmentation and Course Recommendation System

In [None]:
!pip install streamlit scikit-learn plotly


In [None]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

import plotly.express as px


Users sheet

In [None]:
# Seed NumPy's global generator so every synthetic table drawn with np.random
# (this cell and the later ones, when run in order) is identical on each
# Restart & Run All.
np.random.seed(42)

# Synthetic users: 100 sequential IDs, an integer age drawn from [18, 45)
# and a randomly chosen gender label.
users = pd.DataFrame({
    "UserID": range(1, 101),
    "Age": np.random.randint(18, 45, 100),
    "Gender": np.random.choice(["Male", "Female"], 100)
})


Courses sheet

In [None]:
# Synthetic course catalogue: 20 sequential IDs, each given a random category,
# type, level and a rating in [3.5, 5.0) rounded to one decimal place.
# The random columns are drawn in the same left-to-right order as they appear.
courses = pd.DataFrame({"CourseID": range(1, 21)}).assign(
    CourseCategory=np.random.choice(
        ["Data Science", "AI", "Web Dev", "Cloud", "Business"], size=20
    ),
    CourseType=np.random.choice(
        ["Beginner", "Intermediate", "Advanced"], size=20
    ),
    CourseLevel=np.random.choice(
        ["Beginner", "Intermediate", "Advanced"], size=20
    ),
    CourseRating=np.round(np.random.uniform(low=3.5, high=5.0, size=20), 1),
)


Transactions sheet

In [None]:
# Synthetic purchase log: 300 rows, each pairing a randomly chosen user with a
# randomly chosen course, stamped with consecutive daily dates starting
# 2023-01-01 and an integer amount drawn from [500, 5000).
transactions = pd.DataFrame(
    dict(
        UserID=np.random.choice(users["UserID"], size=300),
        CourseID=np.random.choice(courses["CourseID"], size=300),
        TransactionDate=pd.date_range(start="2023-01-01", periods=300, freq="D"),
        Amount=np.random.randint(low=500, high=5000, size=300),
    )
)


In [None]:
# Attach each transaction's course attributes. `validate="many_to_one"` makes
# pandas raise a MergeError if CourseID is not unique in `courses`; without it a
# duplicated course row would silently duplicate transactions in the result.
data = transactions.merge(courses, on="CourseID", validate="many_to_one")


In [None]:
# Collapse the transaction log to one row per user:
#   total_courses - number of transaction rows for the user
#   avg_spend     - mean transaction Amount
#   avg_rating    - mean CourseRating of the purchased courses
#   diversity     - number of distinct course categories purchased
learner_profile = data.groupby("UserID", as_index=False).agg(
    total_courses=pd.NamedAgg(column="CourseID", aggfunc="count"),
    avg_spend=pd.NamedAgg(column="Amount", aggfunc="mean"),
    avg_rating=pd.NamedAgg(column="CourseRating", aggfunc="mean"),
    diversity=pd.NamedAgg(column="CourseCategory", aggfunc="nunique"),
)


In [None]:
# Add the user attributes to each profile row. Any user columns left over from a
# previous run of this cell are dropped first, so re-running it does not create
# suffixed duplicates (Age_x / Age_y) that would break later column selection.
# `validate` asserts that UserID is unique on both sides of the join.
learner_profile = (
    learner_profile
    .drop(columns=users.columns.difference(["UserID"]), errors="ignore")
    .merge(users, on="UserID", validate="one_to_one")
)


Feature Engineering

In [None]:
# Numeric columns used as clustering inputs (Gender and UserID are left out).
features = learner_profile.loc[
    :,
    ["total_courses", "avg_spend", "avg_rating", "diversity", "Age"],
]


Data Preprocessing

In [None]:
# Standardise every feature column so that columns on large scales (e.g.
# avg_spend) do not dominate the distance computations used for clustering.
scaler = StandardScaler()
scaler.fit(features)
X_scaled = scaler.transform(features)


Optimal Clusters

In [None]:
# Elbow method: fit K-Means for every candidate k and record the inertia
# (within-cluster sum of squared distances) to see where adding clusters
# stops paying off.
inertia = []
K = range(2, 8)

for k in K:
    # n_init is set explicitly because its default differs between
    # scikit-learn releases; pinning it keeps the curve comparable everywhere.
    km = KMeans(n_clusters=k, n_init=10, random_state=42)
    km.fit(X_scaled)
    inertia.append(km.inertia_)

px.line(
    x=list(K),
    y=inertia,
    title="Elbow Method",
    labels={"x": "Number of clusters (k)", "y": "Inertia"},
)


K-Means Clustering

In [None]:
# Final segmentation model with 4 clusters
# (NOTE(review): presumably chosen from the elbow plot - confirm).
# n_init is set explicitly because its default differs between scikit-learn
# releases; 10 restarts also makes the result less sensitive to initialisation.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)

# Store each learner's cluster label next to their profile.
learner_profile["Cluster"] = kmeans.fit_predict(X_scaled)


In [None]:
# Silhouette coefficient of the clustering (ranges from -1 to 1, higher is better).
silhouette_score(X=X_scaled, labels=learner_profile["Cluster"])


Cluster Visualization

In [None]:
# Cluster labels are integers, which Plotly Express would map to a continuous
# colour scale. Casting them to strings (on a copy, so learner_profile itself is
# untouched) gives one discrete colour and legend entry per segment.
fig = px.scatter(
    learner_profile.assign(Cluster=learner_profile["Cluster"].astype(str)),
    x="avg_spend",
    y="total_courses",
    color="Cluster",
    size="diversity",
    title="Learner Segmentation",
    # Keep the legend in label order rather than order of first appearance.
    category_orders={
        "Cluster": sorted(learner_profile["Cluster"].astype(str).unique())
    },
)
fig.show()


Cluster Interpretation

In [None]:
# Per-cluster mean of every clustering feature, used to characterise each segment.
(
    learner_profile
    .groupby("Cluster")[
        ["total_courses", "avg_spend", "avg_rating", "diversity", "Age"]
    ]
    .agg("mean")
)

In [None]:
def recommend_courses(user_id, top_n=5):
    """Recommend the courses most purchased by learners in the user's cluster.

    Reads the notebook-level frames ``learner_profile`` (UserID, Cluster),
    ``data`` (transactions joined with courses) and ``courses``.

    Parameters
    ----------
    user_id
        Value matched against ``learner_profile["UserID"]``.
    top_n : int, default 5
        Maximum number of distinct courses to return.

    Returns
    -------
    pandas.DataFrame
        Rows of ``courses`` for the recommended courses, ordered from most to
        least purchased.

    Notes
    -----
    * ``learner_profile`` only contains users who appear in the transactions.
      For any other ``user_id`` there is no cluster to look up, so the
      recommendation falls back to the most purchased courses across all users
      instead of raising ``IndexError``.
    * Courses the user has already purchased are not filtered out.
    """
    user_clusters = learner_profile.loc[
        learner_profile["UserID"] == user_id, "Cluster"
    ]

    if user_clusters.empty:
        # Cold start: no profile for this user, rank over every transaction.
        peer_transactions = data
    else:
        cluster = user_clusters.iloc[0]
        similar_users = learner_profile.loc[
            learner_profile["Cluster"] == cluster, "UserID"
        ]
        peer_transactions = data[data["UserID"].isin(similar_users)]

    # Course IDs ordered by number of purchases, most popular first.
    popular_courses = (
        peer_transactions.groupby("CourseID")
        .size()
        .sort_values(ascending=False, kind="stable")
        .head(top_n)
        .index
    )

    # A plain isin() filter would return rows in catalogue order and lose the
    # ranking, so re-sort the selected rows by their popularity position.
    rank = {course_id: position for position, course_id in enumerate(popular_courses)}
    recommended = courses[courses["CourseID"].isin(popular_courses)]
    return recommended.sort_values(
        "CourseID", key=lambda ids: ids.map(rank), kind="stable"
    )


In [None]:
# Example: recommendations for the learner whose UserID is 10.
recommend_courses(user_id=10)


In [None]:
# Report the silhouette coefficient of the final clustering as plain text.
print(
    "Silhouette Score:",
    silhouette_score(X=X_scaled, labels=learner_profile["Cluster"]),
)


In [None]:
import streamlit as st
import pandas as pd

# Minimal Streamlit UI: pick a user, press the button, and display that user's
# recommended courses.
# NOTE(review): this code reads `learner_profile` and `recommend_courses` from
# the notebook namespace; a script launched with `streamlit run` would need its
# own access to them - confirm how app.py is produced.

# The title's dash was mis-decoded text ("â€“"); it is written as an explicit
# en-dash escape so it survives any file re-encoding.
st.title("EduPro \u2013 Personalized Course Recommendation")

user_id = st.selectbox("Select User ID", learner_profile.UserID)

if st.button("Recommend Courses"):
    recs = recommend_courses(user_id)
    st.dataframe(recs)


In [None]:
# Launch the Streamlit CLI on app.py through a shell escape.
# NOTE(review): no cell visible in this notebook writes app.py - confirm the
# file exists in the working directory before running this cell.
!streamlit run app.py