<a href="https://colab.research.google.com/github/ckz2011/CourseRecommendationSystem/blob/main/Course_Recommendation_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


# Assignment 2: Personalized Course Recommendation Engine

This Colab notebook implements a course recommendation system using vector embeddings and FAISS similarity search. It is based on the assignment description that requires recommending the top 5 most relevant courses based on a user's profile and completed course list.


In [9]:

!pip install sentence-transformers faiss-cpu pandas




In [10]:

from typing import List, Tuple
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

# Load course data
def load_course_data(filepath: str) -> pd.DataFrame:
    df = pd.read_csv(filepath)
    df['content'] = df['title'] + " " + df['description']
    return df


In [11]:

# Convert course descriptions to embeddings
def compute_embeddings(courses: List[str], model) -> np.ndarray:
    return model.encode(courses, show_progress_bar=True)

# Build FAISS index
def build_faiss_index(embeddings: np.ndarray):
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index


In [12]:

# Recommendation Logic
def recommend_courses(profile: str, completed_ids: List[str], df: pd.DataFrame,
                      model, index, k: int = 5) -> List[Tuple[str, float]]:
    # Exclude completed courses
    df_filtered = df[~df['course_id'].isin(completed_ids)]

    # Embed the profile
    query_embedding = model.encode([profile])[0].astype("float32")

    # Get embeddings of remaining courses
    remaining_embeddings = compute_embeddings(df_filtered['content'].tolist(), model)

    # Use FAISS for similarity
    index_temp = faiss.IndexFlatL2(remaining_embeddings.shape[1])
    index_temp.add(remaining_embeddings)
    distances, indices = index_temp.search(np.array([query_embedding]), k)

    # Format output
    recommendations = []
    for i, idx in enumerate(indices[0]):
        course_id = df_filtered.iloc[idx]['course_id']
        similarity = 1 / (1 + distances[0][i])  # Convert L2 distance to similarity
        recommendations.append((course_id, round(similarity, 4)))

    return recommendations


In [13]:

# Sample CSV Data
csv_data = '''course_id,title,description
ML101,Introduction to Machine Learning,A foundational course on supervised and unsupervised learning.
DS201,Data Science with Python,Hands-on data analysis and visualization using Python.
K8S301,Kubernetes for Beginners,Learn to deploy and manage containerized applications.
BC401,Blockchain Fundamentals,Intro to distributed ledgers and smart contracts.
AZ102,Azure DevOps Basics,CI/CD pipelines and release strategies on Azure.
'''
with open("assignment2data.csv", "w") as f:
    f.write(csv_data)


In [14]:

# Load data and run recommender
df_courses = load_course_data("assignment2data.csv")
model = SentenceTransformer('all-MiniLM-L6-v2')
course_embeddings = compute_embeddings(df_courses['content'].tolist(), model)
index = build_faiss_index(course_embeddings)

# Input
user_profile = "I want to build and deploy microservices with Kubernetes"
completed_courses = ["AZ102"]

# Recommendations
recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("Top Recommended Courses:")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Top Recommended Courses:
Course ID: K8S301 | Similarity Score: 0.6237000226974487
Course ID: BC401 | Similarity Score: 0.35040000081062317
Course ID: ML101 | Similarity Score: 0.34279999136924744
Course ID: DS201 | Similarity Score: 0.326200008392334
Course ID: BC401 | Similarity Score: 0.0


In [15]:
# Example 1: Data Visualization
user_profile = """I've completed the 'Python Programming for Data Science' course and enjoy data visualization. What should I take next?"""
completed_courses = ["DS201"]

recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("Top Recommended Courses (Example 1):")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")

# Example 2: Azure + DevOps
user_profile = """I know Azure basics and want to manage containers and build CI/CD pipelines. Recommend courses."""
completed_courses = ["AZ102"]

recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("\nTop Recommended Courses (Example 2):")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")

# Example 3: ML Fundamentals
user_profile = """My background is in ML fundamentals; I'd like to specialize in neural networks and production workflows."""
completed_courses = ["ML101"]

recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("\nTop Recommended Courses (Example 3):")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")

# Example 4: Kubernetes + Microservices
user_profile = """I want to learn to build and deploy microservices with Kubernetes—what courses fit best?"""
completed_courses = ["AZ102"]

recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("\nTop Recommended Courses (Example 4):")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")

# Example 5: Blockchain Beginner
user_profile = """I'm interested in blockchain and smart contracts but have no prior experience. Which courses do you suggest?"""
completed_courses = []

recs = recommend_courses(user_profile, completed_courses, df_courses, model, index)
print("\nTop Recommended Courses (Example 5):")
for course_id, score in recs:
    print(f"Course ID: {course_id} | Similarity Score: {score}")


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Top Recommended Courses (Example 1):
Course ID: ML101 | Similarity Score: 0.41690000891685486
Course ID: AZ102 | Similarity Score: 0.3677999973297119
Course ID: K8S301 | Similarity Score: 0.36169999837875366
Course ID: BC401 | Similarity Score: 0.3537999987602234
Course ID: AZ102 | Similarity Score: 0.0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Top Recommended Courses (Example 2):
Course ID: K8S301 | Similarity Score: 0.47429999709129333
Course ID: ML101 | Similarity Score: 0.3797999918460846
Course ID: DS201 | Similarity Score: 0.376800000667572
Course ID: BC401 | Similarity Score: 0.37549999356269836
Course ID: BC401 | Similarity Score: 0.0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Top Recommended Courses (Example 3):
Course ID: K8S301 | Similarity Score: 0.38760000467300415
Course ID: DS201 | Similarity Score: 0.3677999973297119
Course ID: AZ102 | Similarity Score: 0.357699990272522
Course ID: BC401 | Similarity Score: 0.3564000129699707
Course ID: AZ102 | Similarity Score: 0.0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Top Recommended Courses (Example 4):
Course ID: K8S301 | Similarity Score: 0.613099992275238
Course ID: ML101 | Similarity Score: 0.40310001373291016
Course ID: BC401 | Similarity Score: 0.3634999990463257
Course ID: DS201 | Similarity Score: 0.35910001397132874
Course ID: BC401 | Similarity Score: 0.0


Batches:   0%|          | 0/1 [00:00<?, ?it/s]


Top Recommended Courses (Example 5):
Course ID: BC401 | Similarity Score: 0.512499988079071
Course ID: ML101 | Similarity Score: 0.38760000467300415
Course ID: AZ102 | Similarity Score: 0.3709000051021576
Course ID: DS201 | Similarity Score: 0.3628999888896942
Course ID: K8S301 | Similarity Score: 0.35760000348091125
