In [4]:
import openai
import faiss
import numpy as np
import pandas as pd
from typing import List, Tuple

In [5]:
EMBEDDING_MODEL = "text-embedding-ada-002"

In [17]:
class CourseRecommender:
    def __init__(self, data_path: str):
        self.df = pd.read_csv(data_path)
        self.df.fillna("", inplace=True)
        self.df["full_text"] = self.df["title"] + " " + self.df["description"]
        self.embeddings = self._compute_embeddings(self.df["full_text"].tolist())
        self.index = self._build_index(self.embeddings)

    def _compute_embeddings(self, texts: List[str]) -> np.ndarray:
        embeddings = []
        for i in range(0, len(texts), 10):  # batching
            batch = texts[i:i+10]
            response = client.embeddings.create(model=EMBEDDING_MODEL, input=batch)
            batch_embeddings = [e.embedding for e in response.data]
            embeddings.extend(batch_embeddings)
        return np.array(embeddings).astype("float32")

    def _build_index(self, embeddings: np.ndarray):
        dim = embeddings.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(embeddings)
        return index

    def recommend_courses(self, profile: str, completed_ids: List[str]) -> List[Tuple[str, float]]:
        # Filter out completed courses
        mask = ~self.df["id"].isin(completed_ids)
        remaining_df = self.df[mask].reset_index(drop=True)
        remaining_embeddings = self.embeddings[mask.values]

        # Embed user profile
        query_embedding = client.embeddings.create(model=EMBEDDING_MODEL, input=[profile]).data[0].embedding
        query_vec = np.array(query_embedding).astype("float32").reshape(1, -1)

        # Build temp index for filtered data
        dim = remaining_embeddings.shape[1]
        temp_index = faiss.IndexFlatL2(dim)
        temp_index.add(remaining_embeddings)

        # Search top 10 to filter best 5 not already completed
        distances, indices = temp_index.search(query_vec, 10)
        top_ids = []
        for idx, dist in zip(indices[0], distances[0]):
            course_id = remaining_df.iloc[idx]["id"]
            similarity = 1 / (1 + dist)  # Convert L2 distance to pseudo-similarity
            top_ids.append((course_id, round(similarity, 4)))
            if len(top_ids) >= 5:
                break
        return top_ids


In [18]:
import os
import requests

In [19]:
def download_dataset(url: str, dest: str):
    if not os.path.exists(dest):
        print(f"Downloading dataset from {url}...")
        r = requests.get(url)
        with open(dest, "wb") as f:
            f.write(r.content)

In [20]:
DATASET_URL = "https://raw.githubusercontent.com/Bluedata-Consulting/GAAPB01-training-code-base/refs/heads/main/Assignments/assignment2dataset.csv"
DATASET_PATH = "assignment2dataset.csv"

In [11]:
import os
os.environ['AZURE_OPENAI_API_KEY'] = '04f9a983b5d747baac8c74a75c0d525a'
os.environ['AZURE_OPENAI_ENDPOINT'] = 'https://swedencentral.api.cognitive.microsoft.com/'

In [12]:
from openai import OpenAI
#OPENAI_API_KEY="04f9a983b5d747baac8c74a75c0d525a"
#OPENAI_ENDPOINT="https://swedencentral.api.cognitive.microsoft.com/"
client = OpenAI()

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [None]:

# Download dataset if needed
download_dataset(DATASET_URL, DATASET_PATH)

In [None]:
# Initialize recommender
recommender = CourseRecommender(DATASET_PATH)

NameError: name 'client' is not defined

In [None]:



# Sample test profiles
test_profiles = [
    ("I’ve completed the ‘Python Programming for Data Science’ course and enjoy data visualization.", ["C001"]),
    ("I know Azure basics and want to manage containers and build CI/CD pipelines.", ["C015"]),
    ("My background is in ML fundamentals; I’d like to specialize in neural networks and production workflows.", ["C007"]),
    ("I want to learn to build and deploy microservices with Kubernetes—what courses fit best?", []),
    ("I’m interested in blockchain and smart contracts but have no prior experience.", [])
]

for i, (query, completed) in enumerate(test_profiles):
    print(f"\n=== Test Profile {i+1} ===")
    results = recommender.recommend_courses(profile=query, completed_ids=completed)
    for course_id, score in results:
        print(f"{course_id}: Score={score:.4f}")
