In [1]:
import os
import torch
from sentence_transformers import SentenceTransformer, util
import os
import pickle
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from transformers import pipeline

In [2]:
# Pick the torch device once: CUDA when a CUDA GPU is available, otherwise CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [3]:
# Load everything the later cells rely on: the sentence-embedding model, the
# embeddings of the pre-generated questions, the course/institution table and
# the zero-shot intent classifier.
print("\n Loading Sentence-BERT model...")
model = SentenceTransformer("all-MiniLM-L6-v2")
print("Loaded Model...")

print("Loading Generated Questions...")
generated_qs_path = "data/intermediate/generated_questions.txt"
if not os.path.exists(generated_qs_path):
    print("'generated_questions.txt' not found!")
    # Raise instead of calling exit(): inside a notebook, exit() asks the kernel
    # to shut down, whereas an exception stops this cell (and a "Run All") with
    # a clear traceback and keeps the session alive.
    raise FileNotFoundError(
        f"Required file is missing: {generated_qs_path}"
    )

# One question per line; blank lines are skipped.
with open(generated_qs_path, "r", encoding="utf-8") as f:
    generated_questions = [line.strip() for line in f if line.strip()]
generated_embeddings = model.encode(generated_questions, convert_to_tensor=True)
print("Loaded Generated Questions.")


print("Loading Courses and Institutions Data")
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only load data.pkl when it comes from a trusted source.
with open("data/processed/data.pkl", "rb") as f:
    df = pickle.load(f)


# Keep a single row per distinct review text and renumber the index from 0.
df_unique = df.drop_duplicates(subset=["reviews"]).reset_index(drop=True)
print("Loaded courses and institutions data.")

print("Loading Intent Classifier Model..")
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
print("Loaded Intent Classifier Model")


 Loading Sentence-BERT model...
Loaded Model...
Loading Generated Questions...
Loaded Generated Questions.
Loading Courses and Institutions Data
Loaded courses and institutions data.
Loading Intent Classifier Model..


Device set to use mps:0


Loaded Intent Classifier Model


## Ask the user to select an institution and a course

In [5]:
def _prompt_for_index(prompt, options):
    """Ask for a 1-based menu number until a valid one is given.

    Args:
        prompt: Text shown to the user by ``input``.
        options: Sequence of menu entries the number must refer to.

    Returns:
        The 0-based index into ``options`` of the chosen entry.

    Raises:
        ValueError: If ``options`` is empty, since no choice is possible.
    """
    if len(options) == 0:
        raise ValueError("There are no options to choose from.")
    while True:
        raw = input(prompt).strip()
        try:
            number = int(raw)
        except ValueError:
            print(f"Please enter a whole number between 1 and {len(options)}.")
            continue
        # Reject 0 and negatives explicitly: after subtracting 1 they would be
        # valid negative indices and silently pick an entry from the end.
        if 1 <= number <= len(options):
            return number - 1
        print(f"Please enter a whole number between 1 and {len(options)}.")


# --- Ask user to select an institution ---
institutions = sorted(df_unique["institution"].dropna().unique())
print("\n🏫 Available Institutions:")
for idx, inst in enumerate(institutions):
    print(f"{idx + 1}. {inst}")

inst_idx = _prompt_for_index("\n👉 Select an institution by number: ", institutions)
selected_institution = institutions[inst_idx]
print(f"\n✅ You selected: {selected_institution}")

# --- Filter courses ---
# Only the courses offered by the chosen institution are listed.
inst_courses = sorted(df_unique[df_unique["institution"] == selected_institution]["name"].dropna().unique())
print("\n📚 Courses in this institution:")
for idx, course in enumerate(inst_courses):
    print(f"{idx + 1}. {course}")

course_idx = _prompt_for_index("\n👉 Select a course by number: ", inst_courses)
selected_course = inst_courses[course_idx]
print(f"\n✅ You selected course: {selected_course}")


🏫 Available Institutions:
1. (ISC)²
2. Alberta Machine Intelligence Institute
3. Amazon Web Services
4. Arizona State University
5. Atlassian
6. Autodesk
7. BCG
8. Berklee College of Music
9. Birkbeck, University of London
10. California Institute of the Arts
11. Case Western Reserve University
12. Columbia University
13. Copenhagen Business School
14. DeepLearning.AI
15. Duke University
16. ESSEC Business School
17. Eindhoven University of Technology
18. Emory University
19. Erasmus University Rotterdam
20. Facebook
21. Georgia Institute of Technology
22. GitLab
23. Google
24. Google - Spectrum Sharing
25. Google Cloud
26. HEC Paris
27. Hebrew University of Jerusalem
28. IBM
29. IE School of Architecture & Design
30. Imperial College London
31. JetBrains
32. Johns Hopkins University
33. London Business School
34. Ludwig-Maximilians-Universität München (LMU)
35. Macquarie University
36. Nanyang Technological University, Singapore
37. New York University
38. Northwestern University
39.

## Get the user's question about the selected course

In [6]:
# Read the user's question. It counts as on-topic unless the similarity check
# below decides otherwise.
query = input("\nEnter your question about the course: ")

is_related = True

if generated_embeddings is not None:
    # Embed the question and bring both sides onto the CPU before scoring.
    query_embedding = model.encode(query, convert_to_tensor=True).cpu()
    generated_embeddings = generated_embeddings.cpu()

    # Cosine similarity of the question against every generated question;
    # only the best match matters.
    cosine_scores = util.pytorch_cos_sim(query_embedding, generated_embeddings)
    max_score = cosine_scores.max().item()

    print(f"Max Cosine Similarity Score: {max_score:.4f}")

    # A best match of at least 0.7 is treated as "related to the course reviews".
    is_related = max_score >= 0.7
    if is_related:
        print("This question is related to the course reviews. Proceeding with the answer.")
    else:
        print("This question is not related to the course reviews.")


Max Cosine Similarity Score: 0.8445
This question is related to the course reviews. Proceeding with the answer.


In [8]:
# Readable candidate label -> internal intent code.
# The insertion order here is also the order of the candidate labels.
label_map = {
    "Is the user asking a yes or no question?": "yes_no",
    "Is the user asking about the instructor?": "instructor",
    "Is the user asking about the course content or topics?": "content",
    "Is the user asking about how difficult the course is?": "difficulty",
    "Is the user asking about career outcomes or job relevance?": "career",
    "Is the user comparing this course to another course?": "comparison",
    "Is the user asking for general opinions from students?": "general_opinion",
    "Is the user asking for a summary or overview of the course?": "course_overview",
}

# Natural-language intent labels, in the same order as the keys of label_map.
intent_labels_readable = list(label_map)


def _action_flags(sentiment, nlg, summarization, semantic):
    """Build the per-intent dict of processing steps that should be run."""
    return {
        "sentiment": sentiment,
        "nlg": nlg,
        "summarization": summarization,
        "semantic": semantic,
    }


# Intent code -> which processing steps apply, including semantic analysis.
intent_actions = {
    "yes_no": _action_flags(sentiment=True, nlg=False, summarization=False, semantic=False),
    "instructor": _action_flags(sentiment=True, nlg=True, summarization=False, semantic=True),
    "content": _action_flags(sentiment=False, nlg=False, summarization=True, semantic=True),
    "difficulty": _action_flags(sentiment=True, nlg=True, summarization=False, semantic=True),
    "career": _action_flags(sentiment=True, nlg=True, summarization=False, semantic=True),
    "comparison": _action_flags(sentiment=True, nlg=True, summarization=False, semantic=True),
    "general_opinion": _action_flags(sentiment=True, nlg=True, summarization=True, semantic=True),
    "course_overview": _action_flags(sentiment=False, nlg=False, summarization=True, semantic=True),
}

# Classify the intent of an on-topic question and report which processing
# steps it calls for. `query` and `is_related` come from the relatedness-check
# cell; nothing is printed when the question was judged unrelated.
if is_related:
    print("✅ Related to the course reviews. Proceeding...")
    print("Question: ", query)

    # Single-label zero-shot classification; the first returned label is taken
    # as the prediction and mapped to its internal intent code.
    result = classifier(query, candidate_labels=intent_labels_readable, multi_label=False)
    predicted_label = result["labels"][0]
    predicted_intent = label_map[predicted_label]
    actions = intent_actions[predicted_intent]

    print(f"Intent: {predicted_intent}")

    # Optional steps are announced in a fixed order, only when enabled.
    step_messages = (
        ("sentiment", "➡ Perform sentiment analysis"),
        ("nlg", "➡ Generate answer using NLG"),
        ("summarization", "➡ Summarize relevant content"),
    )
    for flag, message in step_messages:
        if actions[flag]:
            print(message)

    # Semantic analysis is always reported, one way or the other.
    print(
        "🧠 Semantic analysis is necessary"
        if actions["semantic"]
        else "🛑 Semantic analysis not required"
    )

✅ Related to the course reviews. Proceeding...
Question:  How Instructor teaches?
Intent: instructor
➡ Perform sentiment analysis
➡ Generate answer using NLG
🧠 Semantic analysis is necessary
