In [1]:
# Set Environment 
#!pip install openai transformers sentence-transformers nltk pandas scikit-learn flask
#!pip install sentence-transformers
#!pip install google-generativeai

In [2]:
# Import libraries
import os
import torch
import pandas as pd
import google.generativeai as genai # for google gemini api
from sentence_transformers import SentenceTransformer, util # for embeddings
from transformers import pipeline # for huggingface
import gradio as gr

KeyboardInterrupt: 

In [None]:
# Read the course sheet; its header spans two rows (row 0 = module type, row 1 = year label)
df = pd.read_csv("Course Dataset_final.csv", header=[0, 1])

# Flatten the two-level header into one string per column
flat_names = []
active_section = None  # module group whose year columns are currently being read

for top_label, sub_label in df.columns:
    top_label, sub_label = str(top_label).strip(), str(sub_label).strip()

    # A module header opens a section; an 'Unnamed' top cell continues an open
    # section; any other top-level label closes it.
    if top_label in ['Core Modules', 'Optional Modules']:
        active_section = top_label
    elif not ('Unnamed' in top_label and active_section):
        active_section = None

    if active_section:
        flat_names.append(f"{active_section} {sub_label}")
    else:
        flat_names.append(top_label)

df.columns = flat_names

# Check column names
print(df.columns)

In [None]:
# Convert Course Data to a Searchable Format (Embedding-Based Similarity)
# Load the sentence-embedding model. The same `model` object encodes the course
# descriptions later in this cell and the user's query inside search_courses().
model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")

# Build the text that is embedded for each course
def format_course_description(row):
    """Join a course row's populated fields into one ' | '-separated string.

    Args:
        row: A mapping-like record (e.g. one DataFrame row) keyed by column name.

    Returns:
        The string form of every listed column that is present in `row` and not
        null, in a fixed order: general course fields first, then the core and
        optional module columns year by year.
    """
    # General course information, in output order
    info_columns = [
        'Programme', 'Programme Type', 'Faculty', 'School/Department',
        'Entry Requirement', 'English Language Requirement',
        'Specific Requirements', 'Duration', 'Malaysian Fee',
        'International Fee', 'Intake', 'Qualification (Postgraduate)',
        'PG Modules', 'Why this course?'
    ]

    # Core column then optional column for each year
    module_columns = []
    for year in ['First Year', 'Second Year', 'Third Year', 'Fourth Year']:
        module_columns.append(f'Core Modules {year}')
        module_columns.append(f'Optional Modules {year}')

    parts = [
        str(row[name])
        for name in info_columns + module_columns
        if name in row and pd.notna(row[name])
    ]
    return " | ".join(parts)

# Apply to each row to generate course descriptions
df["course_description"] = df.apply(format_course_description, axis=1)

# Encode every description in a single batched call instead of invoking the
# model once per row; each cell of the new column still holds a plain list of
# floats, aligned to the frame's index.
description_embeddings = model.encode(df["course_description"].tolist())
df["embedding"] = pd.Series(description_embeddings.tolist(), index=df.index)

print("Embeddings generated successfully!")

In [None]:
# Integrate with Large Language Model (LLM) using Google Gemini's API
# Use a GEMINI_API_KEY that is already exported in the environment; the
# placeholder is only a fallback so a real key is never overwritten.
os.environ.setdefault("GEMINI_API_KEY", "YOUR_API_KEY") # please use your google api key here
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

In [None]:
# Sentiment/Emotion Classifier
# Hugging Face text-classification pipeline called by detect_emotion(), which
# sorts the returned scores itself.
# NOTE(review): presumably top_k=None makes the pipeline return a score for
# every label rather than only the best one — confirm against the transformers docs.
emotion_classifier = pipeline(
    "text-classification", 
    model="bhadresh-savani/distilbert-base-uncased-emotion", 
    top_k=None
)

def detect_emotion(text, top_k=3):
    """Return the `top_k` highest-scoring emotion predictions for `text`.

    Args:
        text: The message to classify.
        top_k: How many predictions to keep.

    Returns:
        The first `top_k` entries of the classifier's first result, ordered by
        their "score" value from highest to lowest.
    """
    label_scores = list(emotion_classifier(text)[0])
    label_scores.sort(key=lambda prediction: prediction["score"], reverse=True)
    return label_scores[:top_k]

In [None]:
# Keyword Extraction
def extract_keywords(text):
    """Find programme names mentioned in `text` and remember them.

    Every distinct, non-null value of the "Programme" column of the global `df`
    is compared case-insensitively against `text`. Each match is recorded in the
    global `keyword_memory` under its lower-cased name (with value None).

    Args:
        text: The user's message.

    Returns:
        The list of programme values, as stored in `df`, that occur in `text`.
    """
    # Lower-case the message once instead of once per programme name
    text_lower = text.lower()
    course_names = df["Programme"].dropna().unique()

    # str() keeps a non-string programme value from raising on .lower()
    matched_courses = [name for name in course_names if str(name).lower() in text_lower]
    for course in matched_courses:
        keyword_memory[str(course).lower()] = None
    return matched_courses
    
# Match keywords from csv into foundation, undergraduate and postgraduate
# NOTE(review): chat_with_gemini() defines its own local lists under these same
# three names, so these module-level lists do not appear to be read by the code
# in this notebook — confirm before relying on or removing them.
undergraduate_keywords = ["bsc", "ba", "bed", "bpharm", "meng"]
postgraduate_keywords = ["postgraduate", "msc", "mres", "phd", "mpharm"]
foundation_keywords = ["foundation"]

In [None]:
# Render one course record as "Label: value" lines
def format_course_info(course):
    """Turn a course record into a readable multi-line string.

    Args:
        course: A mapping with a `.get` method, keyed by field name.

    Returns:
        One "Field: value" line (each ending in a newline) per listed field, in
        a fixed order. A field is left out when its stripped string form is
        empty or equals "nan", "n/a" or "none" in any letter case. Returns ""
        when no field qualifies.
    """
    blank_markers = ("nan", "n/a", "none", "")

    # Each field is printed under its own name, in this order
    field_order = (
        "Programme",
        "Programme Type",
        "Faculty",
        "School/Department",
        "Entry Requirement",
        "English Language Requirement",
        "Specific Requirements",
        "Duration",
        "Malaysian Fee",
        "International Fee",
        "Intake",
        "Qualification (Postgraduate)",
        "PG Modules",
        "Core Modules",
        "Optional Modules",
        "Why this course?",
    )

    lines = []
    for field_name in field_order:
        text = str(course.get(field_name, "")).strip()
        if text.lower() in blank_markers:
            continue
        lines.append(f"{field_name}: {text}\n")

    return "".join(lines)

In [None]:
# Finds and returns the top 3 relevant courses based on course interest, degree level and similarity 
def search_courses(user_query, top_n=3, level_filter=None):
    """Return up to `top_n` course records matching a degree level and interest.

    Rows of the global `df` without a "Programme Type" are ignored. When
    `level_filter` is given, only rows whose programme type contains one of that
    level's keywords are kept. A foundation search returns the first matching
    row. Any other search ranks rows by cosine similarity between the query
    embedding and each row's stored "embedding", plus 0.15 for every stored
    course-interest keyword found in the programme or school/department name.

    Args:
        user_query: Text to embed when `session_memory` holds no course interest.
        top_n: Maximum number of courses to return.
        level_filter: "foundation", "undergraduate" or "postgraduate" (any
            letter case), or None to skip level filtering.

    Returns:
        A list of dicts, one per course, holding the general course fields
        ("N/A" for columns that do not exist) plus comma-joined "Core Modules"
        and "Optional Modules" entries when such columns exist. Returns [] when
        no row passes the filters.

    Raises:
        KeyError: If `level_filter` is not one of the known levels.
    """
    def _lowered(value):
        # Empty cells arrive as NaN floats, which have no .lower()
        return value.lower() if isinstance(value, str) else ""

    def _joined_modules(course, columns):
        values = [course.get(col, "") for col in columns]
        return ", ".join(str(v) for v in values if pd.notnull(v))

    # Convert level_filter to lowercase if present
    level_filter = level_filter.lower() if level_filter else None

    # Filter initial dataframe
    filtered_df = df.dropna(subset=["Programme Type"])

    # Filter by degree level
    if level_filter:
        level_map = {
            "undergraduate": ["bsc", "ba", "bed", "bpharm", "meng", "beng", "ug", "undergraduate"],
            "postgraduate": ["msc", "ma", "mres", "phd", "mpharm", "pg", "postgraduate"],
            "foundation": ["foundation"]
        }
        level_pattern = '|'.join(level_map[level_filter])
        # na=False treats any non-text programme type as "no match"
        level_mask = filtered_df["Programme Type"].str.lower().str.contains(level_pattern, na=False)
        filtered_df = filtered_df[level_mask]

    if filtered_df.empty:
        print("⚠️ No courses match the degree level filter.")
        return []

    if level_filter == "foundation":
        # Skip semantic ranking if searching for Foundation
        top_courses = filtered_df.head(1)
    elif "embedding" not in filtered_df.columns or filtered_df["embedding"].isnull().all():
        print("⚠️ 'embedding' column not found or empty. Returning top matches without semantic ranking.")
        top_courses = filtered_df.head(top_n)
    else:
        # A stored interest of None (or blank) must fall back to the raw query
        stored_interest = session_memory.get("course_interest")
        has_interest = isinstance(stored_interest, str) and bool(stored_interest.strip())
        query_text = stored_interest if has_interest else user_query

        # Split the interest once; blank pieces are dropped because an empty
        # string is "in" every name and would boost every course
        interest_keywords = []
        if has_interest:
            interest_keywords = [
                piece.strip() for piece in stored_interest.lower().split(",") if piece.strip()
            ]

        query_embedding = model.encode(query_text, convert_to_tensor=True)
        query_embedding = torch.nn.functional.normalize(query_embedding, p=2, dim=0)

        similarities = []
        for _, row in filtered_df.iterrows():
            # Build the course tensor on the query's device and dtype so the
            # comparison also works when the model runs on a GPU
            course_embedding = torch.tensor(
                row["embedding"],
                dtype=query_embedding.dtype,
                device=query_embedding.device,
            )
            sim_score = util.pytorch_cos_sim(query_embedding, course_embedding)[0][0].item()

            programme_name = _lowered(row.get("Programme"))
            school_name = _lowered(row.get("School/Department"))

            boost = 0.0
            for keyword in interest_keywords:
                if keyword in programme_name or keyword in school_name:
                    boost += 0.15  # add boost for keywords that appear in the names

            similarities.append(sim_score + boost)

        filtered_df = filtered_df.copy()
        filtered_df["similarity"] = similarities
        filtered_df = filtered_df.sort_values(by="similarity", ascending=False)
        top_courses = filtered_df.head(top_n)

    # Format results
    detail_fields = [
        "Programme Type", "Programme", "Faculty", "School/Department",
        "Entry Requirement", "English Language Requirement",
        "Specific Requirements", "Duration", "Malaysian Fee",
        "International Fee", "Intake", "Qualification (Postgraduate)",
        "PG Modules", "Why this course?"
    ]
    # The module columns are the same for every result, so find them once
    core_columns = [col for col in df.columns if "Core Modules" in col]
    optional_columns = [col for col in df.columns if "Optional Modules" in col]

    results = []
    for _, course in top_courses.iterrows():
        course_info = {field: course.get(field, "N/A") for field in detail_fields}

        if core_columns:
            course_info["Core Modules"] = _joined_modules(course, core_columns)
        if optional_columns:
            course_info["Optional Modules"] = _joined_modules(course, optional_columns)

        results.append(course_info)

    return results

In [None]:
# Takes the user's input, updates the conversation context, and produces a response generated by Gemini
def _mentions_any(text, keywords):
    """Report whether `text` mentions any of `keywords`.

    Keywords of four characters or fewer (e.g. "ba", "ug", "pg") must appear as
    a whole word, so ordinary words that merely contain them ("database",
    "rugby") do not count. Longer keywords keep plain substring matching, which
    still accepts forms such as "masters" or "bachelor's".
    """
    words = set("".join(ch if ch.isalnum() else " " for ch in text).split())
    return any(
        keyword in words if len(keyword) <= 4 else keyword in text
        for keyword in keywords
    )


def _numbered_course_list(courses, degree_level):
    """Render `courses` as numbered entries separated by blank lines.

    Undergraduate entries are followed by the programme type and postgraduate
    entries by the postgraduate qualification ("N/A" when the key is absent);
    other levels show the programme name only.
    """
    if degree_level == "undergraduate":
        qualifier_field = "Programme Type"
    elif degree_level == "postgraduate":
        qualifier_field = "Qualification (Postgraduate)"
    else:
        qualifier_field = None

    entries = []
    for position, course in enumerate(courses, start=1):
        entry = f"{position}. {course['Programme']}"
        if qualifier_field:
            entry += f" ({course.get(qualifier_field, 'N/A')})"
        entries.append(entry)
    return "\n\n".join(entries)


def _tone_instruction(emotion_label):
    """Map a detected emotion label to the tone instruction used in the prompt."""
    if emotion_label in ["joy", "love", "gratitude", "admiration", "amusement", "excitement", "pride", "relief", "approval", "desire", "optimism"]:
        return "Use a cheerful and supportive tone."
    if emotion_label in ["anger", "annoyance", "disapproval", "disgust"]:
        return "Use a calm, understanding, and patient tone to reassure the user."
    if emotion_label in ["fear", "nervousness", "worry", "grief", "remorse", "embarrassment"]:
        return "Use a reassuring and friendly tone to make the user feel comfortable."
    if emotion_label in ["sadness", "disappointment"]:
        return "Use an empathetic and encouraging tone to uplift the user."
    if emotion_label in ["confusion", "surprise", "curiosity", "realization"]:
        return "Use a helpful and informative tone, explaining clearly."
    return "Use a neutral and professional tone."


def chat_with_gemini(user_input):
    """Advance the conversation by one user message and return the bot's reply.

    The function reads and updates the global `session_memory`. In order, it:
    handles the 'exit', 'continue' and 'option' commands; detects a degree level
    in the message (a foundation request is answered directly with one course);
    detects a course interest by matching the message against programme names
    in `df`; once both are known, either validates a pending '1'/'2'/'3'
    selection or fetches new matches via search_courses(); and finally, when no
    earlier step has returned, asks Gemini for a reply about the selected
    course.

    Args:
        user_input: The user's message.

    Returns:
        The reply text: a fixed message, a formatted course or course list, or
        the text generated by Gemini.
    """
    retrieved_data = []
    user_input_lower = user_input.lower().strip()
    session_memory["history"].append({"user": user_input})

    # Handle 'exit' early
    if user_input_lower == "exit":
        return "Thank you for using CourseMate! If you need more help, feel free to reach out again. Have a great day! 🌟"

    # Handle 'continue' after being provided with course info
    if user_input_lower == "continue":
        session_memory["degree_level"] = None
        session_memory["course_interest"] = None
        session_memory["retrieved_data"] = None
        session_memory["selected_programme"] = None
        return "Sure! Are you looking for foundation, undergraduate or postgraduate programmes?"

    # Handle 'option' to redisplay previous list
    if user_input_lower == "option" and session_memory.get("retrieved_data"):
        session_memory["selected_programme"] = None
        return (
            "Here are the previous course options:\n\n"
            + _numbered_course_list(session_memory["retrieved_data"], session_memory.get("degree_level"))
            + "\n\nPlease type ‘1’, ‘2’, or ‘3’ to get the course details."
        )

    # Degree level detection using keyword sets
    undergraduate_keywords = ["undergraduate", "ug", "undergrad", "bachelor", "bsc", "ba", "meng", "beng"]
    postgraduate_keywords = ["postgraduate", "pg", "master", "msc", "mres", "mpharm", "phd", "postgrad"]
    foundation_keywords = ["foundation"]

    # Auto-detect progression phrasing like "after foundation"
    if "after foundation" in user_input_lower or "following foundation" in user_input_lower:
        session_memory["degree_level"] = "undergraduate"
    elif "after undergraduate" in user_input_lower or "after bachelor's" in user_input_lower:
        session_memory["degree_level"] = "postgraduate"
    elif _mentions_any(user_input_lower, foundation_keywords):
        session_memory["degree_level"] = "foundation"

        # Foundation is special — show one result directly
        retrieved_data = search_courses(user_input, top_n=1, level_filter="foundation")
        session_memory["retrieved_data"] = retrieved_data
        session_memory["selected_programme"] = None

        if not retrieved_data:
            return "Sorry, I couldn't find any foundation programmes matching your description. Could you try another search?"

        return format_course_info(retrieved_data[0]) + "\n\nWould you like to continue exploring other courses or end the session here? Type 'continue' to explore, 'exit' to end the session."

    elif _mentions_any(user_input_lower, undergraduate_keywords):
        session_memory["degree_level"] = "undergraduate"
    elif _mentions_any(user_input_lower, postgraduate_keywords):
        session_memory["degree_level"] = "postgraduate"

    # Detect course interest if not already stored
    if not session_memory.get("course_interest"):
        # Every run of 1 to 4 consecutive words of the message, built once
        words = user_input_lower.split()
        candidate_phrases = [
            " ".join(words[start:start + size])
            for size in range(1, min(5, len(words) + 1))
            for start in range(len(words) - size + 1)
        ]

        matched_phrases = set()
        for prog_name in df["Programme"].dropna().str.lower():
            if prog_name in user_input_lower or user_input_lower in prog_name:
                matched_phrases.add(prog_name)
            else:
                matched_phrases.update(
                    phrase for phrase in candidate_phrases if phrase in prog_name
                )

        if matched_phrases:
            # Sorted so the stored interest does not depend on set ordering
            session_memory["course_interest"] = ", ".join(sorted(matched_phrases))
        else:
            return "Could you please share what specific course areas you're interested in? This helps me narrow down the options."

        # If course interest found but degree level not set
        if not session_memory.get("degree_level"):
            return "Are you looking for foundation, undergraduate, or postgraduate programmes?"

    # Once both degree level and course interest are known
    if session_memory.get("course_interest") and session_memory.get("degree_level"):
        # Check if selection is pending
        if session_memory.get("retrieved_data") and not session_memory.get("selected_programme"):
            choice_map = {
                "1": 0, "one": 0,
                "2": 1, "two": 1,
                "3": 2, "three": 2
            }
            options = session_memory["retrieved_data"]
            chosen_index = choice_map.get(user_input_lower)

            # Reject unknown answers and numbers beyond the options on offer
            if chosen_index is None or chosen_index >= len(options):
                return "Please choose one by typing ‘1’, ‘2’, or ‘3’ to see full course details."

            session_memory["selected_programme"] = options[chosen_index]

        # If selected, show just that course
        if session_memory.get("selected_programme"):
            retrieved_data = [session_memory["selected_programme"]]
        else:
            # Fetch new top 3 courses
            top_n = 1 if session_memory["degree_level"] == "foundation" else 3
            retrieved_data = search_courses(
                session_memory["course_interest"],
                top_n=top_n,
                level_filter=session_memory["degree_level"]
            )
            session_memory["retrieved_data"] = retrieved_data
            session_memory["selected_programme"] = None

            if not retrieved_data:
                return "Sorry, I couldn't find any courses matching that description. Could you try another search?"

            if len(retrieved_data) == 1:
                return format_course_info(retrieved_data[0]) + "\n\nWould you like to continue exploring other courses or end the session here? Type 'continue' to explore, 'exit' to end the session."

            return (
                "Here are the top course matches:\n\n"
                + _numbered_course_list(retrieved_data, session_memory.get("degree_level"))
                + "\n\nPlease type ‘1’, ‘2’, or ‘3’ to get more details about the one you’re most interested in."
            )

    # Emotion-tone adaptation (only needed once a Gemini reply is required)
    emotions = detect_emotion(user_input, top_k=3)
    top_emotion = emotions[0]["label"] if emotions else None
    tone_prefix = _tone_instruction(top_emotion)

    # Format course info for response
    context = "\n\n".join([format_course_info(c) for c in retrieved_data])

    # Gemini prompt
    prompt = f"""
    You are an intelligent and helpful course advisor chatbot. 
    Your role is to assist prospective students in exploring university degree programmes based on their course interests 
    and desired study level (foundation, undergraduate, or postgraduate). 

    Avoid starting your reply with greetings like "Hi", "Hello", or "Hey". Go straight into the relevant course information.

    The logic of this interaction works as follows:

    1. The user first expresses their course interest. From their input, extract relevant course-related keywords or phrases and match them against known programme names in the database.
    2. The user may also specify their intended degree level (foundation, undergraduate, or postgraduate). If not explicitly mentioned, follow-up prompts may request clarification.
    3. Once both course interest and degree level are identified, filter the course database to retrieve up to 3 of the most relevant programmes.
    4. If more than one relevant course is found, display a list of options (e.g., "1. Programme A", "2. Programme B", etc.) and ask the user to choose one by typing ‘1’, ‘2’, or ‘3’. 
    5. When the user selects an option, return a *detailed summary* of that course only.

    The user's last message was:  
    "{user_input}" 
    Detected course interest: {session_memory.get("course_interest", "unknown")}  
    Selected degree level: {session_memory.get("degree_level", "unspecified")}  

    Course data retrieved from the database:  
    {context}

    Now, respond with a complete and engaging explanation of the course details as per the user’s interest and level, using a tone that is empathetic and adapted to the {tone_prefix}, without explicitly mentioning their emotions.

    At the end of the explanation, ask:
    "Would you like more information on another option from the list, or explore a different course? 
    You can type 'option' to view the list again, 'continue' to explore other courses, or 'exit' if you're all set for now."
    
    If the user's question seems unrelated to university courses or degree programmes, politely ask them to rephrase or provide a course-related query instead.
    
    If the user inquires about what they can do after graduation in relation to this course, mention potential career paths or further study options based on the programme content.
    """
    genai_model = genai.GenerativeModel("models/gemini-1.5-flash")

    # To limit prompt length
    MAX_PROMPT_LENGTH = 30000
    prompt = prompt[:MAX_PROMPT_LENGTH]
    response = genai_model.generate_content(prompt)

    try:
        return response.text
    except ValueError:
        # NOTE(review): the Gemini client is understood to raise ValueError from
        # `.text` when the response carries no usable text (e.g. it was
        # blocked) — confirm against the google-generativeai docs.
        return "Sorry, I couldn't generate a response to that. Could you please rephrase your question?"

In [None]:
# Start every session from a clean slate
keyword_memory = {}
session_memory = {
    "degree_level": None,
    "course_interest": None,
    "intent": None,
    "history": [],
}

# Greet the user once per run
print("🤖 Chatbot: Hi! I'm CourseMate, your guide to courses at the University of Nottingham Malaysia. How can I help you today?")

# Closing message, also used to recognise a reply that ends the session
farewell_text = "Thank you for using CourseMate! If you need more help, feel free to reach out again. Have a great day! 🌟"

# Messages that end the conversation straight away
goodbye_phrases = {
    "bye", "goodbye", "bye bye", "ok bye", "thanks", "thank you", "thank u",
    "thanks bye", "tq", "see you", "see ya", "exit",
}

# Read-respond loop; runs until the user or the bot says goodbye
while True:
    user_query = input("👤 You: ").lower().strip()

    if user_query in goodbye_phrases:
        print("🤖 Chatbot: " + farewell_text)
        break

    extract_keywords(user_query)
    bot_response = chat_with_gemini(user_query)

    # A reply carrying the closing message is printed without the leading
    # blank line and terminates the session
    if farewell_text in bot_response:
        print("🤖 Chatbot:", bot_response)
        break

    print("\n🤖 Chatbot:", bot_response)

In [None]:
# Interface

# Memory
# Fresh conversation state for the Gradio interface; reset_chat() rebuilds both
# objects with this same shape.
keyword_memory = {}  # filled by extract_keywords(): lower-cased programme name -> None
session_memory = {
    "degree_level": None,
    "course_interest": None,
    "intent": None,
    "history": []
}

# Exit trigger keywords
# chatbot_conversation() ends the session when the stripped, lower-cased message
# equals one of these. "exit" is not listed here; chat_with_gemini() handles it.
exit_keywords = {"bye", "goodbye", "bye bye", "ok bye", "thanks", "thank you", "thank u",
                 "thanks bye", "tq", "see you", "see ya"}

# Gradio submit handler
def chatbot_conversation(user_query, history):
    """Answer one submitted message and report the new state of the textbox.

    Args:
        user_query: The text the user submitted.
        history: The chat history as a list of [user, bot] pairs; the new
            exchange is appended to it in place.

    Returns:
        A `(history, textbox_update)` pair. The second item is
        `gr.update(visible=False)` when the session has ended (an exit keyword
        was typed, or the reply contains the closing message) and "" otherwise.
    """
    if user_query.strip().lower() in exit_keywords:
        # Exit keywords (case-insensitive) are answered without calling the model
        reply = "🤖 Chatbot: Thank you for using CourseMate! If you need more help, feel free to reach out again. Have a great day! 🌟"
        session_ended = True
    else:
        extract_keywords(user_query)
        reply = chat_with_gemini(user_query)
        session_ended = "Thank you for using CourseMate! If you need more help, feel free to reach out again." in reply

    history.append([user_query, reply])
    textbox_update = gr.update(visible=False) if session_ended else ""
    return history, textbox_update

# Reset memory, UI and welcome message
def reset_chat():
    """Start a new conversation: clear the stored state and restore the UI.

    Rebinds the global `session_memory` and `keyword_memory` to fresh, empty
    values.

    Returns:
        A `(chat_history, textbox_update)` pair: a history holding only the
        welcome message, and an update that empties the textbox and makes it
        visible again.
    """
    global session_memory, keyword_memory
    session_memory = {
        "degree_level": None,
        "course_interest": None,
        "intent": None,
        "history": []
    }
    keyword_memory = {}

    # First chatbot-only message (no user yet)
    welcome_message = [
        ["🤖", "Hi! I'm CourseMate. What course are you interested in exploring today (e.g., computer science or mathematics)?"]
    ]

    # chatbot_conversation() hides the textbox when a session ends; returning a
    # plain "" would clear it but leave it hidden, so the user could not type
    # again after pressing Clear.
    return welcome_message, gr.update(value="", visible=True)

# Interface
# Builds the page top to bottom: title, usage guide, chat window, input box and
# Clear button, then wires the two events and launches the app.
with gr.Blocks() as demo:
    gr.Markdown("<h2 style='text-align: center;'>🎓 CourseMate - Your UNM Course Advisor</h2>")

    # Instructions visible at all times
    # NOTE(review): the guide text below numbers two consecutive steps "3." —
    # confirm whether the literal numbering should be corrected.
    gr.Markdown("""
    ### 💬 Quick Guide:
    1. Type a course-related field (e.g. **education**, **mechatronic engineering**) — be specific.
    2. Please select one option from the list of recommended courses **(type 1, 2 or 3)**.
    3. You can then ask follow up questions.
    3. Type **'continue'** to explore more courses or **'option'** to view the list again.
    4. To exit, you may type **'bye'**, **'exit'**, or **'thank you'**.
    5. To clear the chat and restart, **please press the ‘Clear’ button** below.
    """)

    # Chatbot initialized with welcome message
    # (the same welcome text that reset_chat() restores)
    chatbot = gr.Chatbot(
        label="CourseMate",
        show_label=False,
        height=300,
        value=[
            ["🤖", "Hi! I'm CourseMate. What course are you interested in exploring today (e.g., computer science or mathematics)?"]
        ]
    )

    msg = gr.Textbox(label="👤 You:", placeholder="Ask me anything about UNM courses!", lines=1)
    clear = gr.Button("Clear")

    def respond(user_query, chat_history):
        """Forward a submitted message and the current history to chatbot_conversation()."""
        return chatbot_conversation(user_query, chat_history)

    # Submitting the textbox sends (text, history) to respond(), whose two
    # return values update the chat window and the textbox.
    msg.submit(respond, [msg, chatbot], [chatbot, msg])
    # Clear takes no inputs and resets both the chat window and the textbox.
    clear.click(reset_chat, None, [chatbot, msg])
    
# NOTE(review): share=True presumably asks Gradio for a publicly reachable
# link — confirm that exposing the app this way is intended.
demo.launch(share=True)