# Import required packages

In [8]:
#load packages
import pandas as pd
import os
import json
import chromadb
from sentence_transformers import SentenceTransformer
import torch
from huggingface_hub import login
from dotenv import load_dotenv
import torchvision

# Load in Documents

In [2]:
# Read in csv for majors
majors_meta = pd.read_csv("Scraped_data/majors_meta_cleaned.csv")
majors_meta.head()

Unnamed: 0.1,Unnamed: 0,Major,Text,School
0,0,"Architectural History, B.Ar.H.","Architectural History, B.Ar.H. Print-Friendly ...",School of Architecture
1,1,"Architecture, B.S.","Architecture, B.S. Print-Friendly Page (opens ...",School of Architecture
2,2,"Urban and Environmental Planning, B.U.E.P.","Urban and Environmental Planning, B.U.E.P. Pri...",School of Architecture
3,3,"African-American and African Studies, B.A.","African American and African Studies, B.A. Pri...",College of Arts and Sciences
4,4,"Anthropology, B.A.","Anthropology, B.A. Print-Friendly Page (opens ...",College of Arts and Sciences


In [3]:
import glob
csv_files = glob.glob("./Lous_List_Database/*.csv") # Get all files within folder
courses = pd.concat(
    (pd.read_csv(f).assign(filename=os.path.basename(f)[:-4]) for f in csv_files),
    ignore_index=True
)

courses.head()

Unnamed: 0,ClassNumber,Mnemonic,Number,Section,Type,Units,Instructor,Days,Room,Title,Topic,Status,Enrollment,EnrollmentLimit,Waitlist,Description,filename
0,10003,AAS,1010,100,Lecture,4,"Robert Vinson, Naseemah Mohamed",TuTh 12:30pm - 1:45pm,Minor Hall 125,Introduction to African-American and African S...,,Open,0,180,0,This introductory course surveys the histories...,Fall_25_UVA_w_descr
1,12774,AAS,1010,101,Discussion,0,To Be Announced,We 6:00pm - 6:50pm,New Cabell Hall 283,Introduction to African-American and African S...,,Open,0,20,0,This introductory course surveys the histories...,Fall_25_UVA_w_descr
2,10006,AAS,1010,102,Discussion,0,To Be Announced,We 5:00pm - 5:50pm,New Cabell Hall 287,Introduction to African-American and African S...,,Open,0,20,0,This introductory course surveys the histories...,Fall_25_UVA_w_descr
3,10004,AAS,1010,103,Discussion,0,To Be Announced,Tu 7:00pm - 7:50pm,New Cabell Hall 411,Introduction to African-American and African S...,,Open,0,20,0,This introductory course surveys the histories...,Fall_25_UVA_w_descr
4,10005,AAS,1010,104,Discussion,0,To Be Announced,Tu 8:00pm - 8:50pm,New Cabell Hall 411,Introduction to African-American and African S...,,Open,0,20,0,This introductory course surveys the histories...,Fall_25_UVA_w_descr


# Store data in ChromaDB vectorized database

### Creating and Testing Embedding

In [4]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

embed_model = HuggingFaceEmbedding(model_name = "all-MiniLM-L6-v2") # Load local embedding model
Settings.embed_model = embed_model
print("Embedding a test sentence:", Settings.embed_model.get_text_embedding("test sentence")[:5])

Embedding a test sentence: [0.042972832918167114, 0.09663482010364532, -0.002129172207787633, 0.07826829701662064, -0.00641746586188674]


### Creating ChromaDB and Saving Documents

#### 1. Majors Index

In [6]:
from llama_index.core.schema import Document
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore

# Convert each row to a LlamaIndex document
documents = [
    Document(
        text = row["Text"],
        metadata = {"Major": row["Major"], "School": row["School"]}
    )
    for _, row in majors_meta.iterrows()
]

# 1. Initialize Chroma vector store for majors (persistent DB stored in ./chroma_db folder)
chroma_store = ChromaVectorStore.from_params(
    collection_name = "majors_collection",
    persist_dir = "./chroma_db"  # local directory for vector store
)

# 2. Create the majors index — this will auto-chunk + auto-embed
index = VectorStoreIndex.from_documents(documents, vector_store = chroma_store)
index.storage_context.persist() # 3. Persist to disk

#### 2. Courses Index

In [7]:
# Convert each row to a LlamaIndex document
course_docs = []
for _, row in courses.iterrows():
    course_text = f"""Course ID: {row['Mnemonic']+str(row['Number'])}
        Title: {row['Title']}
        Credits: {row['Units']},
        Instructor: {row['Instructor']},
        Schedule: {row['Days']},
        Period: {row['filename']}"""
    course_docs.append(Document(text=course_text, metadata=row.to_dict()))
    
# 1. Initialize Chroma vector store for courses
courses_chroma_store = ChromaVectorStore.from_params(
    collection_name = "courses_collection", 
    persist_dir = "./chroma_db")

# 2. Create the courses index
courses_index = VectorStoreIndex.from_documents(course_docs, vector_store = courses_chroma_store)
courses_index.storage_context.persist() # 3. Persist to disk

# Retrieval

In [9]:
from llama_index.core import StorageContext, load_index_from_storage

# 1. Reload the Index from Storage
#storage_context = StorageContext.from_defaults(persist_dir = "./chroma_db")
#index = load_index_from_storage(storage_context)

# 2. Set Up a Query Engine (Retriever + LLM)
retriever = index.as_retriever(similarity_top_k = 2, embed_model = embed_model)

query = "core courses for computer science major"
nodes = retriever.retrieve(query)

for i, node in enumerate(nodes, 1):
    print(f"\n📚 Result {i}:")
    print(node.metadata)
    print(node.text[:500])


📚 Result 1:
{'Major': 'Computer Science, B.S.', 'School': 'School of Engineering and Applied Science'}
Some students with programming experience may wish to take the place-out test, which satisfies the requirement but does not award degree credit.  Please contact the department for more information about place-out tests. Choose one of the following: CS 1110 - Introduction to Programming Credits:          3 CS 1111 - Introduction to Programming Credits:          3 CS 1112 - Introduction to Programming Credits:          3 CS 1113 - Introduction to Programming Credits:          3 Foundation Courses 

📚 Result 2:
{'Major': 'Computer Science, B.A.', 'School': 'College of Arts and Sciences'}
Computer Science, B.A. Print-Friendly Page (opens a new window) Return to: College of Arts & Sciences: Degree Programs The BA degree program in Computer Science provides students with a solid foundation in computer science theory and practice. An important goal of the program is give students the abilit

In [10]:
# Querying the courses index
nodes = courses_index.as_retriever(similarity_top_k=3).retrieve("Data Science Spring 2025")

for i, node in enumerate(nodes, 1):
    print(f"\n📚 Result {i}:")
    print(node.metadata)
    print(node.text[:500])


📚 Result 1:
{'ClassNumber': 12120, 'Mnemonic': 'STAT', 'Number': 1601, 'Section': '001', 'Type': 'Lecture', 'Units': '3', 'Instructor': 'Taylor Brown', 'Days': 'MoWe 2:00pm - 3:15pm', 'Room': 'Ridley Hall G008', 'Title': 'Introduction to Data Science with R', 'Topic': nan, 'Status': 'Closed', 'Enrollment': 93, 'EnrollmentLimit': 90, 'Waitlist': 0, 'Description': nan, 'filename': 'Spring_2023'}
Course ID: STAT1601
        Title: Introduction to Data Science with R
        Credits: 3,
        Instructor: Taylor Brown,
        Schedule: MoWe 2:00pm - 3:15pm,
        Period: Spring_2023

📚 Result 2:
{'ClassNumber': 12121, 'Mnemonic': 'STAT', 'Number': 1601, 'Section': '002', 'Type': 'Lecture', 'Units': '3', 'Instructor': 'Taylor Brown', 'Days': 'MoWe 3:30pm - 4:45pm', 'Room': 'Ridley Hall G008', 'Title': 'Introduction to Data Science with R', 'Topic': nan, 'Status': 'Closed', 'Enrollment': 90, 'EnrollmentLimit': 90, 'Waitlist': 0, 'Description': nan, 'filename': 'Spring_2023'}
Course ID: 

# Retrieve relevant information from both indices

In [11]:
# Retrieve major requirements
major_retriever = index.as_retriever(similarity_top_k=1)
major_info = major_retriever.retrieve("Biology Major")[0].get_content()

# Retrieve available courses (you can filter this by Spring 2025)
course_retriever = courses_index.as_retriever(similarity_top_k=20)
available_courses = course_retriever.retrieve("Biology Fall 2025")
available_course_texts = "\n\n".join([node.get_content() for node in available_courses])

# Generation

In [12]:
from llama_index.llms.huggingface import HuggingFaceLLM


In [13]:
# Set system prompt
SYSTEM_PROMPT = """You are a helpful academic advisor at UVA. Given the student's major requirements and available courses, generate a schedule for Spring 2025. 
- Include only courses relevant to their major
- Avoid time conflicts
- Aim for 12 to 15 credits
- Each bullet should include: Course ID, Title, Days & Time, and Instructor.
- Include course title, schedule, and professor.
- Only use courses from the list provided below.

Format:

* <Course ID>: <Title> | <Schedule> | <Instructor>"""

In [14]:
load_dotenv() # Load environment variables from .env file
token = os.getenv("TOKEN") # Retrieve the token

# Log into huggingface
login(token = token)

# 1. Load model
llm = HuggingFaceLLM(model_name = "mistralai/Mistral-7B-Instruct-v0.1",
                     tokenizer_name = "mistralai/Mistral-7B-Instruct-v0.1",
                     device_map = "auto",       # uses GPU if available, else CPU
                     max_new_tokens = 512,  # or even 1024
                     model_kwargs = {
                         "offload_folder": "./offload",  # If needed for disk-based offloading
                         "torch_dtype": torch.float16,
                         "trust_remote_code": True
                     },
                     generate_kwargs = {
                         "temperature": 0.1
                     },
                     system_prompt = SYSTEM_PROMPT)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [15]:
# Set system prompt
SYSTEM_PROMPT = """You are a helpful academic advisor at UVA. Given the student's major requirements and available courses, generate a schedule for Spring 2025. 
- Include only courses relevant to their major
- Avoid time conflicts
- Aim for 12 to 15 credits
- Each bullet should include: Course ID, Title, Days & Time, and Instructor.
- Include course title, schedule, and professor.
- Only use courses from the list provided below.

Format:

* <Course ID>: <Title> | <Schedule> | <Instructor>"""

# Combine system prompt with information retrieved from previous queries
full_prompt = f"""{SYSTEM_PROMPT}

Major Requirements:
{major_info}

Available Courses:
{available_course_texts}

Generate the schedule below:
"""

response = llm.complete(full_prompt)
print(response.text)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



* <Course ID>: <Title> | <Schedule> | <Instructor>

Course ID: BIOL1040
        Title: The DNA Revolution in Science and Society
        Credits: 3,
        Instructor: Douglas Taylor,
        Schedule: TuTh 3:30pm - 4:50pm,
        Period: Fall_2024

Course ID: BIOL2100
        Title: Introduction to Biology with Laboratory: Cell Biology & Genetics
        Credits: 0,
        Instructor: David Kittlesen,
        Schedule: We 2:00pm - 4:30pm,
        Period: Fall_2025

Course ID: BIOL2200
        Title: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology
        Credits: 0,
        Instructor: Jessamyn Manson,
        Schedule: MoWeFr 10:00am - 10:50am,
        Period: Fall_2025

Course ID: BIOL4610
        Title: Molecular Evolution: Diversity, Mutants, and the Biological Myth of Race
        Credits: 3,
        Instructor: Alan Bergland,
        Schedule: TuTh 11:00am - 12:15pm,
        Period: Spring_2022

Course ID: BIOL3020
        Title: Evolution and Ecology

### Function to generate repsonse

In [19]:
def generate_response(query, k_s = 20):
    # Retrieve context on the major
    major_retriever = index.as_retriever(similarity_top_k=1)
    major_info = major_retriever.retrieve(query)[0].get_content()
    
    # Retrieve available courses
    course_retriever = courses_index.as_retriever(similarity_top_k=20)
    available_courses = course_retriever.retrieve(query)
    available_course_texts = "\n\n".join([node.get_content() for node in available_courses])

    # Construct full prompt with retrieved information
    full_prompt = f"""{SYSTEM_PROMPT}

    Major Requirements:
    {major_info}

    Available Courses:
    {available_course_texts}

    Generate the schedule below:
    """
    
    response = llm.complete(full_prompt) # prompt response
    return response.text

query = "second year biology major for fall 2025"
print(generate_response(query))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL4840: Capstone for the Bachelor of Science in Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to Biology w/Laboratory: Organismal & Evolutionary Biology | Tu 2:00pm - 2:50pm,
    * BIOL2200: Introduction to B

### Function to generate response, splits reponse to 2 parts, for better document retrieval 

In [102]:
import re

def generate_response(query, k_s=20):
    # --- STEP 1: Extract major and year ---
    major_query = ""
    course_query = ""

    # Match something like: "second year biology major"
    major_match = re.search(
    r"(first|second|third|fourth)?\s*year\s+([a-zA-Z\s&]+?)\s+(major|student)",
    query,
    re.IGNORECASE
    )

    if major_match:
        year = major_match.group(1) or ""
        major_name = major_match.group(2).strip()
        full_major_phrase = f"{year} year {major_name} major".strip() if year else f"{major_name} major"
        major_query = re.sub(r'\s+', ' ', full_major_phrase)  # normalize spacing
    else:
        major_query = ""  # or fallback to query if needed

    # extract semester info 
    term_match = re.search(r"(fall|spring|summer|winter)\s+\d{4}", query, re.IGNORECASE)
    term_phrase = term_match.group(0) if term_match else ""

    # make queries
    if major_name:
        course_query = f"{term_phrase} {major_name} courses"
    else:
        course_query = query

    #retrieval majors
    major_retriever = index.as_retriever(similarity_top_k=3)
    major_info = major_retriever.retrieve(major_query)[0].get_content()

    # retrieve courses
    course_retriever = courses_index.as_retriever(similarity_top_k=k_s)
    available_courses = course_retriever.retrieve(course_query)
    available_course_texts = "\n\n".join([node.get_content() for node in available_courses])

    # prompt llm
    full_prompt = f"""{SYSTEM_PROMPT}

Major Requirements:
{major_info}

Available Courses:
{available_course_texts}

User Query:
{query}

Generate the schedule below:
"""

    response = llm.complete(full_prompt)
    return response.text


In [64]:
generate_response("Can you make a semeter schedule, for a second year biology major for fall 2025?")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nCourse ID: BIOL4840\n        Title: Capstone for the Bachelor of Science in Biology\n        Credits: 1,\n        Instructor: Jessamyn Manson,\n        Schedule: Tu 2:00pm - 2:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 0,\n        Instructor: Maria Ferrara,\n        Schedule: Th 4:00pm - 4:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 0,\n        Instructor: Maria Ferrara,\n        Schedule: Th 5:00pm - 5:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 4,\n        Instructor: Maria Ferrara,\n        Schedule: MoWe 3:30pm - 4:45pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: ENWR1510\n        Title: Writing and Critical Inquiry\n        Credits: 3,\n        Instructor: Rhiannon Goad,\n  

In [18]:
print('\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nCourse ID: BIOL4840\n        Title: Capstone for the Bachelor of Science in Biology\n        Credits: 1,\n        Instructor: Jessamyn Manson,\n        Schedule: Tu 2:00pm - 2:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 0,\n        Instructor: Maria Ferrara,\n        Schedule: Th 4:00pm - 4:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 0,\n        Instructor: Maria Ferrara,\n        Schedule: Th 5:00pm - 5:50pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: STAT2020\n        Title: Statistics for Biologists\n        Credits: 4,\n        Instructor: Maria Ferrara,\n        Schedule: MoWe 3:30pm - 4:45pm,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: ENWR1510\n        Title: Writing and Critical Inquiry\n        Credits: 3,\n        Instructor: Rhiannon Goad,\n        Schedule: MoWeFr 9:00am - 9:50am,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: PHYS2010\n        Title: Principles of Physics 1 for Pre-Health Students\n        Credits: 3,\n        Instructor: Rebecca Grouchy,\n        Schedule: TuTh 9:30am - 10:45am,\n        Period: Fall_25_UVA_w_descr\n\nCourse ID: ENWR1510\n        Title: Writing and Critical Inquiry\n        Credits: ')


* <Course ID>: <Title> | <Schedule> | <Instructor>

Course ID: BIOL4840
        Title: Capstone for the Bachelor of Science in Biology
        Credits: 1,
        Instructor: Jessamyn Manson,
        Schedule: Tu 2:00pm - 2:50pm,
        Period: Fall_25_UVA_w_descr

Course ID: STAT2020
        Title: Statistics for Biologists
        Credits: 0,
        Instructor: Maria Ferrara,
        Schedule: Th 4:00pm - 4:50pm,
        Period: Fall_25_UVA_w_descr

Course ID: STAT2020
        Title: Statistics for Biologists
        Credits: 0,
        Instructor: Maria Ferrara,
        Schedule: Th 5:00pm - 5:50pm,
        Period: Fall_25_UVA_w_descr

Course ID: STAT2020
        Title: Statistics for Biologists
        Credits: 4,
        Instructor: Maria Ferrara,
        Schedule: MoWe 3:30pm - 4:45pm,
        Period: Fall_25_UVA_w_descr

Course ID: ENWR1510
        Title: Writing and Critical Inquiry
        Credits: 3,
        Instructor: Rhiannon Goad,
        Schedule: MoWeFr 9:00am - 9:50

# Tests

## Prompt Options
Can you make a semester schedule for a second year biomedical engineering student for fall 2025?

Can you make a semester schedule or a fourth year biomedical engineering student for fall 2025?

Can you make a semester schedule for a first year economics student for fall 2025?

Can you make a semester schedule for a third year economics student for fall 2025?

Can you make a semester schedule for a third year architecture student for fall 2025?

In [103]:
generate_response("Can you make a semester schedule for a second year biomedical engineering student for fall 2025?", k_s=20)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nCourse ID: BME4550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: David Chen,\n        Schedule: Fr 8:30am - 11:30am,\n        Period: Fall_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Nikki Hastings,\n        Schedule: TuTh 8:00am - 10:45am,\n        Period: Fall_2025\n\nCourse ID: BME6056\n        Title: Going Pro, Professional Development in Biomedical Engineering\n        Credits: 1 - 2,\n        Instructor: Kimberly Kelly,\n        Schedule: Mo 11:00am - 12:00pm,\n        Period: Spring_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Frederick Epstein, John Hossack,\n        Schedule: MoWe 2:00pm - 3:15pm,\n        Period: Spring_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n    

In [104]:
print('\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nCourse ID: BME4550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: David Chen,\n        Schedule: Fr 8:30am - 11:30am,\n        Period: Fall_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Nikki Hastings,\n        Schedule: TuTh 8:00am - 10:45am,\n        Period: Fall_2025\n\nCourse ID: BME6056\n        Title: Going Pro, Professional Development in Biomedical Engineering\n        Credits: 1 - 2,\n        Instructor: Kimberly Kelly,\n        Schedule: Mo 11:00am - 12:00pm,\n        Period: Spring_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Frederick Epstein, John Hossack,\n        Schedule: MoWe 2:00pm - 3:15pm,\n        Period: Spring_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Robert Perry,\n        Schedule: TuTh 4:30pm - 5:45pm,\n        Period: Spring_2025\n\nCourse ID: BME6056\n        Title: Going Pro, Professional Development in Biomedical Engineering\n        Credits: 1 - 2,\n        Instructor: Robert Perry,\n        Schedule: We 3:30pm - 4:45pm,\n        Period: Fall_2025\n\nCourse ID: BME6550\n        Title: Special Topics in Biomedical Engineering\n        Credits: 3,\n        Instructor: Steven Caliari,\n       ')


* <Course ID>: <Title> | <Schedule> | <Instructor>

Course ID: BME4550
        Title: Special Topics in Biomedical Engineering
        Credits: 3,
        Instructor: David Chen,
        Schedule: Fr 8:30am - 11:30am,
        Period: Fall_2025

Course ID: BME6550
        Title: Special Topics in Biomedical Engineering
        Credits: 3,
        Instructor: Nikki Hastings,
        Schedule: TuTh 8:00am - 10:45am,
        Period: Fall_2025

Course ID: BME6056
        Title: Going Pro, Professional Development in Biomedical Engineering
        Credits: 1 - 2,
        Instructor: Kimberly Kelly,
        Schedule: Mo 11:00am - 12:00pm,
        Period: Spring_2025

Course ID: BME6550
        Title: Special Topics in Biomedical Engineering
        Credits: 3,
        Instructor: Frederick Epstein, John Hossack,
        Schedule: MoWe 2:00pm - 3:15pm,
        Period: Spring_2025

Course ID: BME6550
        Title: Special Topics in Biomedical Engineering
        Credits: 3,
        Instructor

In [109]:
generate_response("Can you make a semester schedule or a fourth year biomedical engineering student for fall 2025?", k_s=20)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nSpring 2025 Schedule:\n\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | Kimberly Kelly\n* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | David Chen\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | 

In [113]:
print('\n* <Course ID>: <Title> | <Schedule> | <Instructor>\n\nSpring 2025 Schedule:\n\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | Kimberly Kelly\n* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | David Chen\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings\n* BME65')


* <Course ID>: <Title> | <Schedule> | <Instructor>

Spring 2025 Schedule:

* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | Kimberly Kelly
* BME4550: Special Topics in Biomedical Engineering | We 11:00am - 12:00pm | David Chen
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings
* BME6550: Special Topics in Biomedical Engineering | TuTh 8:00am - 10:45am | Nikki Hastings


In [110]:
generate_response('Can you make a semester schedule for a first year economics student for fall 2025?', k_s=20)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani\n* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomic

In [114]:
print('\n* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani\n* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 7:00pm - 7:50pm | To Be Announced\n* ECON2')


* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani
* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 5:00p

In [111]:
generate_response('Can you make a semester schedule for a third year economics student for fall 2025?', k_s = 20)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


'\n* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani\n* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomic

In [115]:
print('\n* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani\n* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | We 5:00pm - 5:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Tu 2:00pm - 2:50pm | To Be Announced\n* ECON2010 - Principles of Economics: Microeconomics | Th 7:00pm - 7:50pm | To Be Announced\n* ECON2')


* ECON2020 - Principles of Economics: Macroeconomics | MoTuWeThFr 10:30am - 12:45pm | Jessica Kripalani
* ECON2010 - Principles of Economics: Microeconomics | Tu 9:00am - 9:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Fr 12:00pm - 12:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Tu 8:00am - 8:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 5:00pm - 5:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 2:00pm - 2:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 11:00am - 11:50am | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | Th 6:00pm - 6:50pm | To Be Announced
* ECON2010 - Principles of Economics: Microeconomics | We 5:00p

In [112]:
generate_response('Can you make a semester schedule for a third year architecture student for fall 2025?', k_s = 20)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


"\n* ARCH 1010: History of Architecture I | Mo 11:00am - 11:50am, Tu 11:00am - 11:50am, We 11:00am - 11:50am | Andrew Johnston\n* ARH 3120: Theories of Architecture | TuTh 11:00am - 12:15pm | Alissa Diamond\n* ARH 4591: Undergraduate Seminar in the History of Architecture | TuTh 11:00am - 12:15pm | Elgin Cleckley, Lisa Reilly\n* ARCH 3500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra\n* ARCH 6500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra\n* ARCH 5500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Evan Shieh\n* ARCH 5010: Introduction to Architecture | MoTuWeThFr 8:30am - 6:00pm | Devin Dobrowolski, Felipe Correa, Jonah Coe-Scharff, Katie Stranix\n\nTotal Credits: 15\n\nNote: This schedule is subject to change based on the availability of courses and the student's preferences."

In [116]:
print("\n* ARCH 1010: History of Architecture I | Mo 11:00am - 11:50am, Tu 11:00am - 11:50am, We 11:00am - 11:50am | Andrew Johnston\n* ARH 3120: Theories of Architecture | TuTh 11:00am - 12:15pm | Alissa Diamond\n* ARH 4591: Undergraduate Seminar in the History of Architecture | TuTh 11:00am - 12:15pm | Elgin Cleckley, Lisa Reilly\n* ARCH 3500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra\n* ARCH 6500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra\n* ARCH 5500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Evan Shieh\n* ARCH 5010: Introduction to Architecture | MoTuWeThFr 8:30am - 6:00pm | Devin Dobrowolski, Felipe Correa, Jonah Coe-Scharff, Katie Stranix\n\nTotal Credits: 15\n\nNote: This schedule is subject to change based on the availability of courses and the student's preferences.")


* ARCH 1010: History of Architecture I | Mo 11:00am - 11:50am, Tu 11:00am - 11:50am, We 11:00am - 11:50am | Andrew Johnston
* ARH 3120: Theories of Architecture | TuTh 11:00am - 12:15pm | Alissa Diamond
* ARH 4591: Undergraduate Seminar in the History of Architecture | TuTh 11:00am - 12:15pm | Elgin Cleckley, Lisa Reilly
* ARCH 3500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra
* ARCH 6500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Jose Ibarra
* ARCH 5500: Special Topics in Architecture | Th 6:00pm - 8:30pm | Evan Shieh
* ARCH 5010: Introduction to Architecture | MoTuWeThFr 8:30am - 6:00pm | Devin Dobrowolski, Felipe Correa, Jonah Coe-Scharff, Katie Stranix

Total Credits: 15

Note: This schedule is subject to change based on the availability of courses and the student's preferences.


## Answer relevence

In [117]:
import numpy as np
from numpy.linalg import norm

In [118]:
model = SentenceTransformer("all-MiniLM-L6-v2")

#get embeddings
def get_embedding(text):
    embedding = model.encode(text)

    return embedding

#calculate cosine_similarity
def cosine_similarity(a, b):
    return np.dot(a, b) / (norm(a) * norm(b))

In [119]:
def get_sim_score(user_query, generated_question):
    emb_q = get_embedding(user_query)
    emb_qi = get_embedding(generated_question)

    sim_score = cosine_similarity(emb_q, emb_qi)

    return print(f"User Query: {user_query}\n Generated Question: {generated_question}\nSimilarity score: {sim_score:.4f}\n -------------")

In [120]:
#querys to test
querys = ['Can you make a semester schedule for a second year biomedical engineering student for fall 2025?',
          'Can you make a semester schedule or a fourth year biomedical engineering student for fall 2025?',
          'Can you make a semester schedule for a first year economics student for fall 2025?',
          'Can you make a semester schedule for a third year economics student for fall 2025?',
          'Can you make a semester schedule for a third year architecture student for fall 2025?']

#generated responses generated with output and ask to "generate a question for the given answer"
gen_responses= [
    'What are some of the Special Topics and Professional Development courses in Biomedical Engineering offered at UVA in Fall and Spring 2025, including their instructors and schedules?',
    'What courses are listed under Special Topics in Biomedical Engineering for Spring 2025, and who are the instructors along with their scheduled class times?',
    'What sections of ECON2010 and ECON2020 are being offered, including their schedules and instructors, for the upcoming semester?',
    'What are the available sections, meeting times, and instructors for ECON2010 and ECON2020 in the upcoming term?',
    'What architecture courses are included in a 15-credit schedule, along with their meeting times and instructors?'  
]

In [121]:
for i,query in enumerate(querys):
    get_sim_score(query, gen_responses[i])

User Query: Can you make a semester schedule for a second year biomedical engineering student for fall 2025?
 Generated Question: What are some of the Special Topics and Professional Development courses in Biomedical Engineering offered at UVA in Fall and Spring 2025, including their instructors and schedules?
Similarity score: 0.6176
 -------------
User Query: Can you make a semester schedule or a fourth year biomedical engineering student for fall 2025?
 Generated Question: What courses are listed under Special Topics in Biomedical Engineering for Spring 2025, and who are the instructors along with their scheduled class times?
Similarity score: 0.6882
 -------------
User Query: Can you make a semester schedule for a first year economics student for fall 2025?
 Generated Question: What sections of ECON2010 and ECON2020 are being offered, including their schedules and instructors, for the upcoming semester?
Similarity score: 0.5777
 -------------
User Query: Can you make a semester sch