In [64]:
from dotenv import load_dotenv
load_dotenv()

True

In [65]:
"""
/api/utils.py
Util functions for course planning.
"""

from typing import List
from models import CompletedCourse

def num_units_completed(completed_courses: List[CompletedCourse]):
    """Returns the number of units completed"""
    return sum([float(c.credits) for c in completed_courses])

def year_standing(units: float) -> int:
    if units < 12:
        return 1
    elif units < 26.5:
        return 2
    elif units < 41.5:
        return 3
    else:
        return 4

def is_course_req(req):
    """Return True or False if the requirement is a course requirement and not a higher order requirement"""
    if type(req) != dict:
        return False
    req_list = req.get('reqList')
    if req_list is None or type(req_list) != list or len(req_list) == 0 or type(req_list[0]) != str:
        return False
    return len(re.findall(r"[a-zA-Z]{2,4}\d{3}", req_list[0])) != 0

def minimum_year_satisfied(units, rule):
    """Return True or False if the student satisfies a rule like minimum third-year"""
    if units < 12 and ('second' in rule or 'third' in rule or 'fourth' in rule or 'fifth' in rule):
        return False
    elif units < 26.5 and ('third' in rule or 'fourth' in rule or 'fifth' in rule):
        return False
    elif units < 41.5 and ('fourth' in rule or 'fifth' in rule):
        return False
    elif units > 42 and 'fifth' in rule:
        return False
    return True

def is_req_satisfied(req, units, completed_course_codes):
    """Return True or False if the requirement is satisfied given completed courses"""
    if type(req) == str: # min year standing, could be other, may need to look into this...
        return minimum_year_satisfied(units, req)
    
    if type(req) == list:
        return all(is_req_satisfied(r, units, completed_course_codes) for r in req)

    qty = req.get('quantity')
    reqlist = req.get('reqList')

    if is_course_req(req): 
        if qty == "ALL":
            if not all(code in completed_course_codes for code in req['reqList']):
                return False
        elif type(qty) == int:
            ct = 0
            for i in range(len(reqlist)):
                code = reqlist[i]
                if code in completed_course_codes:
                    ct += 1
            if ct < qty:
                return False
        elif qty is None:
            # print("course req, qty is none")
            # pprint(req)
            return False
        
    else: # higher order prereqs
        if qty == "ALL":
            return all(is_req_satisfied(x, units, completed_course_codes) for x in reqlist)
        elif type(qty) == int:
            ct = 0
            for i in range(len(reqlist)):
                if is_req_satisfied(reqlist[i], units, completed_course_codes):
                    ct += 1
            if ct < qty:
                return False
        elif qty is None:
            # print("upper req, qty is none")
            # print(req)
            return False
    
    return True

def can_take(course, units, completed_course_codes):
    """Return True or False if all requirements are satisfied for course given completed courses"""
    prereqs = course.get('prerequisites')
    if not prereqs:
        return True
    return all(is_req_satisfied(prereq, units, completed_course_codes) for prereq in prereqs)

def get_unsatisfied_course_reqs(req, units, completed_course_codes):
    new = []
    if is_course_req(req):
        if not is_req_satisfied(req, units, completed_course_codes):
            return [req]
    elif type(req) == list:
        for r in req:
            if not is_req_satisfied(req, units, completed_course_codes):
                new += get_unsatisfied_course_reqs(r, units, completed_course_codes)
    elif type(req) == str: # other reqs... dont bother with them here
        pass
    else:
        if not is_req_satisfied(req, units, completed_course_codes):
            new += get_unsatisfied_course_reqs(req['reqList'], units, completed_course_codes)
    return new

def get_course_reqs_left(requirements, units, completed_course_codes):
    """Return a dictionary of the course requirements left. Keys are year and values are a list of course reqs."""
    reqs_left = {}

    if requirements is None:
        return {}
    
    # Get unsatisfied reqs
    for info, req in requirements.items():
        reqs_left[info] = get_unsatisfied_course_reqs(req, units, completed_course_codes)

    # Modify based on what's needed to be taken
    for info, reqs in reqs_left.items():
        for req in reqs:
            qty = req.get('quantity')
            if qty is None: # Unfortunately there are some unparsed requirements.
                continue
            if qty == 'ALL':
                req['reqList'] = [code for code in req['reqList'] if code not in completed_course_codes]

            elif type(qty) == int:
                ct = 0
                new_codes = []
                for code in req['reqList']:
                    if code in completed_course_codes:
                        ct += 1
                    else:
                        new_codes.append(code)
                req['quantity'] = qty - ct
                req['reqList'] = new_codes
            else:
                raise Exception

    reqs_left = {k:v for k,v in reqs_left.items() if v!=[]}
    return reqs_left

def get_courses_left(requirements, units, completed_course_codes):
    course_reqs_left = get_course_reqs_left(requirements, units, completed_course_codes)
    course_reqs = []
    for reqs in course_reqs_left.values():
        for req in reqs:
            course_reqs.append(req)
    course_codes = []
    for req in course_reqs:
        if any(type(x) == dict for x in req['reqList']):
            continue
        course_codes += req['reqList']
    
    return course_codes

def get_useful_courses(db, requirements, units, completed_course_codes):
    """Returns a triplet containing courses left in degree which (potential_courses, missing_prereq, not_offered)"""
    courses_left = get_courses_left(requirements, units, completed_course_codes)
    courses = db.courses.aggregate([{'$match': {'_id': {'$in': courses_left}}}])
    potential_courses = [x['_id'] for x in courses if can_take(x, units, completed_course_codes)]
    missing_prereq = [x['_id'] for x in courses if x not in potential_courses]
    not_offered = [code for code in courses_left if code not in potential_courses]
    return (potential_courses, missing_prereq, not_offered)

In [66]:
"""
The Cosmongo class encapsulates a LangChain 
agent that can be used to answer questions about UVic
degrees, courses, and sections in Fall 24 and Spring 25
"""
import json
from typing import List
from langchain.schema.document import Document
from langchain.agents import Tool
from langchain.agents.agent_toolkits import create_conversational_retrieval_agent
from langchain.tools import StructuredTool
from langchain_core.messages import SystemMessage
import pymongo

from models import CompletedCourse

class Cosmongo:
    """
    Class for handling langchain conversational agent interactions
    """
    def __init__(
            self, 
            db, 
            llm,
            courses_retriever,
            degrees_retriever,
            username: str,
            session_id: str, 
            completed_courses: List[CompletedCourse]=None,
            messages: List[str]=None):            

        self.db = db
        self.courses_retriever = courses_retriever
        self.degrees_retriever = degrees_retriever

        system_message = SystemMessage(
            content = """Welcome to the Academic Advisor Assistant for the University of Victoria (UVic).
                         As an academic advisor assistant, you help with information about UVic's courses and degrees.
                         Ask questions about specific courses, degree programs, or general inquiries related to UVic.
                         Do not make up any courses, if you dont a course with tools, then say I cannot find that course.
                         Only reference the courses that are explicitly given to you as context.
                         Do not provide links.""")

        self.agent_executor = create_conversational_retrieval_agent(
            llm, 
            self._make_tools(), 
            system_message=system_message, 
            verbose=True, 
            handle_parsing_errors=True)

        self.username = username
        self.session_id = session_id
        self.completed_courses = [] if completed_courses is None else completed_courses
        self.messages = [] if messages is None else messages

    def run(self, prompt: str) -> str:
        """
        Run the AI agent.
        """
        result = self.agent_executor({"input": prompt})
        self.messages += [prompt, result["output"]]
        self._backup_chat()
        return result["output"]
    
    def _backup_chat(self):
        self.db.chat.bulk_write([pymongo.UpdateOne({"_id": self.session_id}, 
                                                   {"$set": {"messages": self.messages,
                                                             "username": self.username}}, 
                                                   upsert=True)])
        self.db.users.bulk_write([pymongo.UpdateOne({"username": self.username},
                                                    {"$addToSet": {"chatSessions": self.session_id}})])

    def _make_tools(self):
        courses_retriever_chain = self.courses_retriever | format_course_doc
        degrees_retriever_chain = self.degrees_retriever | format_degree_doc

        return [Tool(
                    name = "vector_search_degrees", 
                    func = degrees_retriever_chain.invoke,
                    description = "Find degrees similar to the question."),
                Tool(
                    name = "vector_search_courses", 
                    func = courses_retriever_chain.invoke,
                    description = "Find courses similar to question."),
                StructuredTool.from_function(
                    self._get_course_by_id, 
                    description="useful for finding information about a specific course when you have the subject and course number"),
                StructuredTool.from_function(
                    self._can_take, 
                    description="useful when the user asks if they satisty prerequisites for a course"),
                StructuredTool.from_function(
                    self._get_degree_by_title, 
                    description="useful when looking for information about a specific degree"),
                StructuredTool.from_function(
                    self._determine_course_offering, 
                    description="useful for finding if a course is offered and getting details about it's upcoming sections")]
    
    def _get_course_by_id(self, code: str) -> str:
        """
        Retrieves a course by it's course code, eg: MATH101
        """
        code = code.replace(' ', '').upper()
        doc = self.db.calendar_courses.find_one({"_id": code})  
        if doc:
            return json.dumps({'code': doc['_id'], 
                               'name': doc['name'], 
                               'description': doc['description']})
        else:
            return f"{code} does not exist in my database. Are you sure this is the correct course code."

    def _can_take(self, code: str) -> str:
        """
        Determines if the user satisfies the prerequisites for a course
        """
        if len(self.completed_courses) == 0:
            return "Unsure, the user has not uploaded their completed courses."
        
        code = code.replace(' ', '').upper()
        course = self.db.calendar_courses.find_one({"_id": code})
        if course is None:
            return f"{code} does not exist in my database. Are you sure this is the correct course code."
        print(self.completed_courses)
        units = num_units_completed(self.completed_courses)
        completed_course_codes = [x.code for x in self.completed_courses]
        if can_take(course, units, completed_course_codes):
            return "Yes, the prerequisites are satisfied"
        else:
            not_satisfied = get_unsatisfied_course_reqs(course['prerequisites'], units, completed_course_codes)
            return f"No, the prerequisites {not_satisfied} are not satisfied"


    def _get_degree_by_title(self, title: str) -> str:
        """
        Retrieves a degree by it's name, eg: Mathematics
        """
        doc = self.db.degrees.find_one({"title": title})
        if doc:
            return json.dump({'code': doc['_id'],
                            'title':title, 
                            'description': doc['description'],
                            'required course list': ', '.join(doc['requirementCourseList'])})
        else:
            return f"{title} does not exist in my database. Are you sure this is the program title."

    def _determine_course_offering(self, code: str) -> str:
        """
        Find if a course is offered
        """
        code = code.replace(' ', '').upper()
        doc = self.db.courses.find_one({"_id": code})

        if doc:
            sections = doc.get('sections')
            out_sec = []
            if sections:
                for sec in sections:
                    out_sec.append({'start': sec['start'],
                                    'end': sec['end'],
                                    'start_date': sec['startDate'],
                                    'days': sec['days'],
                                    'seq': sec['seq']})

            return json.dumps({"is_offered": "yes", 'sections': out_sec})
        else:
            return json.dumps({"is_offered": "no"})
    
    def __str__(self):
        return f"Cosmongo(id={self.session_id[:8]}, user={self.username}, messages={self.messages})"
    
    def __repr__(self):
        return self.__str__()


def filter_dict(di, keys):
    return {k: v for k, v in di.items() if k in keys}

def annotate_dict(di):
    return ", ".join([str(k) + ": " + str(v) for k, v in di.items()])


def format_course_doc(docs: List[Document]) -> str:
    """Format the course docs."""
    return ", ".join([annotate_dict(filter_dict(doc.metadata, ['_id', 'name', 'description']))
                    for doc in docs])

def format_degree_doc(docs: List[Document]) -> str:
    """Format the degree docs."""
    return ", ".join([annotate_dict(filter_dict(doc.metadata, ['title', 'description']))
                    for doc in docs])

### Creating Cosmongo Agent

To create a Cosmongo agent you need to give it a db client, llm client, courses retriever, degrees retriever, session id, and message history.

In [67]:
import os

DB_CONNECTION_STRING = os.getenv("DB_CONNECTION_STRING")
DB_NAME = os.getenv("DB_NAME")

AOAI_ENDPOINT_CHAT = os.getenv("AOAI_ENDPOINT_CHAT")
AOAI_KEY_CHAT = os.getenv("AOAI_KEY_CHAT")

AOAI_ENDPOINT_EMBEDDING = os.getenv("AOAI_ENDPOINT_EMBEDDING")
AOAI_KEY_EMBEDDING = os.getenv("AOAI_KEY_EMBEDDING")

AOAI_API_VERSION = os.getenv("AOAI_API_VERSION")

EMBEDDINGS_DEPLOYMENT_NAME = os.getenv("EMBEDDINGS_DEPLOYMENT_NAME")
COMPLETIONS_DEPLOYMENT_NAME = os.getenv("COMPLETIONS_DEPLOYMENT_NAME")


In [68]:
import pymongo
import certifi
from openai import AzureOpenAI
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from langchain_community.vectorstores import AzureCosmosDBVectorSearch

In [69]:
def create_retriever(
    connection_string: str, 
    embedding_model,
    collection: str, 
    top_k: int = 3):
    vector_store =  AzureCosmosDBVectorSearch.from_connection_string(
        connection_string = connection_string,
        namespace = f"db.{collection}",
        embedding = embedding_model,
        index_name = "VectorSearchIndex",
        embedding_key = "embedding",
        text_key = "notes") # TODO: How to ask for more than one field here?
    return vector_store.as_retriever(search_kwargs={"k": top_k})

In [70]:
# Setup CosmosDB client
db_client = pymongo.MongoClient(DB_CONNECTION_STRING,
                                tlsCAFile=certifi.where())
db = db_client.db

# Old ai client. Maybe get rid of it. just keep it.
ai_client = AzureOpenAI(
    azure_endpoint = AOAI_ENDPOINT_EMBEDDING,
    api_key = AOAI_KEY_EMBEDDING,
    api_version = AOAI_API_VERSION)

llm = AzureChatOpenAI(
    temperature = 0,
    openai_api_version = AOAI_API_VERSION,
    azure_endpoint = AOAI_ENDPOINT_CHAT,
    openai_api_key = AOAI_KEY_CHAT,         
    azure_deployment = COMPLETIONS_DEPLOYMENT_NAME)

# Setup Azure Vector Store retrievers
embedding_model = AzureOpenAIEmbeddings(
    openai_api_version = AOAI_API_VERSION,
    azure_endpoint = AOAI_ENDPOINT_EMBEDDING,
    openai_api_key = AOAI_KEY_EMBEDDING,   
    azure_deployment = EMBEDDINGS_DEPLOYMENT_NAME,
    chunk_size=10)

courses_retriever = create_retriever(
    DB_CONNECTION_STRING, 
    embedding_model,
    'calendar_courses', 
    top_k=3)

degrees_retriever = create_retriever(
    DB_CONNECTION_STRING, 
    embedding_model,
    'degrees', 
    top_k=3)

  db_client = pymongo.MongoClient(DB_CONNECTION_STRING,
  client: MongoClient = MongoClient(connection_string, appname=appname)


### Load agents from DB.Chat

In [14]:
# list(db.calendar_courses.aggregate([{'$match': {'_id': {'$in': ["MATH101", "MATH236"]}}}, 
#                                     {'$project': {'_id': 1, 'name': 1, 'credits': 1}}]))

[{'_id': 'MATH101', 'name': 'Calculus II'},
 {'_id': 'MATH236', 'name': 'Introduction to Real Analysis'}]

In [73]:
agent_pool = list(db.chat.find({}))
users = list(db.users.find({}))
if len(agent_pool) == 0:
    agent_pool = {}

from pprint import pprint
# pprint(users[0])
# pprint(users[-1])

agent_pool = {chat['_id']: Cosmongo(db_client.db,
                                    llm,
                                    courses_retriever,
                                    degrees_retriever,
                                    username=chat['username'],
                                    session_id=chat['_id'], 
                                    completed_courses=[CompletedCourse(**x) for x in user['completedCourses']],
                                    messages=chat['messages'])
                    for chat in agent_pool
                    for user in users
                    if chat['username'] == user['username']}

agent_pool

{'zw0cxiro9w96fhanz4qt6oz7tg': Cosmongo(id=zw0cxiro, user=tony, messages=['What courses do you have on inverse mappings?', "Here are some courses related to inverse mappings:\n\n1. **Geophysical Inverse Theory (EOS526)**: This course covers inverse theory and its applications in Earth and Ocean Sciences. Topics include non-uniqueness, general linear least-squares, singular-value decomposition, regularization, linearization, global inversion, Bayesian inversion, and Markov-chain Monte Carlo methods. Applications are drawn from the research literature and include topics such as inversion of seismic, acoustic, and geo-electromagnetic data, tomography, and matched-field inversion.\n\n2. **Ethnographic Mapping and Indigenous Cartographies (ANTH460)**: This is a skills-based seminar on land use and occupancy mapping (also called Traditional Use Study mapping) of indigenous territories in respect of land claims and resource rights, and critically evaluating power relations, knowledge practice

### Write agents to DB.Chat

In [48]:
# On shutdown, store the agent states and session ids in CosmosDB
db.chat.bulk_write([pymongo.UpdateOne({"_id": id}, 
                                      {"$set": {"messages": agent.messages,
                                                "username": agent.username}}, 
                                      upsert=True) 
                    for id, agent in agent_pool.items()])

db.users.bulk_write([pymongo.UpdateOne({"username": agent.username},
                                       {"$push": {"chatSessions": agent.session_id}})
                     for agent in agent_pool.values()])

BulkWriteResult({'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 1, 'nModified': 1, 'nRemoved': 0, 'upserted': []}, acknowledged=True)