In [None]:
# import libraries
import nltk
import random
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import re

# NLTK data used by this notebook: the tokenizer models for word_tokenize(),
# the stop-word list, and the tagger model loaded by nltk.pos_tag() (used when
# validating surnames and addresses).
# NOTE(review): newer NLTK releases reportedly look for 'punkt_tab' and
# 'averaged_perceptron_tagger_eng' instead — confirm against the installed version.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')
import sqlite3

In [None]:
# define DB class that interacts with the database

class Db:
    """SQLite storage layer for the university chatbot.

    Every helper opens its own short-lived connection to ``conn_string`` and
    closes it again, also when the statement raises. Creating an instance
    makes sure the schema exists and seeds it with sample data on first use.
    """

    def __init__(self):
        import os  # local import: only needed to prepare the database folder

        self.conn_string = 'data/university_chatbot.db'
        # sqlite3 creates a missing database file, but not a missing folder.
        folder = os.path.dirname(self.conn_string)
        if folder:
            os.makedirs(folder, exist_ok=True)
        self.init_db()

    def is_already_initialized(self):
        """Return the ``sqlite_master`` row of the ``students`` table, or None if it is missing."""
        return self.get_one("SELECT name FROM sqlite_master WHERE type='table' AND name='students'")

    def init_db(self):
        """Create all tables and insert the sample data unless ``students`` already exists."""
        if self.is_already_initialized():
            return

        schema = (
            # Students Table
            '''
            CREATE TABLE IF NOT EXISTS students (
                matriculation_number INTEGER PRIMARY KEY,
                name TEXT NOT NULL,
                surname TEXT NOT NULL,
                address TEXT
            )
            ''',
            # Courses Table
            '''
            CREATE TABLE IF NOT EXISTS courses (
                course_id INTEGER PRIMARY KEY,
                course_name TEXT NOT NULL,
                instructor TEXT NOT NULL
            )
            ''',
            # Registrations Table
            '''
            CREATE TABLE IF NOT EXISTS registrations (
                registration_id INTEGER PRIMARY KEY,
                matriculation_number INTEGER NOT NULL,
                course_id INTEGER NOT NULL,
                FOREIGN KEY (matriculation_number) REFERENCES students (matriculation_number),
                FOREIGN KEY (course_id) REFERENCES courses (course_id)
            )
            ''',
            # Exam Results Table
            '''
            CREATE TABLE IF NOT EXISTS exam_results (
                result_id INTEGER PRIMARY KEY,
                matriculation_number INTEGER NOT NULL,
                course_id INTEGER NOT NULL,
                grade TEXT NOT NULL,
                FOREIGN KEY (matriculation_number) REFERENCES students (matriculation_number),
                FOREIGN KEY (course_id) REFERENCES courses (course_id)
            )
            ''',
        )
        for statement in schema:
            self.execute_mutation(statement)

        self.fill_db_with_sample_data()

    def fill_db_with_sample_data(self):
        """Insert ten students, eleven courses, two registrations and one exam result per student.

        The rows use fixed primary keys, so this must only run against empty
        tables. Registrations and grades are chosen randomly.
        """
        # Sample data for students
        students = [
            (201001, 'Alice', 'Smith', 'Jakobstraße 123'),
            (201002, 'Bob', 'Johnson', 'Schlägelstraße 456'),
            (201003, 'Carol', 'Williams', 'Borsigplatz 789'),
            (201004, 'David', 'Brown', 'Kamener Straße 101'),
            (201005, 'Eve', 'Davis', 'Priorstraße 202'),
            (201006, 'Frank', 'Miller', 'Burgholzstraße 303'),
            (201007, 'Grace', 'Wilson', 'Clausthaler Straße 404'),
            (201008, 'Henry', 'Moore', 'Nordstraße 505'),
            (201009, 'Ivy', 'Taylor', 'Am Waldfried 606'),
            (201010, 'Jack', 'Anderson', 'Westfalenhüttenallee 707')
        ]
        # Sample data for courses
        courses = [
            (101, 'Introduction to Computer Science', 'Dr. John Doe'),
            (102, 'Advanced Mathematics', 'Dr. Jane Smith'),
            (103, 'Physics for Engineers', 'Dr. Emily Johnson'),
            (104, 'Data Structures and Algorithms', 'Dr. Alan Turing'),
            (105, 'Database Systems', 'Dr. Edgar Codd'),
            (106, 'Artificial Intelligence', 'Dr. Ada Lovelace'),
            (107, 'Machine Learning', 'Dr. Geoffrey Hinton'),
            (108, 'Software Engineering', 'Dr. Grace Hopper'),
            (109, 'Web Development', 'Dr. Tim Berners-Lee'),
            (110, 'Operating Systems', 'Dr. Linus Torvalds'),
            (111, 'Computer Networks', 'Dr. Vint Cerf')
        ]
        grades = ['1.0', '1.3', '1.5', '1.7', '2.0', '2.3', '2.5', '2.7',
                  '3.0', '3.3', '3.5', '3.7', '4.0', '5.0', 'F']

        self.execute_mass_mutation('INSERT INTO students VALUES (?,?,?,?)', students)
        self.execute_mass_mutation('INSERT INTO courses VALUES (?,?,?)', courses)

        # Register every student for two randomly chosen courses and remember
        # the choice so that exam results can refer to it.
        registrations = []
        courses_by_student = {}
        for matriculation_number, *_ in students:
            picked_ids = [course[0] for course in random.sample(courses, 2)]
            courses_by_student[matriculation_number] = picked_ids
            registrations.extend((None, matriculation_number, course_id) for course_id in picked_ids)

        self.execute_mass_mutation(
            'INSERT INTO registrations (registration_id, matriculation_number, course_id) VALUES (?,?,?)',
            registrations,
        )

        # One exam result per student, always for a course that this student is
        # registered for (not for an arbitrary other student's course).
        exam_results = [
            (None, matriculation_number, random.choice(picked_ids), random.choice(grades))
            for matriculation_number, picked_ids in courses_by_student.items()
        ]

        self.execute_mass_mutation(
            'INSERT INTO exam_results (result_id, matriculation_number, course_id, grade) VALUES (?,?,?,?)',
            exam_results,
        )

    def get_one(self, query, args=()):
        """Run ``query`` with the bound ``args`` and return the first row, or None."""
        conn = sqlite3.connect(self.conn_string)
        try:
            return conn.execute(query, args).fetchone()
        finally:
            conn.close()

    def get_many(self, query, args=()):
        """Run ``query`` with the bound ``args`` and return all rows as a list."""
        conn = sqlite3.connect(self.conn_string)
        try:
            return conn.execute(query, args).fetchall()
        finally:
            conn.close()

    def execute_mutation(self, query, args=()):
        """Execute one modifying statement and commit it; roll back if it fails."""
        conn = sqlite3.connect(self.conn_string)
        try:
            with conn:  # commits on success, rolls back on an exception
                conn.execute(query, args)
        finally:
            conn.close()

    def execute_mass_mutation(self, query, params=()):
        """Execute ``query`` once per parameter tuple in ``params`` inside one transaction."""
        conn = sqlite3.connect(self.conn_string)
        try:
            with conn:  # commits on success, rolls back on an exception
                conn.executemany(query, params)
        finally:
            conn.close()


In [None]:

class ChatBot:
    def __init__(self):
        """Set up an empty conversation state and the database access object."""
        # No intent has been recognised yet.
        self.intent = None
        # Transcript: everything the bot asked and everything the user replied.
        self.prompts, self.answers = [], []
        # A matriculation number is a stand-alone run of 5 to 7 digits.
        self.matric_number_pattern = r'\b\d{5,7}\b'
        self.db = Db()
    
    def chat(self, prompt):
        print("Bot: " + prompt)
        self.prompts.append(prompt)
        answer = input()
        print("You: "+ answer)
        self.answers.append(answer)
        #self.process_input(answer)
        return answer
    
    #Try to Identify Intents and repeat until a single Intent is found
    def start_conversation(self):
        """Run the main dialogue loop for as long as ``self.intent`` is None.

        Each reply is tokenised and matched against the intent keywords. With
        several candidate intents the user is asked about them one by one; with
        exactly one known intent the user confirms it before it is executed.
        """
        reply = self.chat("Lets get started")
        while self.intent is None:
            print(reply)
            candidates = self.identify_intent(self.process_input(reply))

            if len(candidates) > 1:
                print("Multiple Intents provided")
                print("I have found the following possible intent: ", str(candidates))
                for candidate in candidates:
                    reply = self.chat(f"I have identified the following intent: {candidate}. Shall we process that action?")
                    if self.identify_yes_no_answer(self.process_input(reply)) != 'yes':
                        continue
                    self.intent = candidate
                    self.act()
                    print("We have processed your request!")
                    # Clear the intent so that the outer loop keeps running.
                    self.intent = None
                    break
                reply = self.chat("Lets try again. What can I do for you?")
                continue

            only_intent = candidates[0]
            if only_intent == "unknown_intent":
                self.intent = only_intent
                self.act()
                # If we come back from the action we need a new response
                reply = self.chat("What shall we do now?")
                continue

            print("I have identified the following intent: ", only_intent)
            reply = self.chat("Do you want to process with this action?")
            if self.identify_yes_no_answer(self.process_input(reply)) == 'yes':
                self.intent = only_intent
                self.act()
                print("We have processed your request!")

            
    def process_input(self, user_input = None):
        """Turn a reply into a list of lower-case keywords.

        Without an argument the most recent entry of ``self.answers`` is used.
        The text is tokenised, punctuation characters are removed, and tokens
        that are not purely alphabetic or that are English stop words are dropped.
        """
        text = self.answers[-1] if user_input is None else user_input
        raw_tokens = word_tokenize(text)
        punctuation_remover = str.maketrans('', '', string.punctuation)
        ignored = set(stopwords.words('english'))

        keywords = []
        for token in raw_tokens:
            cleaned = token.lower().translate(punctuation_remover)
            if cleaned.isalpha() and cleaned not in ignored:
                keywords.append(cleaned)
        return keywords
    
    def identify_intent(self, processed_input):
        intent_keywords = {
            'register_exam': ['register', 'exam', 'enroll', 'sign', 'apply', 'examination', 'registration'],
            'deregister_exam': ['deregister', 'exam'],
            'query_exam_status': ['query', 'exam', 'status', 'registered', 'check', 'enrollment', 'status', 'examination', 'status'],
            'query_exam_grade': ['query', 'exam', 'status', 'grade', 'score', 'result', 'marks', 'performance'],
            'change_address': ['change', 'address', 'update', 'modify', 'new', 'relocate', 'relocates', 'move', 'moved'],
            'change_surname': ['change', 'surname', 'family', 'update surname', 'modify', 'last', 'marry', 'married', 'divorced', 'divorced', 'divorced'],
            'suggest_course': ['suggest', 'course'],
            'test': ['test'],
            'abort': ['abort']
            # Add more intents and associated keywords here for Query
        }
        
        matched_intents = []
        for intent,keywords in intent_keywords.items():
            if any(keyword in processed_input for keyword in keywords):
                matched_intents.append(intent)
        
        if not matched_intents:
            return ['unknown_intent']
        
        return matched_intents
    
    def act(self):
        if self.intent == 'unknown_intent':
            print("Sorry, I didn't understand.")
            self.intent = None
        elif self.intent == 'change_address':
            self.change_address()
        elif self.intent == 'change_surname':
            self.change_surname()        
        elif self.intent == 'register_exam':
                    self.register_exam(),
        elif self.intent == 'deregister_exam':
                    self.deregister_exam(),
        elif self.intent == 'query_exam_status':
            self.query_exam_status()
        elif self.intent == 'query_exam_grade':
            self.query_exam_grade()
        elif self.intent == 'suggest_course': 
            self.suggest_course()
        elif self.intent == 'abort':
            self.abort()
        elif self.intent == 'test':
            self.test()
        # Add more intents and associated actions here
    
    def has_postcode(user_input):
        postcode_pattern = re.compile(r'\b\d{5}\b')
        return bool(re.search(postcode_pattern, user_input))
    
    def identify_yes_no_answer(self, input):
        intent_keywords = {
            'yes': ['true', 'yes','yeah', 'yep'],
            'no': ['wrong', 'false', 'no', 'not', 'nope', 'nah'],
            # Add more intents and associated keywords here
        }

        for intent, keywords in intent_keywords.items():
            if any(keyword in input for keyword in keywords):
                    return intent

        return 'unknown_intent'
    
    #Iterate backwards through answers to find matriculation number
    def find_matriculation_number(self, answers):
        for answer in reversed(answers):
            match = re.search(self.matric_number_pattern, answer.strip())
            if match:
                confirm = self.chat(f"Is your matriculation {match.group()}")
                processed_input = self.process_input(confirm)
                if self.identify_yes_no_answer(processed_input) == 'yes':
                    return match.group()
        return None
    # Look up a course ID or course name in the user's answers.
    def find_course_id(self, answers):
        # Query the database for course names and IDs
        courses = self.db.get_many('SELECT course_id, course_name FROM courses')
        course_dict = {str(id): name.lower() for id, name in courses}

        for answer in reversed(answers):
            # Check for course ID (as a string) or course name in the answer
            for course_id_str, course_name in course_dict.items():
                if course_id_str in answer or course_name in answer.lower():
                    # Confirm with the user
                    course_desc = f"{course_name} (ID: {course_id_str})"
                    confirm = self.chat(f"Are you referring to the course {course_desc}?")
                    processed_input = self.process_input(confirm)
                    if self.identify_yes_no_answer(processed_input) == 'yes':
                        return int(course_id_str)  # Convert back to int before returning
        return None
    
    def change_surname(self):
        """Ask for a matriculation number and a new surname and store the surname.

        The surname is written to the database only after the user has
        explicitly confirmed it with "yes"; after a "no" a new surname is
        requested, and unclear replies are asked again.
        """
        # Ask for student's matriculation number
        self.chat("What is your Matriculation Number?")
        matriculation_number = self.find_matriculation_number(self.answers)
        while matriculation_number is None:
            self.chat("What is your Matriculation Number?")
            matriculation_number = self.find_matriculation_number(self.answers)

        # Check if student exists
        student = self.db.get_one('SELECT * FROM students WHERE matriculation_number=?', (matriculation_number,))
        if student is None:
            print("Sorry, you are not registered as a student.")
            return

        def surname_candidates(text):
            # Nouns in the reply, ignoring the word "surname" itself.
            tagged = nltk.pos_tag(word_tokenize(text))
            return [
                word for word, tag in tagged
                if tag in ('NNP', 'NN') and word.isalpha() and word.lower() != 'surname'
            ]

        def validate_surname_input(new_surname_input):
            # Keep asking until the reply contains exactly one candidate.
            candidates = surname_candidates(new_surname_input)
            while len(candidates) != 1:
                new_surname_input = self.chat("Please make sure you have entered your new surname correctly.")
                candidates = surname_candidates(new_surname_input)
            return candidates[0]

        # Ask for new surname
        surname = validate_surname_input(self.chat("Please enter your new surname: "))

        while True:
            answer = self.chat("Let me summarize once again: Your new surname is " + surname + "? ")
            processed_answer = self.identify_yes_no_answer(answer.lower())
            while processed_answer == 'unknown_intent':
                answer = self.chat("Sorry, I didn't understand. Please answer with yes or no. ")
                processed_answer = self.identify_yes_no_answer(answer.lower())

            if processed_answer == 'yes':
                break
            # The user rejected the summary: ask for the surname again.
            surname = validate_surname_input(self.chat("Please enter your correct new surname: "))

        # Update student's surname
        self.db.execute_mutation('UPDATE students SET surname=? WHERE matriculation_number=?', (surname, matriculation_number))

        print("Your surname has been updated.")

    def query_exam_status(self):
        """Tell the user whether they are registered for a given course.

        The matriculation number and the course are first looked for in the
        earlier answers; whatever is still missing is asked for until found.
        """
        student_number = self.find_matriculation_number(self.answers)
        while student_number is None:
            reply = self.chat("What is your Matriculation Number?")
            student_number = self.find_matriculation_number([reply])

        course = self.find_course_id(self.answers)
        while course is None:
            reply = self.chat("What is your Course called or the ID?")
            course = self.find_course_id([reply])

        # Both values are converted to int before they are placed in the SQL text.
        course, student_number = int(course), int(student_number)
        rows = self.db.get_many(f'SELECT * FROM registrations WHERE matriculation_number={student_number} AND course_id={course}')
        if not rows:
            print(f"No registration for Course {course} found!")
            return
        print(f"You are registered for Course {course}!")

    def query_exam_grade(self):
        """Tell the user their grade for a given course, if one is stored.

        The matriculation number and the course are first looked for in the
        earlier answers; whatever is still missing is asked for until found.
        """
        # Find matriculation number from stored answers or ask the user
        matriculation_number = self.find_matriculation_number(self.answers)
        while matriculation_number is None:
            matriculation_number = self.find_matriculation_number([self.chat("What is your Matriculation Number?")])

        course_id = self.find_course_id(self.answers)
        while course_id is None:
            course_id = self.find_course_id([self.chat("What is your Course called or the ID?")])

        # Both values are converted to int before they are placed in the SQL text.
        matriculation_number = int(matriculation_number)
        course_id = int(course_id)

        # Query the database for the exam grade
        result = self.db.get_many(f'SELECT grade FROM exam_results WHERE matriculation_number={matriculation_number} AND course_id={course_id}')

        # Handle the response based on the query result (the raw rows are no
        # longer echoed to the user).
        if result:
            print(f"Your grade for Course {course_id} is {result[0][0]}")
        else:
            print("No grade available or Examination not yet passed")
        
    # If the user changes their mind, abort resets the conversation state.
    def abort(self):
         self.intent = None
        
    # For testing purposes only.
    def test(self):
        """Print the line "Test Test" three times."""
        for _ in range(3):
            print("Test Test")

    def register_exam(self):
        matriculation_number = self.chat("Please enter your matriculation number: ")

        # Check if student exists
        student = self.db.get_one('SELECT * FROM students WHERE matriculation_number=?', (matriculation_number,))
        if student is None:
            print("Sorry, you are not registered as a student.")
        else:
            while True:
                course_name = self.chat("Please enter the name of the course you want to register for: ")
                course = self.db.get_one('SELECT course_id FROM courses WHERE course_name = ?', (course_name,))

                if course is None:
                    print("Incorrect course name, please provide the correct one.")
                    continue

                answer = self.chat(f"Let me summarize once again: you want to register for the exam {course_name}?")
                processed_answer = self.identify_yes_no_answer(answer)

                while processed_answer == 'unknown_intent':
                    answer = self.chat("Sorry, I didn't understand. Please answer with yes or no.")
                    processed_answer = self.identify_yes_no_answer(answer)

                if processed_answer == 'yes':
                    # Check if already registered
                    registration = self.db.get_one('SELECT * FROM registrations WHERE matriculation_number = ? AND course_id = ?', (matriculation_number, course[0]))
                    if registration:
                        print(f"Student with matriculation number {matriculation_number} is already registered for course {course_name}.")
                        return

                    # Perform registration
                    self.db.execute_mutation('INSERT INTO registrations (matriculation_number, course_id) VALUES (?, ?)', (matriculation_number, course[0]))
                    print(f"Student with matriculation number {matriculation_number} registered for course {course_name}.")
                    return
                elif processed_answer == 'no':
                    break  # Restart the loop if the user says no


    def deregister_exam(self):
        matriculation_number = self.chat("Please enter your matriculation number: ")

        # Check if student exists
        student = self.db.get_one('SELECT * FROM students WHERE matriculation_number=?', (matriculation_number,))
        if student is None:
            print("Sorry, you are not registered as a student.")
        else:
            while True:
                course_name = self.chat("Please enter the name of the course you want to deregister from: ")
                course = self.db.get_one('SELECT course_id FROM courses WHERE course_name = ?', (course_name,))

                if course is None:
                    print("Incorrect course name, please provide the correct one.")
                    continue

                answer = self.chat(f"Let me summarize once again: you want to deregister from the exam {course_name}?")
                processed_answer = self.identify_yes_no_answer(answer)

                while processed_answer == 'unknown_intent':
                    answer = self.chat("Sorry, I didn't understand. Please answer with yes or no.")
                    processed_answer = self.identify_yes_no_answer(answer)

                if processed_answer == 'yes':
                    # Check if actually registered
                    registration = self.db.get_one('SELECT * FROM registrations WHERE matriculation_number = ? AND course_id = ?', (matriculation_number, course[0]))
                    if not registration:
                        print(f"Student with matriculation number {matriculation_number} is not registered for course {course_name}.")
                        return

                    # Perform deregistration
                    self.db.execute_mutation('DELETE FROM registrations WHERE matriculation_number = ? AND course_id = ?', (matriculation_number, course[0]))
                    print(f"Student with matriculation number {matriculation_number} deregistered from course {course_name}.")
                    return
                elif processed_answer == 'no':
                    break  # Exit the loop if the user says no
    def extract_info(self, tree, label):
        info_list = []
        for subtree in tree.subtrees():
            if subtree.label() == label:
                info_list.append(" ".join(word for word, tag in subtree.leaves()))
        return info_list


    def get_post_code(self,user_input):
        postcode_pattern = re.compile(r'\b\d{5}\b')
        result = re.search(postcode_pattern, user_input)
        first_match = result.group(0) if result else None
        return first_match


    def change_address(self):
        """Ask for a matriculation number and a new address and store the address.

        The address is assembled from a street part (name and house number) and
        a city part (containing a five-digit postcode). It is written to the
        database only after the user has explicitly confirmed the summary; after
        a "no" a new address is requested, and unclear replies are asked again.
        """
        self.chat("Please enter your matriculation number: ")
        matriculation_number = self.find_matriculation_number(self.answers)
        while matriculation_number is None:
            self.chat("Please enter your matriculation number: ")
            matriculation_number = self.find_matriculation_number(self.answers)

        #Check if student exists
        student = self.db.get_one('SELECT * FROM students WHERE matriculation_number=?', (matriculation_number,))
        if student is None:
            print("Sorry, you are not registered as a student.")
            return

        # Grammar for City and Postal Number
        city_grammar = r"""
            CITY: {<CD>?<NNP|NN><CD>?}
        """
        street_grammar = r"""
        STREET: {<DT|NNP>?<NNP|NN>?<NNP><CD>}
        """
        street_pattern = r"((Ober|Unter den|An |Im |Platz |Berg |Am |Alt\-).+|(?:([A-Z][a-zäüö-]+){1,2})).([Cc]haussee|[Aa]llee|[sS]tr(\.|(a(ss|ß)e))|[Rr]ing|berg|gasse|grund|hörn| Nord|graben|[mM]arkt|[Uu]fer|[Ss]tieg|[Ll]inden|[Dd]amm|[pP]latz|brücke|Steinbüchel|Burg|stiege|[Ww]eg|rain|park|[Ww]eide|[Hh][oö]f|pfad|garten|bogen).+?(\d{1,4})([a-zäöüß]+)?(\-?\d{1,4}[a-zäöüß]?)?"
        city_cp = nltk.RegexpParser(city_grammar)
        street_cp = nltk.RegexpParser(street_grammar)

        def tag_words(text):
            return nltk.pos_tag(word_tokenize(text))

        def city_candidates(tagged):
            # CITY chunks that contain a postcode.
            chunks = self.extract_info(city_cp.parse(tagged), 'CITY')
            return [item for item in chunks if self.get_post_code(item) is not None]

        def street_candidates(tagged):
            # STREET chunks that match the street pattern.
            chunks = self.extract_info(street_cp.parse(tagged), 'STREET')
            return [item for item in chunks if re.match(street_pattern, item)]

        def validate_address_input(new_address):
            tagged = tag_words(new_address)
            cities = city_candidates(tagged)
            streets = street_candidates(tagged)

            # Ask again only for the part that could not be recognised.
            while len(streets) == 0:
                new_street_input = self.chat("Please make sure you have entered your street name and number correctly.")
                streets = street_candidates(tag_words(new_street_input))

            while len(cities) == 0:
                new_city_input = self.chat("Please make sure you have entered your city and postal code correctly.")
                cities = city_candidates(tag_words(new_city_input))

            return streets[0] + ", " + cities[0]

        #Ask for new address
        final_address = validate_address_input(self.chat("Please enter your new address: "))

        while True:
            final_answer = self.chat("Let me summarize once again: You have moved out and your new address is " + final_address + "? ")
            verdict = self.identify_yes_no_answer(final_answer.lower())
            while verdict == 'unknown_intent':
                final_answer = self.chat("Sorry, I didn't understand. Please answer with yes or no. ")
                verdict = self.identify_yes_no_answer(final_answer.lower())

            if verdict == 'yes':
                break
            # The user rejected the summary: replace the address and confirm again.
            final_address = validate_address_input(self.chat("Please enter your new address "))

        self.db.execute_mutation('UPDATE students SET address=? WHERE matriculation_number=?', (final_address, matriculation_number))
        print("Your address has been updated.")

    def suggest_course(self):
        """Recommend a follow-up course based on the student's registrations.

        The first rule whose prerequisite courses are all among the student's
        registrations is proposed, unless the student is already registered for
        the recommended course or that course is missing from the catalogue.
        """
        # Request the student's matriculation number
        self.chat("Please enter your matriculation number: ")
        matriculation_number = self.find_matriculation_number(self.answers)
        while matriculation_number is None:
            self.chat("Please enter your matriculation number: ")
            matriculation_number = self.find_matriculation_number(self.answers)

        student = self.db.get_one('SELECT * FROM students WHERE matriculation_number=?', (matriculation_number,))
        if student is None:
            self.chat("Sorry, you are not registered as a student.")
            return

        # Check if the student has registered for any courses.
        # matriculation_number is the text matched by matric_number_pattern.
        registered_courses_query = f"SELECT course_id FROM registrations WHERE matriculation_number = '{matriculation_number}'"
        registered_courses = self.db.get_many(registered_courses_query)
        if not registered_courses:
            self.chat("No registered courses found for matriculation number: " + matriculation_number)
            return

        # Extract just the course IDs from the query results
        registered_course_ids = {course[0] for course in registered_courses}

        # Course recommendation rules
        course_recommendations = [
            ([102, 108], 106),  # Advanced Mathematics + Software Engineering -> Artificial Intelligence
            ([105, 104], 111),  # Database Systems + Data Structures and Algorithms -> Computer Networks
            ([101, 103], 109),  # Introduction to Computer Science + Physics for Engineers -> Web Development
            ([104, 106], 107),  # Data Structures and Algorithms + Artificial Intelligence -> Machine Learning
            ([109, 105], 108),  # Web Development + Database Systems -> Software Engineering
        ]

        # Iterate through the course recommendations and check if the student meets the prerequisites
        for prerequisites, recommendation in course_recommendations:
            if recommendation in registered_course_ids:
                continue  # no point in suggesting a course the student already takes
            if not all(prerequisite in registered_course_ids for prerequisite in prerequisites):
                continue

            course_row = self.db.get_one("SELECT course_name FROM courses WHERE course_id=?", (recommendation,))
            if course_row is None:
                continue  # the rule refers to a course that is not in the catalogue

            # get_one returns a row tuple; show only the name itself.
            recommended_course_name = course_row[0]
            confirm = self.chat(f"Based on your previously taken courses, I suggest you take the course: {recommended_course_name}. Do you want to proceed with this recommendation?")
            if self.identify_yes_no_answer(confirm.lower()) == 'yes':
                self.chat("Great! You can proceed to register for this course.")
            else:
                self.chat("No problem! Feel free to ask for another recommendation or inquire about something else.")
            return

        # If no recommendation was found
        self.chat("Based on your previously taken courses, there are no specific recommendations. Consider another university.")


# Start an interactive session: creating the ChatBot also creates its Db
# object, which initialises the database when the students table is missing.
chatbot = ChatBot()
chatbot.start_conversation()
