### Library imports and initial setup

In [1]:
from langchain_ollama.llms import OllamaLLM
from langchain_core.prompts import ChatPromptTemplate
import json
import sys
import os

In [12]:
## Paths in the project

# Base folder: the parent directory of the current working directory
base_folder = os.path.dirname(os.getcwd())

# Every project sub-folder sits directly under the base folder.
# The variable names (left) and folder names (right) are paired by position.
(
    answers_path,          # Answers path
    vector_dbs_path,       # Vector_DBs path
    code_path,             # Code path
    ai_prepositions_path,  # AI_Prepositions path
    corpus_path,           # Corpus path
    ground_truth_path,     # Ground_Truth path
    ragas_results_path,    # RAGAS_Results path
) = (
    os.path.join(base_folder, folder_name)
    for folder_name in (
        'Answers',
        'Vector_DBs',
        'Code',
        'AI_Prepositions',
        'Corpus',
        'Ground_Truth',
        'RAGAS_Results',
    )
)

In [14]:
############### Only for vector_ia_db ####################
# Add the Vector_DBs folder to the import path so that `vector_ia_db` can be found.
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
if vector_dbs_path not in sys.path:
    sys.path.append(vector_dbs_path)
from vector_ia_db import init_retriever
# NOTE(review): force_recreate=False presumably reuses an already-built vector store
# instead of rebuilding it — confirm in vector_ia_db.
retriever = init_retriever(force_recreate=False)

############### Other vector DBs ####################
# If we are using other vector DBs (not vector_ia_db) we need to import the retriever as follows:
# sys.path.append(vector_dbs_path)
# from Vector_768_50 import retriever  # Change the name according to the file you are going to use

Loading existing vector store...


### Select the LLM

In [15]:
# Initialize the model
# The string is the Ollama model tag; change it here to evaluate a different model.
# NOTE(review): presumably requires a running Ollama instance with this tag already pulled — confirm.
model = OllamaLLM(model = "mistral:7b-instruct-v0.2-q8_0")

### Define the template for the chatbot

In [16]:
# Prompt for the assistant. It has two placeholders that are filled in when the
# chain is invoked: {context} (the retrieved documents) and {question} (the user input).
template = """
You are a helpful, friendly, and knowledgeable AI assistant designed to support future international students of **NOVA IMS** – a leading school of Information Management and Data Science in Lisbon, Portugal.

Your job is to provide accurate, encouraging, and easy-to-understand answers related to:
- NOVA IMS Master’s and Postgraduate programs,
- Portuguese Student VISA application requirements,
- How to obtain residency after arriving in Portugal,
- Finding housing and understanding living costs in Lisbon,
- Other first steps for settling into life as a new international student in Lisbon.

Use the following retrieved documents to provide accurate and relevant responses. You should **not mention document names, document IDs, or file references** — focus only on delivering a helpful and human response to the user.

Your answers should be:
- **Natural and conversational** — avoid sounding like you're quoting a file
- **Clear and informative** — explain concepts simply and accurately
- **Supportive and empathetic** — acknowledge that moving abroad is a big and exciting step

Avoid technical language or internal references. Your goal is to make international students feel informed, confident, and supported.

---
If the user’s question is **not related to NOVA IMS, studying in Portugal, or moving to Lisbon as a student**, respond with:
*"I'm here to assist with questions about NOVA IMS, student life in Portugal, and related topics — let me know how I can help!"*

If the user’s question **is relevant** but **cannot be answered from the provided documents**, say:
*"That’s an important question! Although I don’t have that information at the moment, I recommend reaching out to NOVA IMS directly or consulting the appropriate service for the most accurate and up-to-date details."*

---
Context:
{context}

user: {question}
Assistant:
"""

In [17]:
# Create a chat prompt template using the previous template
# (`template` carries the {context} and {question} placeholders)
prompt = ChatPromptTemplate.from_template(template)

# Create a processing chain by combining the prompt template with the model.
# The chain is invoked with a dict holding the "context" and "question" keys.
chain = prompt | model

In [18]:
# List to store the relevant data of each interaction between the user and the chatbot
dataset = []

# Interactive question/answer loop; it runs until the user types "q".
while True:
    print("\n\n-------------------------------------")
    # Surrounding whitespace is dropped so that inputs such as "q " still quit
    question = input("Ask your question (q to quit): ").strip()
    print("\n\n")

    if question.lower() == "q":
        break

    # Nothing was typed: ask again instead of querying the retriever and the model
    if not question:
        continue

    # Obtain the context
    information = retriever.invoke(question)

    # Keep only the text of each retrieved item. Passing the raw retriever objects
    # would put their full representation (likely including metadata such as
    # source names) into the prompt, and they cannot be written out with json.dump.
    retrieved_items = information if isinstance(information, list) else [information]
    contexts = [
        item.page_content if hasattr(item, "page_content") else str(item)
        for item in retrieved_items
    ]

    # Get the answer
    result = chain.invoke({"context": contexts, "question": question})

    # Display the answer
    print(result)

    # Store it in the dataset
    dataset.append({
        "question": question,
        "answer": result,
        "contexts": contexts
    })

# This code was the first attempt to interact with the system



-------------------------------------



 Hello there! I'm here to assist with questions about NOVA IMS, student life in Portugal, and related topics. If you have any specific queries about the university, its programs, or moving to Lisbon as an international student, feel free to ask! For instance, I can help answer questions about NOVA IMS faculty members, academic roles, application timelines, and more. Let me know how I can help!


-------------------------------------



 The average monthly rent in Lisbon varies depending on the type of accommodation you're looking for. For a private room, it's around €490. A studio apartment typically costs around €1,056 per month, and an apartment will set you back about €1,750 on average. Keep in mind that these prices can change, so it's always a good idea to do some research or contact local housing providers for the most current information. I hope this helps, and remember, moving abroad is an exciting step! Let me know if there's anythin

### Function to obtain the necessary data to use RAGAS

In [None]:
def process_questions(questions, retriever, chain):
    """
    Run every question through retrieval and generation, collecting the results.

    For each question the retriever is invoked first, the text of what it
    returns is extracted, and the chain is then invoked with that text as
    context.

    Args:
        questions (list): Questions to ask, processed in the given order
        retriever: Object whose `invoke(question)` returns the retrieved
            documents (a list, or a single item)
        chain: Object whose `invoke({"context": ..., "question": ...})`
            returns the generated answer

    Returns:
        list: One dict per question with the keys "question", "answer" and
            "contexts" (the list of extracted context strings)
    """

    def _as_text(item):
        # Items exposing `page_content` contribute that text; anything else is stringified
        return item.page_content if hasattr(item, "page_content") else str(item)

    records = []

    for query in questions:
        hits = retriever.invoke(query)

        # A single (non-list) result is handled as a one-element list
        if not isinstance(hits, list):
            hits = [hits]
        passages = [_as_text(hit) for hit in hits]

        # The same list of passages is sent to the chain and stored in the record
        reply = chain.invoke({"context": passages, "question": query})

        records.append({
            "question": query,
            "answer": reply,
            "contexts": passages
        })

    return records


In [None]:
# Evaluation questions sent to the RAG pipeline; the trailing comment numbers each entry.
# NOTE(review): the answers are later paired with the ground-truth entries by position (zip),
# so the order here presumably has to match the order in Ground_truth_answers.json — confirm.
questions_list = [
    "What is the average monthly rent in Lisbon?", # 1
    "How much does transportation cost in Lisbon?", # 2
    "What is the overall cost of living in Lisbon?", # 3
    "What steps should I take after arriving in Lisbon?", # 4
    "Does NOVA IMS accept international students?", # 5
    "As a non-EU student, what are the requirements to study at NOVA IMS in Lisbon?", # 6
    "What is the Portuguese Social Security Identification Number (NISS), and why do I need it?", # 7
    "How can a foreign citizen obtain a NISS?", # 8
    "How many master's programs does NOVA IMS offer?", # 9
    "Can you provide a list of the master's programs available at NOVA IMS?", # 10
    "Is there a master's program related to marketing?", # 11
    "What information can you provide about the Master’s Degree in Data Science and Advanced Analytics, specializing in Business Analytics?", # 12
    "Who is the coordinator of the Master’s Degree in Data Science and Advanced Analytics, specializing in Business Analytics?", # 13
    "As a foreign student, how much does it cost to study for a Bachelor's Degree at NOVA IMS?", # 14
    "What are the entry requirements for the Postgraduate Program in Enterprise Data Science & Analytics?", # 15
    "How can I apply to the Postgraduate Program in Enterprise Data Science & Analytics?", # 16
    "Are there any discounts available for the Master’s Degree in Information Management, specializing in Business Intelligence?", # 17
    "Does Professor Fernando Bação coordinate any academic programs?", # 18
    "Can I apply to multiple programs at NOVA IMS?", # 19
    "Does NOVA IMS provide accommodation options?", # 20
    "Can you recommend websites for finding accommodation in Lisbon?", # 21
    "I’m interested in learning Portuguese. Does the university offer a Portuguese language course?", # 22
    "I want to apply to three programs. Is that possible, and is there an application fee?", # 23
    "As a higher education student, how can I obtain a residence permit in Portugal?", # 24
    "What documents do I need to apply for a residence permit as a higher education student?", # 25
    "How can I prove my financial means to qualify for a residence permit?" # 26
]

In [None]:
# Run the function using the questions_list
# Each question triggers one retriever call and one model call, so this cell can take a while.
# The result is a list of dicts with the keys "question", "answer" and "contexts".
result_dataset = process_questions(questions_list, retriever, chain)

In [None]:
# Change the name according to the model you are using
# This is a second name for the same list object, not a copy.
mistral_ai_dataset = result_dataset

In [None]:
# Export the results of the model
# `dataset` is rebound here to the batch results of the model being evaluated.
dataset = mistral_ai_dataset

# Save results as UTF-8 JSON in the Answers folder
# (ensure_ascii=False keeps accented characters readable in the file)
with open(os.path.join(answers_path, "mistral_ai_answers.json"), "w", encoding="utf-8") as f:  # Change the name according to the model and version
    json.dump(dataset, f, ensure_ascii=False, indent=4)

In [27]:
# Load the saved answers from the Answers folder into `dataset`.
# NOTE(review): presumably this lets the following cells run without regenerating the answers — confirm.
# Change the file name according to the model and version.
with open(os.path.join(answers_path, "mistral_ai_answers.json"), "r", encoding="utf-8") as f:
    dataset = json.load(f)

In [None]:
# We are going to merge the question, answer, and contexts with the ground truth

# Load the .json file with the ground truth answers
# NOTE(review): expected to be a list of dicts that each have a "ground_truth" key
# (that key is read when the entries are merged) — confirm against the file.
with open(os.path.join(ground_truth_path, "Ground_truth_answers.json"), "r", encoding="utf-8") as f:
    Ground_truth = json.load(f)

In [32]:
# Add the “ground_truth” key to each answer dictionary.
# Answers and ground-truth entries are paired by position, so both lists must have
# the same length; zip would otherwise silently drop the unmatched entries.
if len(dataset) != len(Ground_truth):
    raise ValueError(
        f"Cannot merge: {len(dataset)} answers but {len(Ground_truth)} ground-truth entries"
    )

test = []
for data_dict, x_dict in zip(dataset, Ground_truth):
    # The answer dict itself is updated (not copied), so `dataset` gains the key as well
    data_dict["ground_truth"] = x_dict["ground_truth"]
    test.append(data_dict)

In [None]:
# Export the merged records (question, answer, contexts, ground_truth) as a .json file.
# Note that the file is written to the Ground_Truth folder, not to the Answers folder.
with open(os.path.join(ground_truth_path, "mistral_ai_truth.json"), "w", encoding="utf-8") as f:  # Change the name according to the model and version
    json.dump(test, f, ensure_ascii=False, indent=4)