<a href="https://colab.research.google.com/github/jeffreyong15/Counsel.NLP/blob/main/Llama_RAG(new).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### INSTALLATIONS

In [None]:
!pip install langgraph

In [None]:
!pip install numpy==1.26.4

In [None]:
%pip install datasets langchain-huggingface langchain-chroma

In [None]:
!pip install huggingface-hub transformers langchain-community

In [None]:
!pip install pandas==2.2.2

In [None]:
!pip install -qU "langchain[mistralai]"

In [None]:
!pip install -q rouge_score
!pip install -q bert-score

#### FILE IMPORTS

In [None]:
import time
import sys
import warnings
import json
import re
import pandas as pd
from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
from langchain.chat_models import init_chat_model
from langchain.vectorstores import Chroma
from langchain import hub
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
from langchain.schema import Document
from huggingface_hub import notebook_login
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
import spacy
from rouge_score import rouge_scorer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from bert_score import score
import matplotlib.pyplot as plt
import seaborn as sns
from spacy.lang.en.stop_words import STOP_WORDS
from transformers import logging as transformers_logging

In [None]:
import nltk
nltk.download('all', quiet=True)

In [None]:
import os
from getpass import getpass

# SECURITY: never store an access token in the notebook source. A token that has
# ever been committed with the notebook must be treated as leaked and revoked in
# the Hugging Face account settings.
# Reuse a token that is already in the environment; otherwise ask for one without
# echoing it into the cell output.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")

In [None]:
notebook_login()

#### LOAD JSON FILE

In [None]:
from google.colab import drive
drive.mount("/content/drive")

In [None]:
def load_json_data(file_path):
    """Read a UTF-8 JSON file and return its decoded content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the decoded value is empty (falsy).
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        records = json.loads(handle.read())

    if records:
        # The message always says "courses", whichever dataset is being loaded.
        print(f"Successfully loaded {len(records)} courses")
        return records

    raise ValueError("JSON data is empty")

# Google Drive locations of the two source datasets (Drive must already be mounted).
dataset_path = "/content/drive/MyDrive/CMPE-295A/dataset/SJSU_courses_with_metadata_updated.json"
gen_path = "/content/drive/MyDrive/CMPE-295A/dataset/complete_Gen_Advising.json"
# Both datasets are later passed to process_data(): the first as course/major
# records, the second as general-advising records.
SJSU_dataset = load_json_data(dataset_path)
Gen_Advising_Dataset = load_json_data(gen_path)

#### PREPARE DATASET

In [None]:
# Process dataset (Courses & Majors)
# Module-level registries that process_data() fills in as a side effect.
class_mapping = {}  # text before the first "-" of a title -> full title
code = ["No prerequisites listed", "No corequisites listed"]  # placeholder texts, then every title prefix seen
majors = []  # distinct metadata['major'] values in first-seen order
category = []  # distinct metadata['category'] values of records whose id is numeric


def _course_line(course, indent):
    """Format one course record as an indented '- CODE: Title (N units)' bullet."""
    return f"{indent}- {course['course']}: {course['title']} ({course['units']} units)"


def _append_course_entries(content, entries, indent):
    """Append one bullet per entry; entries are course dicts or plain strings.

    Entries of any other type are skipped.
    """
    for entry in entries:
        if isinstance(entry, dict):
            content.append(_course_line(entry, indent))
        elif isinstance(entry, str):
            content.append(f"{indent}- {entry}")


def _first_requisite(item, key):
    """Return the first entry stored under `key`, or "N/A" when there is none.

    Indexing the "N/A" default directly would yield "N", and indexing an empty
    list would raise IndexError; both cases map to "N/A" here.
    """
    value = item.get(key)
    if isinstance(value, str):
        return value or "N/A"
    if value:
        return value[0]
    return "N/A"


def process_data(json_data, gen_data):
    """Turn the advising and catalogue records into LangChain Documents.

    Args:
        json_data: Iterable of course/major records. Each record is read for
            'id', 'metadata' ('major' and, for numeric ids, 'category'),
            'title' and the optional sections rendered below.
        gen_data: Iterable of general-advising records ('title', 'description').

    Returns:
        list[Document]: the advising documents first (their metadata holds only
        'title'), followed by one document per catalogue record.

    Side effects:
        Appends to the module-level `majors`, `category`, `code` and
        `class_mapping`. Calling this twice without resetting them duplicates
        the entries in `code`.
    """
    documents = []

    for item in gen_data:
        title = item.get('title', 'N/A')
        content = [
            f"Title: {title}",
            f"Description: {item.get('description', 'N/A')}"
        ]
        documents.append(Document(page_content="\n".join(content), metadata={"title": title}))

    for item in json_data:
        item_meta = item['metadata']
        has_numeric_id = item['id'].isdigit()
        is_major = 'core_courses' in item

        if item_meta['major'] not in majors:
            majors.append(item_meta['major'])
        if has_numeric_id and item_meta['category'] not in category:
            category.append(item_meta['category'])

        title = item.get('title', 'N/A')
        # Reset per record so a record without a title never inherits the
        # previous record's class name (or hits an unbound local on the first one).
        class_name = 'N/A'
        if title != "N/A":
            class_name = title.split("-")[0].strip()
            code.append(class_name)
            class_mapping[class_name] = title

        content = [
            f"Title: {title}",
            f"Type: {'Major' if is_major else 'Course'}",
            f"Units: {item.get('units', 'N/A')}",
            f"Description: {item.get('description', 'N/A')}",
            f"Grading: {item.get('grading', 'N/A')}",
            f"Class Structure: {item.get('class_structure', 'Class structure not found')}"
        ]

        # Handle prerequisites, corequisites and notes (each is a list of strings)
        for key, label in (
            ('prerequisite(s)', "Prerequisite(s): "),
            ('corequisite(s)', "Corequisite(s): "),
            ('pre/corequisite(s)', "Pre/Corequisite(s): "),
            ('notes', "Note(s): "),
        ):
            if item.get(key):
                content.append(label + ", ".join(item[key]))

        # Handle core courses
        if is_major:
            content.append("\nCore Courses:")
            for course in item.get('core_courses', []):
                content.append(_course_line(course, ""))

        # Handle specialization tracks
        if 'specialization_tracks' in item:
            content.append("\nSpecialization Tracks:")

            for specialization, details in item['specialization_tracks'].items():
                content.append(f"\n- {specialization}:")

                if isinstance(details, list):  # MSAI-style specialization (direct list of courses)
                    for course in details:
                        content.append(_course_line(course, "  "))

                elif isinstance(details, dict):  # MSSE-style specialization (nested dictionary)
                    if 'overview' in details:
                        content.append(f"  Overview: {details['overview']}")

                    if 'required_core_courses' in details:
                        content.append("\n  Required Core Courses:")
                        for course in details['required_core_courses']:
                            content.append(_course_line(course, "    "))

                    if 'specialization_choice_courses' in details:
                        content.append("\n  Specialization Choice Courses:")
                        for course in details['specialization_choice_courses']:
                            content.append(_course_line(course, "    "))

        # Handle elective courses
        if 'elective_courses' in item:
            electives = item['elective_courses']
            content.append("\nElective Courses:")
            if 'overview' in electives:
                content.append(f"  Overview: {electives['overview']}")
                # Restricted courses are only rendered for the overview-style layout.
                if 'restricted_courses' in electives:
                    content.append("\n  Restricted Courses (cannot be taken as electives):")
                    _append_course_entries(content, electives['restricted_courses'], "    ")
            else:
                # Area-style layout: {area name: [courses]}
                for area, area_courses in electives.items():
                    content.append(f"\n- {area}:")
                    _append_course_entries(content, area_courses, "  ")

        # Handle graduate writing requirement
        if 'graduate_writing_requirement' in item:
            content.append("\nGraduate Writing Requirement:")
            gww = item['graduate_writing_requirement']
            if 'courses' in gww:  # Multi-course format
                for course in gww['courses']:
                    content.append(_course_line(course, "  "))
                    if 'description' in course:
                        content.append(f"    Description: {course['description']}")
            elif 'course' in gww:  # Single-course format
                content.append(_course_line(gww, "  "))

        # Handle culminating experience
        if 'culminating_experience' in item:
            content.append("\nCulminating Experience Options:")
            for option, option_courses in item['culminating_experience'].items():
                content.append(f"\n- {option}:")
                _append_course_entries(content, option_courses, "  ")

        documents.append(Document(
            page_content="\n".join(content),
            metadata={
                "title": title,
                "class_name": class_name if has_numeric_id else 'N/A',
                "type": "Major" if is_major else "Course",
                "major": item_meta['major'],
                "category": item_meta['category'] if has_numeric_id else 'N/A',
                # Only the first listed requisite text is kept; the next cell
                # expands it into numbered prereq_N / coreq_N keys.
                "prereq": _first_requisite(item, "prerequisite(s)"),
                "coreq": _first_requisite(item, "corequisite(s)"),
            },
        ))

    return documents

In [None]:
documents = process_data(SJSU_dataset, Gen_Advising_Dataset)

# Replace the raw "prereq"/"coreq" texts with numbered metadata keys
# (prereq_1, prereq_2, ... and coreq_1, coreq_2, ...), one per entry of `code`
# that occurs in the text. Numbering follows the order of `code`, and the test is
# a plain substring check; classify_question() builds its filters the same way.
for doc in documents:
    # General-advising documents only carry a title and have no requisite fields.
    if "prereq" not in doc.metadata:
        continue

    prereq_text = doc.metadata.pop("prereq")
    coreq_text = doc.metadata.pop("coreq")
    prereq_idx = 1
    coreq_idx = 1
    for known_code in code:
        if known_code in prereq_text:
            doc.metadata[f"prereq_{prereq_idx}"] = known_code
            prereq_idx += 1
        if known_code in coreq_text:
            doc.metadata[f"coreq_{coreq_idx}"] = known_code
            coreq_idx += 1

    # Every course document gets at least prereq_1 / coreq_1 so filters can rely on them.
    if prereq_idx == 1:
        doc.metadata["prereq_1"] = "N/A"
    if coreq_idx == 1:
        doc.metadata["coreq_1"] = "N/A"

In [None]:
# NOTE(review): drops the last three entries of `code` — presumably trailing
# non-course titles; confirm. Not idempotent: every re-run of this cell removes
# three more entries.
code = code[:-3]

In [None]:
documents[800].metadata

#### VECTOR STORE SAVE

In [None]:
# Embed every document with the all-MiniLM-L6-v2 sentence-transformer (on CPU) and
# build a Chroma collection stored under ./vector__store.
directory = "./vector__store"
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
vector_store = Chroma.from_documents(documents=documents, embedding=embeddings, persist_directory=directory)

In [None]:
vector_store.persist()

In [None]:
vector_store.get()

#### VECTOR STORE LOAD

In [None]:
# LOAD WITH THIS
# Re-open an already built Chroma store instead of re-embedding the documents.
# The embedding model must be the same one the store was built with.
# NOTE(review): this reads from Google Drive, while the save cell writes to
# ./vector__store — presumably the folder is copied to Drive by hand; confirm.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
directory = "/content/drive/MyDrive/CMPE-295A/dataset/vector__store"
vector_store = Chroma(persist_directory=directory, embedding_function=embeddings)
vector_store.get()

In [None]:
# $gt, $gte, $lt, $lte, $ne, $eq, $in, $nin
# Manual check of metadata filtering: keep documents whose first two numbered
# prerequisites are BIOL 115 and BIOL 118, in either order.
# NOTE(review): the query text asks about MATH 32 while the filter selects
# BIOL 115 / BIOL 118 — confirm which one was intended.
results = vector_store.similarity_search(
    "What courses require MATH 32 as a prerequisite?",
    k=5,
    filter={"$or": [{"$and": [{"prereq_1": {"$eq": "BIOL 115"}},
                    {"prereq_2": {"$eq": "BIOL 118"}}]},
                    {"$and": [{"prereq_1": {"$eq": "BIOL 118"}},
                    {"prereq_2": {"$eq": "BIOL 115"}}]}]}
)
for res in results:
    print(f"* {res.page_content}")

#### MODEL

In [None]:
# RAG prompt pulled from the LangChain hub. Only the commented-out generate()
# variant references it; the active generate() formats prompt_template instead.
prompt = hub.pull("rlm/rag-prompt")

# Llama_model = "meta-llama/Llama-3.2-1B-Instruct"

# llm = HuggingFaceEndpoint(repo_id=Llama_model,
#                             task="text-generation",
#                             max_new_tokens=512,
#                             do_sample=False,
#                             repetition_penalty=1.03)

Llama_model = "meta-llama/Llama-3.2-3B-Instruct"

# Hosted text-generation endpoint used by generate(); answers are capped at 256 new tokens.
# NOTE(review): do_sample=False is combined with temperature=0.4 — confirm which
# decoding mode the endpoint actually applies and drop the setting that is ignored.
llm = HuggingFaceEndpoint(
    repo_id=Llama_model,
    task="text-generation",
    max_new_tokens=256,
    do_sample=False,
    temperature=0.4,
    repetition_penalty=1.03
)


In [None]:
file_path = "/content/drive/MyDrive/CMPE-295A/dataset/courses.txt"
# Read one entry per line (presumably course codes — confirm against the file).
# Blank and whitespace-only lines are dropped: classify_question() tests
# `entry in question`, and an empty string is a substring of every question.
# A list filter is used instead of `courses.remove('')`, which removes only the
# first blank line and raises ValueError when the file has none.
with open(file_path, "r", encoding="utf-8") as f:
    courses = [line for line in f.read().splitlines() if line.strip()]

In [None]:
def _find_course(course, text):
    """Return the index where `course` occurs in `text` as a whole token, or -1.

    The occurrence must not touch a letter or digit on either side, so "CS 1"
    is not reported inside "CS 156".
    """
    if not course:
        return -1
    found = re.search(r"(?<![A-Za-z0-9])" + re.escape(course) + r"(?![A-Za-z0-9])", text)
    return found.start() if found else -1


def _requisite_filters(known_courses, sections):
    """Build numbered requisite filters such as {"prereq_2": {"$eq": "BIOL 118"}}.

    `sections` is a list of (key_prefix, text) pairs. Every course found in a
    text gets the next free number for that prefix, in the order of
    `known_courses`.

    Plain substring matching is kept on purpose: the prereq_N / coreq_N metadata
    is assigned at indexing time with the same `code in text` test, so the
    numbering only lines up if both sides match identically.
    """
    counters = {prefix: 1 for prefix, _ in sections}
    filters = []
    for course in known_courses:
        if not course:
            continue  # an empty entry would match every question
        for prefix, text in sections:
            if course in text:
                filters.append({f"{prefix}_{counters[prefix]}": {"$eq": course}})
                counters[prefix] += 1
    return filters


def _combine_filters(filters, operator):
    """Return None, the single filter, or {operator: filters} for two or more.

    A logical operator is only emitted for two or more operands, since Chroma
    rejects `$and` / `$or` lists with fewer than two expressions.
    """
    if not filters:
        return None
    if len(filters) == 1:
        return filters[0]
    return {operator: filters}


def classify_question(question: str, course_list=None):
    """Derive a Chroma metadata filter from keywords and course codes in a question.

    Branches, checked in this order (all keyword tests are case-sensitive
    substring tests):
      * "between"          -> class_name filter for every course mentioned (OR-ed).
      * "require" / "have" -> numbered prereq_N / coreq_N filters. If both
        "corequisite" and "prerequisite" occur, the question is split at the
        earlier keyword: courses before it belong to that keyword's kind and
        courses from it onwards to the other kind. Filters are AND-ed when the
        question contains "and", otherwise OR-ed.
      * "need"             -> class_name of the course mentioned last.
      * otherwise          -> class_name of the first known course mentioned.

    Args:
        question: The user question.
        course_list: Course codes to look for. Defaults to the module-level
            `courses` list.

    Returns:
        A filter dict, or None when no course is recognised (unfiltered search).
    """
    known_courses = courses if course_list is None else course_list

    if "between" in question:
        filters = [
            {"class_name": {"$eq": course}}
            for course in known_courses
            if _find_course(course, question) >= 0
        ]
        return _combine_filters(filters, "$or")

    if "require" in question or "have" in question:
        asks_coreq = "corequisite" in question
        asks_prereq = "prerequisite" in question
        if asks_coreq and asks_prereq:
            coreq_pos = question.find("corequisite")
            prereq_pos = question.find("prerequisite")
            if coreq_pos < prereq_pos:
                sections = [("coreq", question[:coreq_pos]), ("prereq", question[coreq_pos:])]
            else:
                sections = [("prereq", question[:prereq_pos]), ("coreq", question[prereq_pos:])]
        elif asks_prereq:
            sections = [("prereq", question)]
        else:
            # Neither keyword, or only "corequisite": treated as a corequisite query.
            sections = [("coreq", question)]
        operator = "$and" if "and" in question else "$or"
        return _combine_filters(_requisite_filters(known_courses, sections), operator)

    if "need" in question:
        # Pick the course that appears latest in the question.
        last_course, last_pos = None, -1
        for course in known_courses:
            pos = _find_course(course, question)
            if pos > last_pos:
                last_course, last_pos = course, pos
        # No course mentioned: return None rather than a filter on an empty
        # class name, which would match no document at all.
        return {"class_name": last_course} if last_course is not None else None

    # FIND THE CLASS
    for course in known_courses:
        if _find_course(course, question) >= 0:
            return {"class_name": course}
    return None

In [None]:
# Plain-text prompt used by generate(); {context} and {question} are filled in
# with str.format().
prompt_template = """
Answer the question based on the context below.
Do not make up information. Be concise and to the point.

Context: {context}

Question: {question}

Answer:
"""

class State(TypedDict):
    """State passed between the retrieve and generate nodes of the graph."""
    question: str  # user question supplied when the graph is invoked
    context: List[Document]  # documents returned by retrieve()
    answer: str  # cleaned LLM output written by generate()
    source_documents: List[str]  # page_content of every retrieved document

def retrieve(state: State) -> State:
    """Fetch the ten documents most similar to the question.

    The metadata filter produced by classify_question() (possibly None) narrows
    the similarity search. Returns a partial state update holding the documents
    and their raw text.
    """
    user_question = state["question"]
    metadata_filter = classify_question(user_question)
    matches = vector_store.similarity_search(user_question, k=10, filter=metadata_filter)
    return {
        "context": matches,
        # Extract source document content
        "source_documents": [match.page_content for match in matches],
    }

# def generate(state: State) -> State:
#     docs_content = "\n\n".join(doc.page_content for doc in state["context"])
#     messages = prompt.invoke({"question": state["question"], "context": docs_content})
#     response = llm.invoke(messages)
#     return {"answer": response, "source_documents": state["source_documents"]}

def generate(state: State) -> State:
    """Answer the question from the retrieved context and tidy the LLM output.

    The retrieved documents are joined with blank lines, substituted into
    prompt_template and sent to the LLM. Returns the cleaned answer together
    with the source texts already stored in the state.
    """
    context_text = "\n\n".join(doc.page_content for doc in state["context"])
    llm_input = prompt_template.format(context=context_text, question=state["question"])
    raw_answer = llm.invoke(llm_input)

    # Flatten line breaks, then collapse double spaces (a single pass).
    flattened = raw_answer.replace("\n", " ").replace("  ", " ")

    # Drop filler phrases and trim the ends.
    filler_phrases = r"\bI don't know\b|\bAdditionally\b|\bIn conclusion\b|\bbased on the context\b"
    trimmed = re.sub(filler_phrases, "", flattened).strip()

    # Strip list artifacts such as a tab followed by "+" or a bare tab.
    answer = re.sub(r"\t\+|\t", "", trimmed)

    return {"answer": answer, "source_documents": state["source_documents"]}


# Two-step pipeline: START -> retrieve -> generate.
# add_sequence() registers both functions and links them in order; the entry edge
# refers to the first node by the name "retrieve".
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
questions = ["What are the prerequisites for KIN 1?",
        "List all the different CMPE courses.",
        "What are the core courses for the MSAI major?",
        "Can you provide the description of CMPE 252?",
        "What are the restricted courses for MSSE major as an elective course?",
        "What are specialization tracks for MSAI major?",
        "What are the prerequisites for CS 156?",
        "What are the corequisites for BIOL 124?",
        "What are the pre/corequisites for ARTH 11?",
        "What is the class structure for AE 110?",
        "What are the prerequisites for the MSCMPE major?",
        "What are all the elective courses for the MSAI major?",
        "What are the culminating experience options for the MSSE major?",
        "What are the graduate writing requirement for the MSSE major?",
        "What courses require CMPE 252 as a prerequisite?",
        "Which KIN courses fulfill Movement Area 5 Team requirements?",
        "How many units is ISE 297?",
        "Is there a beginning-level swimming course at SJSU?",
        "Do I need instructor consent to enroll in ADV 116 - Spartan Daily Advertising Staff?",
        "Do I need MATH 33A to enroll in AE 105 - Mathematical Methods for Aerospace Engineers?",
        "Can I take ADV 127 - Practical Qualitative Research in Advertising without taking ADV 91?",
        "What is the grading system for CHIN 132?",
        "What are the course recommendations for Software Engineering(MSSE) major?",
        "What are the best courses to take for machine learning?",
        "Can you recommend an advertising course that focuses on digital media?",
        "I want to take a dance class. What are my options?",
        "What are the core courses for Software Engineering(MSSE) major?",
        "Which aerospace courses involve MATLAB programming?",
        "What is the difference between KIN 35A and KIN 35B?",
        "What are the prerequisites for BUS4 119A?",
        "How many units should i complete as a software engineer major at SJSU?",
        "Are there any Software Engineering courses that focus specifically on cloud computing and distributed systems?",
        "What are the necessary prerequisites for taking What are the necessary prerequisites for taking Introduction to Database Management Systems (CS 157A)?",
        "If I want to take CS 160, which courses should I complete first?",
        "If I want to focus on cybersecurity, can I substitute any SE courses for CS security-related electives?",
        "Can a Software Engineering major take AI-focused courses from the CS department as electives?",
        "What are the best elective choices for a CS student who wants to specialize in data science?",
        "How can a graduate student clear their provisional admission status?",
        "Are graduate students allowed to leave for a semester?",
        "Are undergraduate courses considered in GPA calculation for graduates?",
        "How do I switch to a different graduate program?",
        "Can I enroll in two masters program at the same time?",
        "What resources are available for graduate students through the SJSU Writing Center?",
        "Are there organizations to connect with Alumni?",
        "What should graduate students do if they need to change their graduation date?",
        "How do I maintain my F-1 Status?",
        "What are the requirements for the J-1 visitor program?",
        "What grades are considered unsatisfactory?",
        "What grades are condidered satisfactory?",
        "What is the deadline for submitting my candidacy form?",
        "How do I negotiate an offer with my employer?",
        "What are some interview tips?",
        "What type of questions are asked in interviews?",
        "What opportunities are there for graudate students?",
        "What should I do after accepted a job offer?",
        "How should I format my resume as an international?",
        "What are some tips to maximize my experience at a career fair?",
        "How important is it to network?",
        "Who can I talk to about financial aid related information?",
        "Does SJSU have any counseling services?",
        "What are some common interview questions?"
        ]

In [None]:
# Silences every Python warning from here on.
warnings.filterwarnings('ignore')

# Parallel result columns, one entry per evaluated question; the next cell
# assembles them into a DataFrame.
question_id = []
question = []
responses = []
source_docs = []
response_times = []
response_lengths = []

for idx, q in enumerate(questions, start=1):
    print(q)
    question_id.append(idx)  # 1-based position in `questions`
    question.append(q)
    # Wall-clock time of the whole retrieve + generate run for this question.
    start_time = time.time()
    response = graph.invoke({"question": q})
    elapsed_time = time.time() - start_time
    answer_length = len(response["answer"])  # answer length in characters

    responses.append(response["answer"])
    source_docs.append(response["source_documents"])
    response_times.append(round(elapsed_time, 2))
    response_lengths.append(answer_length)

In [None]:
# Full results table, including the source texts; this is what gets written to
# answers.csv and passed to evaluate_model().
df = pd.DataFrame({
    "Question_id": question_id,
    "Questions": question,
    "Responses": responses,
    "Source_Documents": source_docs,
    "Response_Time": response_times,
    "Response_Length": response_lengths
})

# Display copy without the source documents and the id column.
responses_df = df.drop(columns=[col for col in df.columns if 'Source_Documents' in col or col == 'Question_id'])
# Show every row and the untruncated text of each cell when a frame is rendered
# (these options stay in effect for the rest of the session).
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', None)
df.to_csv("answers.csv", index=False)
responses_df

#### EVALUATION

In [None]:
# def calculate_rouge_scores(prediction: str, reference: str) -> dict:
#     scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
#     scores = scorer.score(reference, prediction)
#     return {
#         'rouge1': scores['rouge1'].fmeasure,
#         'rouge2': scores['rouge2'].fmeasure,
#         'rougeL': scores['rougeL'].fmeasure
#     }

# def calculate_bleu_score(prediction: str, reference: str) -> float:
#     smoother = SmoothingFunction()
#     prediction_tokens = word_tokenize(prediction.lower())
#     reference_tokens = [word_tokenize(reference.lower())]
#     return sentence_bleu(reference_tokens, prediction_tokens, smoothing_function=smoother.method1)

nlp = spacy.load("en_core_web_sm")

def calculate_bertscore(prediction: str, reference: str):
    """Return the BERTScore F1 of one prediction against one reference.

    Both texts are scored as a single English pair; precision and recall are
    computed but discarded. The result is a plain Python float.
    """
    _precision, _recall, f1_scores = score(
        [prediction],
        [reference],
        lang="en",
        verbose=False,
    )
    return f1_scores.mean().item()

def check_answer_presence(prediction: str, source_docs: list) -> float:
    """Fraction of the answer's entities and noun chunks found in the sources.

    Named entities and noun chunks of `prediction` are lower-cased and
    de-duplicated. An element counts as present when it is a substring of the
    lower-cased, space-joined source text and is not a spaCy stop word. Stop-word
    elements still count in the denominator.

    Args:
        prediction: The generated answer.
        source_docs: Source texts the answer should be grounded in.

    Returns:
        A value in [0, 1]; 1.0 when the answer contains no entities or noun chunks.
    """
    pred_doc = nlp(prediction)
    pred_elements = {ent.text.lower() for ent in pred_doc.ents}
    pred_elements.update(chunk.text.lower() for chunk in pred_doc.noun_chunks)

    if not pred_elements:
        return 1.0

    # Only substring tests are needed, so the source text is lower-cased once and
    # never run through the spaCy pipeline (the parsed source was never used).
    source_text = ' '.join(source_docs).lower() if source_docs else ''

    matches = sum(
        1
        for element in pred_elements
        if element in source_text and element not in STOP_WORDS
    )
    return matches / len(pred_elements)

def check_consistency(prediction: str, source_docs: list) -> float:
    """Fraction of number-like tokens in the answer that also occur in the sources.

    Tokens are compared by their exact text; number-like tokens that are spaCy
    stop words are ignored on both sides. Returns 1.0 when the answer contains no
    number-like token.
    """
    ignored = set(STOP_WORDS)

    def numeric_tokens(parsed):
        # Text of every number-like token that is not a stop word.
        return [tok.text for tok in parsed if tok.like_num and tok.text not in ignored]

    answer_numbers = numeric_tokens(nlp(prediction))
    joined_sources = ' '.join(source_docs) if source_docs else ''
    source_numbers = numeric_tokens(nlp(joined_sources))

    if not answer_numbers:
        return 1.0

    hits = 0
    for number in answer_numbers:
        if number in source_numbers:
            hits += 1
    return hits / len(answer_numbers)

def evaluate_model(responses_df, reference_dict):
    """Score every response and return per-question and average metrics.

    Args:
        responses_df: DataFrame with 'Question_id', 'Responses' and
            'Source_Documents' columns.
        reference_dict: Mapping of question id -> reference answer. Ids missing
            from the mapping are scored against an empty reference.

    Returns:
        (individual_metrics_df, average_metrics_df): one row per question with
        'question_id', 'bertscore_f1', 'answer_presence' and 'consistency', and
        a single-row frame with the mean of each metric. For an empty input the
        averages are NaN instead of raising ZeroDivisionError.
    """
    metric_names = ['bertscore_f1', 'answer_presence', 'consistency']
    records = {'question_id': [], **{name: [] for name in metric_names}}

    for _, row in responses_df.iterrows():
        question_id = row['Question_id']
        model_response = row['Responses']
        source_documents = row['Source_Documents']

        reference_answer = reference_dict.get(question_id, "")

        # Non-string responses (e.g. missing values) are scored as empty text.
        if not isinstance(model_response, str):
            model_response = ''
        # A list of texts or Document-like objects is flattened to one string.
        if isinstance(source_documents, list):
            source_documents = ' '.join(
                doc.page_content if hasattr(doc, 'page_content') else str(doc)
                for doc in source_documents
            )

        records['question_id'].append(question_id)
        records['bertscore_f1'].append(calculate_bertscore(model_response, reference_answer))
        records['answer_presence'].append(check_answer_presence(model_response, [source_documents]))
        records['consistency'].append(check_consistency(model_response, [source_documents]))

    row_count = len(records['question_id'])
    average_metrics = {
        name: (sum(records[name]) / row_count if row_count else float('nan'))
        for name in metric_names
    }

    # Create DataFrames
    individual_metrics_df = pd.DataFrame(records)
    average_metrics_df = pd.DataFrame([average_metrics])

    return individual_metrics_df, average_metrics_df

In [None]:
# Disable transformers warnings
transformers_logging.set_verbosity_error()

reference_path = "/content/drive/MyDrive/CMPE-295A/dataset/SJSU_reference_answer.json"
with open(reference_path, 'r') as file:
    reference_data = json.load(file)

# question_id -> reference answer.
# NOTE(review): presumably these ids are the same 1-based integers assigned in the
# question loop; an id of a different type or value is silently scored against an
# empty reference — confirm against the JSON file.
reference_ans = {item['question_id']: item['reference_answer'] for item in reference_data}

# Scored on the full results frame, which carries the Source_Documents column.
individual_metrics_df, average_metrics_df = evaluate_model(df, reference_ans)

In [None]:
individual_metrics_df

In [None]:
average_metrics_df

#### VISUALIZATION

In [None]:
# Melt the DataFrame for easier plotting
# Long format: one row per (question_id, metric) pair with its value in 'score'.
df_melted = individual_metrics_df.melt(id_vars=['question_id'],
                    value_vars=['bertscore_f1', 'answer_presence', 'consistency'],
                    var_name='metric',
                    value_name='score')

In [None]:
# Plot for BERTScore
# One bar per question showing its BERTScore F1.
# NOTE(review): `ci=` and `palette` without `hue` are reported as deprecated by
# recent seaborn releases — confirm against the installed version.
plt.figure(figsize=(10, 6))
sns.barplot(x='question_id', y='score', data=df_melted[df_melted['metric'] == 'bertscore_f1'],
            ci=None, palette="spring")

plt.title('BERTScore Comparison', fontsize=13)
plt.xlabel('Question ID', fontsize=11)
plt.ylabel('BERTScore F1', fontsize=11)
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Plot for Answer Presence
# One bar per question showing its answer-presence score.
# NOTE(review): `ci=` and `palette` without `hue` are reported as deprecated by
# recent seaborn releases — confirm against the installed version.
plt.figure(figsize=(10, 6))
sns.barplot(x='question_id', y='score', data=df_melted[df_melted['metric'] == 'answer_presence'],
            ci=None, palette="magma")

plt.title('Answer Presence Comparison', fontsize=13)
plt.xlabel('Question ID', fontsize=11)
plt.ylabel('Answer Presence', fontsize=11)
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Plot for Consistency
# One bar per question showing its consistency score.
# NOTE(review): `ci=` and `palette` without `hue` are reported as deprecated by
# recent seaborn releases — confirm against the installed version.
plt.figure(figsize=(10, 6))
sns.barplot(x='question_id', y='score', data=df_melted[df_melted['metric'] == 'consistency'],
            ci=None, palette="YlGnBu")

plt.title('Consistency Comparison', fontsize=13)
plt.xlabel('Question ID', fontsize=11)
plt.ylabel('Consistency', fontsize=11)
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
def plot_metrics(individual_df):
    """Draw a bar chart of the mean of each metric, labelled with its value.

    Args:
        individual_df: Per-question metrics with 'bertscore_f1',
            'answer_presence' and 'consistency' columns.
    """
    metric_columns = ['bertscore_f1', 'answer_presence', 'consistency']
    bar_colors = ['skyblue', 'lightgreen', 'salmon']
    axes = individual_df[metric_columns].mean().plot(kind='bar', figsize=(10, 6), color=bar_colors)

    # Write each mean (4 decimals) just above the top of its bar.
    for bar in axes.patches:
        bar_top = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width() / 2.
        axes.annotate(
            f'{bar_top:.4f}',
            (bar_centre, bar_top),
            ha='center', va='center', fontsize=12, color='black',
            xytext=(0, 5), textcoords='offset points',
        )

    plt.title("Average Performance of the Model Across Metrics")
    plt.ylabel("Score")
    plt.xlabel("Metric")
    plt.xticks(rotation=0)
    plt.tight_layout()
    plt.show()

def summary_statistics(individual_df):
    """Print the mean and standard deviation of the three metric columns."""
    metric_columns = ['bertscore_f1', 'answer_presence', 'consistency']
    print(individual_df[metric_columns].agg(['mean', 'std']))

In [None]:
plot_metrics(individual_metrics_df)

In [None]:
summary_statistics(individual_metrics_df)