<a href="https://colab.research.google.com/github/babatundeibukun/Moderating_deliberations_with_LLMs/blob/main/automatic_testing_for_prompt_Few_shot_COT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
%pip install --upgrade openai



In [None]:
from openai import OpenAI
import os
# Chat model name passed to every chat-completion request made through `client`.
MODEL = "gpt-4o"

# The API key is read from the OPENAI_API_KEY environment variable.
# NOTE(review): when the variable is unset, the literal placeholder "API_KEY" is
# passed instead, so the client is still constructed; presumably requests then
# fail authentication at call time — confirm, and consider failing fast here.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "API_KEY"))

# Final automatic testing code

In [None]:
import openai
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Define OpenAI API function to check if two texts have the same meaning
def check_same_meaning(text1, text2):
    """Ask the chat model whether two texts have the same meaning.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        bool: True when the model's reply starts with "yes" (case-insensitive,
        surrounding whitespace ignored), False otherwise — including when the
        reply carries no text at all.
    """
    response_analysis = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that is very meticulous and reads every text line by line and separates different points in one sentence"},
            {"role": "user", "content": f"Do the following two texts have the same meaning?\nText 1: {text1}\nText 2: {text2}\nAnswer with 'yes' or 'no'."},
        ],
        max_tokens=1024,
        temperature=0,
        top_p=1.0,
        frequency_penalty=0,
        presence_penalty=0,
    )

    # The message content may be None; treat that as "no verdict".
    verdict = response_analysis.choices[0].message.content or ""

    # Normalise before testing so "Yes", "yes." and "Yes, they do" all count.
    # (An expression of the form `reply == 'Yes' or 'yes'` is always truthy
    # because the non-empty string 'yes' is the fallback operand of `or`.)
    return verdict.strip().lower().startswith("yes")

# Define function to calculate cosine similarity
def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity between the TF-IDF vectors of two texts.

    A fresh ``TfidfVectorizer`` is fitted on just these two texts, so the
    score reflects their lexical overlap only.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The off-diagonal entry of the 2x2 similarity matrix, or ``0.0`` when
        the vectorizer cannot build a vocabulary from the two texts (for
        example when both are empty).
    """
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text1, text2])
    except ValueError:
        # Raised by scikit-learn for an empty vocabulary; texts with no usable
        # tokens share nothing, so report "no similarity" instead of aborting
        # the whole comparison run.
        return 0.0

    similarity_matrix = cosine_similarity(tfidf_matrix.toarray())
    # Entry [0, 1] is text1 against text2; the diagonal is self-similarity.
    return similarity_matrix[0, 1]

# Cumulative confusion counts, one independent counter dict per scored category.
metrics = {
    category: dict.fromkeys(("TP", "FP", "FN", "TN"), 0)
    for category in (
        "answers",
        "answer_classifications",
        "arguments",
        "argument_classifications",
    )
}

# Main comparison function
def compare(annotated, model, metrics):
    """Score one test case of model output against the human annotation.

    Each annotated answer is paired with the most similar model answer whose
    TF-IDF cosine similarity exceeds 0.5 and which ``check_same_meaning``
    accepts. Arguments of a paired answer are then matched one-to-one with the
    same two checks.

    Counting rules (applied to both the per-case and the cumulative counters):
      * matched answer / argument            -> TP
      * matched, but classification differs  -> FP in the *_classifications row
      * annotated item with no model match   -> FN
      * model item with no annotated match   -> FP

    Args:
        annotated: List of answer dicts with keys ``'answer'``,
            ``'classification'`` and optionally ``'arguments'`` (a list of
            dicts with ``'text'`` and ``'classification'``).
        model: Model output in the same structure.
        metrics: Cumulative counters; updated in place.

    Returns:
        tuple: ``(local_metrics, metrics)`` — the counters for this call only,
        and the same ``metrics`` object that was passed in.

    Note:
        Model answers are not reserved once paired, so two annotated answers
        may select the same model answer.
    """
    categories = ("answers", "answer_classifications", "arguments", "argument_classifications")
    local_metrics = {name: {"TP": 0, "FP": 0, "FN": 0, "TN": 0} for name in categories}

    def record(category, outcome, count=1):
        # Keep the per-case and the cumulative counters in lock-step.
        metrics[category][outcome] += count
        local_metrics[category][outcome] += count

    matched_answer_texts = set()
    matched_argument_texts = set()

    for ann_answer in annotated:
        ann_text = ann_answer['answer']
        ann_classification = ann_answer['classification']
        ann_args = ann_answer.get('arguments', [])

        best_match = None
        highest_similarity = 0

        for model_answer in model:
            model_text = model_answer['answer']
            cosine_sim = calculate_cosine_similarity(ann_text, model_text)
            # Cheap numeric checks first: the LLM is only consulted for
            # candidates that could actually replace the current best match.
            if (cosine_sim > 0.5
                    and cosine_sim > highest_similarity
                    and check_same_meaning(ann_text, model_text)):
                highest_similarity = cosine_sim
                best_match = model_answer

        if best_match is None:
            # The whole annotated answer, including its arguments, was missed.
            record("answers", "FN")
            record("answer_classifications", "FN")
            record("arguments", "FN", len(ann_args))
            record("argument_classifications", "FN", len(ann_args))
            continue

        matched_answer_texts.add(best_match['answer'])
        record("answers", "TP")

        if ann_classification.lower() == best_match['classification'].lower():
            record("answer_classifications", "TP")
        else:
            record("answer_classifications", "FP")

        # Pair arguments one-to-one; positions (not texts) mark a model
        # argument as used so duplicate texts are still handled correctly.
        model_args = best_match.get('arguments', [])
        used_model_positions = set()

        for ann_arg in ann_args:
            ann_arg_text = ann_arg['text']
            ann_arg_classification = ann_arg['classification']

            for position, model_arg in enumerate(model_args):
                if position in used_model_positions:
                    continue

                model_arg_text = model_arg['text']
                cosine_sim_arg = calculate_cosine_similarity(ann_arg_text, model_arg_text)
                if cosine_sim_arg > 0.5 and check_same_meaning(ann_arg_text, model_arg_text):
                    used_model_positions.add(position)
                    matched_argument_texts.add(model_arg_text)
                    record("arguments", "TP")

                    if ann_arg_classification.lower() == model_arg['classification'].lower():
                        record("argument_classifications", "TP")
                    else:
                        record("argument_classifications", "FP")
                    break
            else:
                # No model argument was accepted for this annotated argument.
                record("arguments", "FN")
                record("argument_classifications", "FN")

        # Every model argument that was never paired is spurious. Counting the
        # unpaired ones directly (rather than the difference of list lengths)
        # stays correct when some annotated arguments went unmatched too.
        spurious = len(model_args) - len(used_model_positions)
        record("arguments", "FP", spurious)
        record("argument_classifications", "FP", spurious)

    # Model answers that no annotated answer selected are spurious, and so are
    # their arguments (unless the same text was already accepted elsewhere).
    for model_answer in model:
        if model_answer['answer'] in matched_answer_texts:
            continue
        record("answers", "FP")
        record("answer_classifications", "FP")
        for model_arg in model_answer.get('arguments', []):
            if model_arg['text'] not in matched_argument_texts:
                record("arguments", "FP")
                record("argument_classifications", "FP")

    return local_metrics, metrics

# Function to display metrics
def display_metrics(metrics, title="Metrics"):
    """Print a titled report with one line per metric category.

    Args:
        metrics: Mapping of category name to its counters.
        title: Heading printed (after a blank line) above the report.
    """
    report_lines = [f"\n{title}:"]
    report_lines.extend(
        f"{category.capitalize()}: {counts}" for category, counts in metrics.items()
    )
    print("\n".join(report_lines))

# Baseline: print the cumulative counters before any test case has been scored
display_metrics(metrics, "Initial Metrics")






Initial Metrics:
Answers: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}


# Test case 6

In [None]:
# Test case 6: the model reproduces the annotated answer and argument text
# verbatim, but labels the argument PRO where the annotation says CON.

# Human annotation (reference)
annotated_answers_6 = [
    {
        "answer": "Too much homework.",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "Homework is beneficial. Some weeks are homework-free, while other weeks are overwhelming with homework. If only there is a way for professors to coordinate and distribute workloads evenly across the semester.",
                "classification": "CON"
            }
        ]
    }
]

# Model output under evaluation
model_answers_6 = [
    {
        "answer": "Too much homework.",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "Homework is beneficial. Some weeks are homework-free, while other weeks are overwhelming with homework. If only there is a way for professors to coordinate and distribute workloads evenly across the semester.",
                "classification": "PRO"
            }
        ]
    }
]

# Score the sixth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_6, model_answers_6, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 6")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 6:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}


# Test case 7

In [None]:
# Test case 7: two annotated answers without arguments. The model labels the
# first one Barrier instead of Solution and splits the second one into a
# shorter answer plus a PRO argument.

# Human annotation (reference)
annotated_answers_7 = [
    {
        "answer": "The content of cognitive science classes should be re-evaluated.",
        "classification": "Solution",
        "arguments": []
    },
    {
        "answer": "We should organize the course content so that students interested in pursuing a PhD in cognitive science can do so without fear. The course should provide a background in cognitive science and cover all areas of the field, such as psychology, neuroscience, linguistics, etc. If not all, at least psychology, neuroscience, and philosophy should be included as they are very important.",
        "classification": "Solution",
        "arguments": []
    }
]

# Model output under evaluation
model_answers_7 = [
    {
        "answer": "Content of cognitive science classes should be re-evaluated",
        "classification": "Barrier",
        "arguments": []
    },
    {
        "answer": "We should organize the course content so that students interested in pursuing PhD in cognitive science can do so without fear.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "Hopefully, the course should give a background in cognitive science and touch on all areas of this field, such as psychology, neuroscience, linguistics, etc. If not all, at least psychology, neuroscience, and philosophy might be very important.",
                "classification": "PRO"
            }
        ]
    }
]

# Score the seventh test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_7, model_answers_7, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 7")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 7:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 3, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}


# Test case 8

In [None]:
# Test case 8: the model answer differs from the annotation only in minor
# wording ("There is need for specialization"); classification and argument
# are the same.

# Human annotation (reference)
annotated_answers_8 = [
    {
        "answer": "There is a need for specialization.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "I don't think it is a good thing, allow student to explore and be a tool for many jobs.",
                "classification": "CON"
            }
        ]
    }
]

# Model output under evaluation
model_answers_8 = [
    {
        "answer": "There is need for specialization",
        "classification": "Solution",
        "arguments": [
            {
                "text": "I don't think it is a good thing, allow student to explore and be a tool for many jobs.",
                "classification": "CON"
            }
        ]
    }
]
# Score the eighth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_8, model_answers_8, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 8")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 8:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 4, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 3, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}


# Test case 9

In [None]:
# Test case 9: the answer is identical, but the model attaches a CON argument
# that the annotation does not contain.

# Human annotation (reference)
annotated_answers_9 = [
    {
        "answer": "We do not benefit from some classes",
        "classification": "Barrier",
        "arguments": []
    }
]

# Model output under evaluation
model_answers_9 = [
    {
        "answer": "We do not benefit from some classes",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "I disagree with scientific English being boring; it would even be more beneficial if it were a compulsory course throughout the master's program.",
                "classification": "CON"
            }
        ]
    }
]

# Score the ninth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_9, model_answers_9, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 9")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 9:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 5, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 4, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 2, 'FP': 2, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 3, 'FN': 0, 'TN': 0}


# Test case 10

In [None]:
# Test case 10: identical answer and argument; only the letter case of the
# answer classification differs ("Barrier" vs "BARRIER").

# Human annotation (reference)
annotated_answers_10 = [
    {
        "answer": "No midterm break",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "this is the most important, because as of now. we are already worn out due to many readings, projects and assignment.",
                "classification": "PRO"
            }
        ]
    }
]

# Model output under evaluation
model_answers_10 = [
    {
        "answer": "No midterm break",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": "this is the most important, because as of now. we are already worn out due to many readings, projects and assignment.",
                "classification": "PRO"
            }
        ]
    }
]

# Score the tenth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_10, model_answers_10, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 10")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 10:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 6, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 5, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 3, 'FP': 2, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 2, 'FP': 3, 'FN': 0, 'TN': 0}


# Test case 11

In [None]:
# Test case 11: the model splits the single annotated argument into two
# separate PRO arguments.

# Human annotation (reference)
annotated_answers_11 = [
    {
        "answer": "Too much homework",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "If length of lecture is reduced, there will be time to do home work. Everything is just too much.",
                "classification": "PRO"
            }
        ]
    }
]

# Model output under evaluation
model_answers_11 = [
    {
        "answer": "Too much homework",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "If length of lecture is reduced, there will be time to do home work.",
                "classification": "PRO"
            },
            {
                "text": "Everything is just too much.",
                "classification": "PRO"
            }
        ]
    }
]

# Score the eleventh test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_11, model_answers_11, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 11")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 11:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 7, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 6, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 4, 'FP': 3, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 3, 'FP': 4, 'FN': 0, 'TN': 0}


# Test case 12

In [None]:
# Test case 12: the model rewords the answer ("too many" instead of "Too much"),
# upper-cases the classification, and adds a PRO argument absent from the
# annotation.

# Human annotation (reference)
annotated_answers_12 = [
    {
        "answer": "Too much hours of classes per week",
        "classification": "Barrier",
        "arguments": []
    }
]

# Model output under evaluation
model_answers_12 = [
    {
        "answer": "too many hours of classes per week",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": "We could have at most two lectures per day",
                "classification": "PRO"
            }
        ]
    }
]

# Score the twelfth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_12, model_answers_12, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 12")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 12:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 8, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 7, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 4, 'FP': 4, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 3, 'FP': 5, 'FN': 0, 'TN': 0}


# Test case 13

In [None]:

# Test case 13: the model keeps the annotated answer but only its first
# sentence as argument, and additionally emits the full annotated argument
# text as a second answer with four sentence-level PRO arguments.

# Human annotation (reference)
annotated_answers_13 = [
    {
        "answer": "Research as means to foster innovation in SCI Master's Program",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Master's students should be involved in the research works in SCI. It may also be necessary that they start developing practical knowledge early. I guess the research project should be the student's contribution to the community. It will be most useful after the six-month internship. They need to have the choice to work on relevant subjects that they are interested in.",
                "classification": "PRO"
            }
        ]
    }
]

# Model output under evaluation
model_answers_13 = [
    {
        "answer": "Research as means to foster innovation in SCI Master's Program",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Master's students should be involved in the research works in SCI.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "Master's students should be involved in the research works in SCI. It may also be necessary that they start developing practical knowledge early. I guess the research project should be the student's contribution to the community. It will be most useful after the six-month internship. They need to have the choice to work on relevant subjects that they are interested in.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "It may also be necessary that they start developing practical knowledge early.",
                "classification": "PRO"
            },
            {
                "text": "I guess the research project should be the student's contribution to the community.",
                "classification": "PRO"
            },
            {
                "text": "It will be most useful after the six-month internship.",
                "classification": "PRO"
            },
            {
                "text": "They need to have the choice to work on relevant subjects that they are interested in.",
                "classification": "PRO"
            }
        ]
    }
]

# Score the thirteenth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_13, model_answers_13, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 13")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 13:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 4, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 4, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 9, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 8, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 5, 'FP': 8, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 4, 'FP': 9, 'FN': 0, 'TN': 0}


# Test case 14

In [None]:

# Test case 14: the model reproduces only two of the four annotated arguments
# (the first PRO and the CON); the answer differs by a trailing period.

# Human annotation (reference)
annotated_answers_14 = [
    {
        "answer": "The amount of research funding",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "Lots of funding implies that we are doing good research.",
                "classification": "PRO"
            },
            {
                "text": "More funding help to do more research.",
                "classification": "PRO"
            },
            {
                "text": "High funding allow conducting the research more efficiently and allows for the use of modern technology.",
                "classification": "PRO"
            },
            {
                "text": "Its not always true, managers don’t know how to manage well as it is better to choose the right managers.",
                "classification": "CON"
            }
        ]
    }
]

# Model output under evaluation
model_answers_14 = [
    {
        "answer": "The amount of research funding.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "Lots of funding implies that we are doing good research.",
                "classification": "PRO"
            },
            {
                "text": "Its not always true, managers don’t know how to manage well as it is better to choose the right managers.",
                "classification": "CON"
            }
        ]
    }
]

# Score the fourteenth test case; `compare` also adds the counts to the cumulative `metrics`
local_metrics, metrics = compare(annotated_answers_14, model_answers_14, metrics)

# Display metrics for this test case only
display_metrics(local_metrics, "Metrics for Test Case 14")

# Display the cumulative metrics after this test case
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 14:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 2, 'FP': 0, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 2, 'FP': 0, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 10, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 9, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 7, 'FP': 8, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 6, 'FP': 9, 'FN': 2, 'TN': 0}


# Test case 15

In [None]:
# Test case 15: the model keeps one argument under the annotated answer and
# moves the other three under an extra answer ("Increased Funding
# Opportunities.") that does not exist in the annotation.

# Human annotation (reference)
annotated_answers_15 = [
    {
        "answer": "The prestige of the funding agency.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "World-class funding agencies offers network and international recognition",
                "classification": "PRO"
            },
            {
                "text": "This funding can enable the university to pursue ambitious projects and initiatives that might otherwise be unattainable.",
                "classification": "PRO"
            },
            {
                "text": "International collaborations indicate good research capability and impact.",
                "classification": "PRO"
            },
            {
                "text": "Attracting international funds is a positive reputation.",
                "classification": "PRO"
            }
        ]
    }
]

# Model output under evaluation
model_answers_15 = [
    {
        "answer": "The prestige of the funding agency.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "World-class funding agencies offer network and international recognition.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "Increased Funding Opportunities.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "This funding can enable the university to pursue ambitious projects and initiatives that might otherwise be unattainable.",
                "classification": "PRO"
            },
            {
                "text": "International collaborations indicate good research capability and impact.",
                "classification": "PRO"
            },
            {
                "text": "Attracting international funds is a positive reputation.",
                "classification": "PRO"
            }
        ]
    }
]

# Score test case 15. `compare` returns the `metrics` object it was given, so
# `updated_metrics` refers to the same cumulative counters as `metrics`.
local_metrics_15, updated_metrics = compare(annotated_answers_15, model_answers_15, metrics)

# Display metrics for test case 15
display_metrics(local_metrics_15, "Metrics for Test Case 15")
# Display the cumulative metrics after this test case
display_metrics(updated_metrics, "Updated Metrics")


Metrics for Test Case 15:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 3, 'FN': 3, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 3, 'FN': 3, 'TN': 0}

Updated Metrics:
Answers: {'TP': 11, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 10, 'FP': 3, 'FN': 0, 'TN': 0}
Arguments: {'TP': 8, 'FP': 11, 'FN': 5, 'TN': 0}
Argument_classifications: {'TP': 7, 'FP': 12, 'FN': 5, 'TN': 0}


# Test case 16

In [None]:
# Test case 16 (data only; scored in the next cell). The annotation has three
# answers; the model emits only the third one and hangs four arguments under
# it, one of which is the text of the second annotated answer.

# Human annotation (reference)
annotated_answers_16 = [
    {
        "answer": "Management, i.e., the researcher must supervise the students.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "Without good supervision, the student cannot follow the right paths. Supervision is critical in training future researchers.",
                "classification": "PRO"
            },
            {
                "text": "The effectiveness of supervision depends on the student, their will, and their motivation. A supervisor must be a psychologically stable person with an open mind. The strength of our supervision showcases the ability to transfer knowledge to future researchers and ensure perpetuity. A student will do their best if they are motivated, even with light supervision, and will do the minimum if they are unmotivated, even with forced supervision.",
                "classification": "CON"
            }
        ]
    },
    {
        "answer": "The supervisor must be in the field.",
        "classification": "Solution",
        "arguments": []
    },
    {
        "answer": "The number of supervised students and the quality of the research output should be assessed.",
        "classification": "Metrics",
        "arguments": []
    }
]

# Model output under evaluation
model_answers_16 = [
    {
        "answer": "The number of supervised students and the quality of the research output should be assessed.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "Without good supervision, the student cannot follow the right paths, supervision is critical in training future researchers.",
                "classification": "PRO"
            },
            {
                "text": "The supervisor must be in the field.",
                "classification": "PRO"
            },
            {
                "text": "The supervision depends on the student, his will and his motivation.",
                "classification": "CON"
            },
            {
                "text": "A student will do his best if he wants with light supervision, A student will do his minimum if he wants even with forced supervision.",
                "classification": "CON"
            }
        ]
    }
]

In [None]:
# Score test case 16. `compare` returns the `metrics` object it was given, so
# `updated_metrics` refers to the same cumulative counters as `metrics`.
local_metrics_16, updated_metrics = compare(annotated_answers_16, model_answers_16, metrics)

# Display metrics for test case 16
display_metrics(local_metrics_16, "Metrics for Test Case 16")
# Display the cumulative metrics after this test case
display_metrics(updated_metrics, "Updated Metrics")


Metrics for Test Case 16:
Answers: {'TP': 1, 'FP': 0, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 2, 'TN': 0}
Arguments: {'TP': 0, 'FP': 4, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 4, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 12, 'FP': 2, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 11, 'FP': 3, 'FN': 2, 'TN': 0}
Arguments: {'TP': 8, 'FP': 15, 'FN': 7, 'TN': 0}
Argument_classifications: {'TP': 7, 'FP': 16, 'FN': 7, 'TN': 0}


# Test case 17

In [None]:

# Test case 17: the model uses the singular label "Metric" where the annotation
# says "Metrics", and turns the annotated CON argument into a second answer
# with three CON arguments of its own.

# Human annotation (reference)
annotated_answers_17 = [
    {
        "answer": "The total number of research papers produced.",
        "classification": "Metrics",
        "arguments": [
            {
                "text": "The higher the university's research activity, the more papers are produced, making it a good success indicator.",
                "classification": "PRO"
            },
            {
                "text": "The total number of research papers does not mean much; it is the quality of the papers that matters, i.e., impact factor. Focusing on the number may lead to shallow content. The number is not important if the papers are not relevant. In some fields, the number is not a key performance indicator (e.g., Humanities).",
                "classification": "CON"
            }
        ]
    }
]

# Model output under evaluation
model_answers_17 = [
    {
        "answer": "The total number of research papers produced.",
        "classification": "Metric",
        "arguments": [
            {
                "text": "I see that the more the university research activity is high, the more papers are produced. So it is a good success indicator.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "The total number of research papers does not mean much, it is mainly the quality papers that matters, ie, impact factor.",
        "classification": "Metric",
        "arguments": [
            {
                "text": "The focus on number may lead to shallow content.",
                "classification": "CON"
            },
            {
                "text": "The number is not important if not relevant papers.",
                "classification": "CON"
            },
            {
                "text": "In some fields, the number is not a KPI (ei Humanities).",
                "classification": "CON"
            }
        ]
    }
]

# Score test case 17. `compare` returns the `metrics` object it was given, so
# `updated_metrics` refers to the same cumulative counters as `metrics`.
local_metrics_17, updated_metrics = compare(annotated_answers_17, model_answers_17, metrics)

# Display metrics for test case 17
display_metrics(local_metrics_17, "Metrics for Test Case 17")
# Display the cumulative metrics after this test case
display_metrics(updated_metrics, "Updated Metrics")


Metrics for Test Case 17:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 3, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 3, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 13, 'FP': 3, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 11, 'FP': 5, 'FN': 2, 'TN': 0}
Arguments: {'TP': 9, 'FP': 18, 'FN': 8, 'TN': 0}
Argument_classifications: {'TP': 8, 'FP': 19, 'FN': 8, 'TN': 0}


#Test case 18

In [None]:
# Test case 18: annotated reference and model output, each a list of answer
# dicts with "answer", "classification" and "arguments" keys.
annotated_answers_18 = [
    {"answer": "It is important to be evaluated by international experts and to evaluate research structures and groups every 5 years.",
     "classification": "Metrics",
     "arguments": [
         {"text": "Yes, as long as the senior members of the specific scientific community are consulted.",
          "classification": "PRO"},
     ]},
]

model_answers_18 = [
    {"answer": "It is important to be evaluated by international experts and to evaluate research structures and groups every 5 years.",
     "classification": "Metrics",
     "arguments": [
         {"text": "Yes, as long as the senior members of the specific scientific community are consulted.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output for this case.
# NOTE(review): the second result is bound to `updated_metrics` here, whereas
# the cells from test case 19 onward rebind `metrics`. The recorded totals keep
# accumulating across cells, so `compare` presumably updates `metrics` in
# place -- confirm before relying on either name.
local_metrics_18, updated_metrics = compare(
    annotated_answers_18, model_answers_18, metrics
)

# Per-case metrics first, then the totals returned by the same call.
display_metrics(local_metrics_18, "Metrics for Test Case 18")
display_metrics(updated_metrics, "Updated Metrics")


Metrics for Test Case 18:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 14, 'FP': 3, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 12, 'FP': 5, 'FN': 2, 'TN': 0}
Arguments: {'TP': 10, 'FP': 18, 'FN': 8, 'TN': 0}
Argument_classifications: {'TP': 9, 'FP': 19, 'FN': 8, 'TN': 0}


#Test case 19

In [None]:
# Test case 19: annotated reference (the second answer has no arguments).
annotated_answer_19 = [
    {"answer": "To hire interns, a professor should not need to go through a lengthy process with HR.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "Hiring correctly will help to match the project with the right person in time without delay due to HR procedures.",
          "classification": "PRO"},
     ]},
    {"answer": "Paying small fees for conference registrations requires a lengthy process with Finance.",
     "classification": "BARRIER",
     "arguments": []},
]

# Model output: the same PRO argument text appears under both answers.
model_answers_19 = [
    {"answer": "To hire interns, a professor should not need to go through a lengthy process with HR.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "This will help to match the project with the right person in time without delay due to HR procedures.",
          "classification": "PRO"},
     ]},
    {"answer": "Paying small fees for conference registrations requires a lengthy process with Finance.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "This will help to match the project with the right person in time without delay due to HR procedures.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_19, model_answers_19, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 19")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 19:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 16, 'FP': 3, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 14, 'FP': 5, 'FN': 2, 'TN': 0}
Arguments: {'TP': 11, 'FP': 19, 'FN': 8, 'TN': 0}
Argument_classifications: {'TP': 10, 'FP': 20, 'FN': 8, 'TN': 0}


#Test case 20

In [None]:
# Test case 20: annotated reference with a single answer and one PRO argument.
annotated_answer_20 = [
    {"answer": "Lack of internal collaboration.",
     "classification": "Barriers",
     "arguments": [
         {"text": "Yes, there is a lot of individualism. Encouraging internal collaboration increases collective intelligence because of the diversity of UM6P’s departments. They should collaborate to find solutions to research problems. There is very limited collaboration between departments; just sharing the information or activity domains of the department will create more possibilities for collaboration.",
          "classification": "PRO"},
     ]},
]

# Model output: three answers, two of them labelled SOLUTION.
model_answer_20 = [
    {"answer": "Lack of Internal collaboration.",
     "classification": "Barriers",
     "arguments": []},
    {"answer": "Encouraging internal collaboration increases collective intelligence because of the diversity of Um6p’s departments, they should collaborate to find research problems solutions there is very limited collaboration between departments just sharing the information or activity domains of the department will create more possibilities of collaboration.",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "Encouraging internal collaboration increases collective intelligence because of the diversity of Um6p’s departments.",
          "classification": "PRO"},
         {"text": "They should collaborate to find research problems solutions.",
          "classification": "PRO"},
         {"text": "There is very limited collaboration between departments.",
          "classification": "CON"},
         {"text": "Just sharing the information or activity domains of the department will create more possibilities of collaboration.",
          "classification": "PRO"},
     ]},
    {"answer": "Avoid conflicts of interest by avoiding that the lead program is in direct 'competition' with the members of its entity.",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "Interdisciplinary projects help moderate the effect of this type of conflict.",
          "classification": "PRO"},
         {"text": "This dynamism was created by calls for multidisciplinary projects, which is a very good initiative but remains insufficient.",
          "classification": "CON"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_20, model_answer_20, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 20")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 20:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 6, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 6, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 17, 'FP': 5, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 15, 'FP': 7, 'FN': 2, 'TN': 0}
Arguments: {'TP': 11, 'FP': 25, 'FN': 9, 'TN': 0}
Argument_classifications: {'TP': 10, 'FP': 26, 'FN': 9, 'TN': 0}


#Test case 21

In [None]:
# Test case 21: annotated reference, two answers labelled "Solution", each with
# one CON argument.
annotated_answer_21 = [
    {"answer": "Creation of a large number of departments working on the same research subjects. (UM6P internal conflict of interest)",
     "classification": "Solution",
     "arguments": [
         {"text": "Creating a large number of departments working on the same research topics within UM6P can lead to internal conflicts.",
          "classification": "CON"},
     ]},
    {"answer": "More value must be given to PhD students to boost their mood to work and publish (high price of accommodation and registration fees).",
     "classification": "Solution",
     "arguments": [
         {"text": "PhD students already have excellent conditions. They have very good working conditions that other students do not have, neither in Morocco nor in developed countries.",
          "classification": "CON"},
     ]},
]

# Model output: both answers are labelled "BARRIER".
model_answer_21 = [
    {"answer": "Creation of a large number of departments working on the same research subjects.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "(UM6P internal conflict of interest)",
          "classification": "PRO"},
     ]},
    {"answer": "More value must be given to PhD students to boost their mood to work and publish (high price of accommodation and registration fees)",
     "classification": "BARRIER",
     "arguments": [
         {"text": "PhD have excellent conditions. students have very good working conditions that other students do not have, neither in Morocco nor in developed countries.",
          "classification": "CON"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_21, model_answer_21, metrics
)

# The title names test case 21 (it was a copy-paste of the case-20 title,
# which mislabelled this cell's output).
display_metrics(local_metrics, "Metrics for Test Case 21")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 20:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 19, 'FP': 5, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 15, 'FP': 9, 'FN': 2, 'TN': 0}
Arguments: {'TP': 12, 'FP': 25, 'FN': 10, 'TN': 0}
Argument_classifications: {'TP': 11, 'FP': 26, 'FN': 10, 'TN': 0}


#Test case 22

In [None]:
# Test case 22: annotated reference and model output. The answers match; the
# model labels the argument "Solution" where the annotation has "PRO".
annotated_answer_22 = [
    {"answer": "Lack of competent scientific advice.",
     "classification": "Barrier",
     "arguments": [
         {"text": "For research to work, there must be a competent scientific council at the university elected by the faculty members and therefore by the researchers themselves.",
          "classification": "PRO"},
     ]},
]

model_answer_22 = [
    {"answer": "Lack of competent scientific advice.",
     "classification": "Barrier",
     "arguments": [
         {"text": "For research to work there must be a competent scientific council of the university elected by the faculty members and therefore by the researchers themselves.",
          "classification": "Solution"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_22, model_answer_22, metrics
)

# The title names test case 22 (it was a copy-paste of the case-20 title,
# which mislabelled this cell's output).
display_metrics(local_metrics, "Metrics for Test Case 22")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 20:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 20, 'FP': 5, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 16, 'FP': 9, 'FN': 2, 'TN': 0}
Arguments: {'TP': 13, 'FP': 25, 'FN': 10, 'TN': 0}
Argument_classifications: {'TP': 11, 'FP': 27, 'FN': 10, 'TN': 0}


#Test case 23

In [None]:
# Test case 23: the annotation lists two argument-free solutions; the model
# output has one answer with a single PRO argument.
annotated_answer_23 = [
    {"answer": "Making all parties feel included—researchers, PhD students, staff, and engineers in research—by first covering critical necessities such as housing, accessibility, and lifestyle.",
     "classification": "Solution",
     "arguments": []},
    {"answer": "By strategically planning activities and events that will maximize productivity and motivation, and provide opportunities to link and learn together.",
     "classification": "Solution",
     "arguments": []},
]

model_answer_23 = [
    {"answer": "Making all parties feel included—researchers, PhD students, staff, and engineers in research.",
     "classification": "Solution",
     "arguments": [
         {"text": "By first covering critical necessities housing accessibility life style.. and by strategically planning activities events and that will maximise the productivity and motivation in one hand and the opportunity to link and learn together.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_23, model_answer_23, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 23")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 23:
Answers: {'TP': 1, 'FP': 0, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 21, 'FP': 5, 'FN': 3, 'TN': 0}
Answer_classifications: {'TP': 17, 'FP': 9, 'FN': 3, 'TN': 0}
Arguments: {'TP': 13, 'FP': 26, 'FN': 10, 'TN': 0}
Argument_classifications: {'TP': 11, 'FP': 28, 'FN': 10, 'TN': 0}


#Test case 24

In [None]:

# Test case 24: four answers on each side; only the last one has an argument.
# The annotation uses "Barrier"/"Solution", the model output "BARRIER"/"SOLUTION".
annotated_answer_24 = [
    {"answer": "Lack of evaluation skills.",
     "classification": "Barrier",
     "arguments": []},
    {"answer": "Need for more communication and involvement of the staff in important decisions.",
     "classification": "Barrier",
     "arguments": []},
    {"answer": "Lack of real sense of leadership.",
     "classification": "Barrier",
     "arguments": []},
    {"answer": "Program managers must be evaluated and trained in the management of structures.",
     "classification": "Solution",
     "arguments": [
         {"text": "A good researcher is not always a good manager.",
          "classification": "PRO"},
     ]},
]

model_answer_24 = [
    {"answer": "Lack of evaluation skills.",
     "classification": "BARRIER",
     "arguments": []},
    {"answer": "Need for more communication and involvement of the staff in important decisions.",
     "classification": "BARRIER",
     "arguments": []},
    {"answer": "Lack of real sense of leadership.",
     "classification": "BARRIER",
     "arguments": []},
    {"answer": "Program managers must be evaluated and trained in the management of structures.",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "A good researcher is not always a good manager.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_24, model_answer_24, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 24")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 24:
Answers: {'TP': 4, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 4, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 25, 'FP': 5, 'FN': 3, 'TN': 0}
Answer_classifications: {'TP': 21, 'FP': 9, 'FN': 3, 'TN': 0}
Arguments: {'TP': 14, 'FP': 26, 'FN': 10, 'TN': 0}
Argument_classifications: {'TP': 12, 'FP': 28, 'FN': 10, 'TN': 0}


#Test case 25

In [None]:
# Test case 25: the annotation has one answer with a CON argument; the model
# output has two answers, the second labelled BARRIER with a PRO argument.
annotated_answer_25 = [
    {"answer": "Use competition as a driving force to advance research.",
     "classification": "Solutions",
     "arguments": [
         {"text": "The University is still young to compete, it is better to evaluate the yearly performance. Evaluating the yearly performance of the University until it is on the same stage of universities of reference on three levels (Morocco, Africa, Worldwide)",
          "classification": "CON"},
     ]},
]

model_answer_25 = [
    {"answer": "Use competition as a driving force to advance research.",
     "classification": "Solutions",
     "arguments": []},
    {"answer": "The University is still young to compete, it is better to evaluate the yearly performance.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "Evaluating the yearly performance of the University until it is on the same stage of universities of reference on three levels (Morocco, Africa, Worldwide)",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_25, model_answer_25, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 25")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 25:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 26, 'FP': 6, 'FN': 3, 'TN': 0}
Answer_classifications: {'TP': 22, 'FP': 10, 'FN': 3, 'TN': 0}
Arguments: {'TP': 14, 'FP': 27, 'FN': 11, 'TN': 0}
Argument_classifications: {'TP': 12, 'FP': 29, 'FN': 11, 'TN': 0}


#Test case 26

In [None]:
# Test case 26: two answers on each side (a Barrier and a Solution), each with
# one PRO argument; the answer/argument boundaries differ between the two.
annotated_answer_26 = [
    {"answer": "Purchasing delay affect research project : Put in place procedures/processes to limit delays and ensure equipment received is the same as the one ordered.",
     "classification": "Barrier",
     "arguments": [
         {"text": "Spent a lot of time waiting of Chemical Items Glassware & amp Consumables.",
          "classification": "PRO"},
     ]},
    {"answer": "Fluidity of the process: Find a way to make purchasing products easier.",
     "classification": "Solution",
     "arguments": [
         {"text": "This process should be as smooth as possible. In European universities, the project manager is responsible for recruiting the postdoc he wants and the only time he goes to HR is for the Ex contract (chemical products).",
          "classification": "PRO"},
     ]},
]

model_answer_26 = [
    {"answer": "Purchasing delay affect research project.",
     "classification": "Barrier",
     "arguments": [
         {"text": "This is very important. Put in place procedures/processes to limit delays and ensure equipment received is the same as the one ordered. Fluidity of the process. Find a way to make purchasing products easier. Spent a lot of time waiting of Chemical Items Glassware &amp Consumables.",
          "classification": "PRO"},
     ]},
    {"answer": "This process should be as smooth as possible.",
     "classification": "Solution",
     "arguments": [
         {"text": "In European universities, the project manager is responsible for recruiting the postdoc he wants and the only time he goes to HR is for the Ex contract (chemical products).",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_26, model_answer_26, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 26")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 26:
Answers: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 26, 'FP': 8, 'FN': 5, 'TN': 0}
Answer_classifications: {'TP': 22, 'FP': 12, 'FN': 5, 'TN': 0}
Arguments: {'TP': 14, 'FP': 29, 'FN': 13, 'TN': 0}
Argument_classifications: {'TP': 12, 'FP': 31, 'FN': 13, 'TN': 0}


#Test case 27

In [None]:
# Test case 27: the annotation has one Barrier with a PRO and a CON argument;
# the model output has three answers, each with one PRO argument.
annotated_answer_27 = [
    {"answer": "Lack of funding for junior faculty.",
     "classification": "Barrier",
     "arguments": [
         {"text": "Insufficient access to startup funds means new projects can’t get started. Junior faculty should have enough funding to jumpstart their research and ensure continuity.",
          "classification": "PRO"},
         {"text": "We can easily find opportunities if we have a solid research proposal. Young researchers must learn to work in a team with their hierarchical manager.",
          "classification": "CON"},
     ]},
]

model_answer_27 = [
    {"answer": "Lack of funding for junior faculty",
     "classification": "BARRIER",
     "arguments": [
         {"text": "Insufficient (access to) startup funds means new projects can’t get started",
          "classification": "PRO"},
     ]},
    {"answer": "Junior faculty should have enough funding to jump start their research Continuity",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "There Should be support for junior professors because their impact has longer time as they will eventually spend longer time doing research based at UM6P which should strengthen the ecosystem.",
          "classification": "PRO"},
     ]},
    {"answer": "We can easily find an opportunity if we have solid research proposal",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "the young researcher must learn to work in a team with his or her hierarchical manager",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_27, model_answer_27, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 27")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 27:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 27, 'FP': 10, 'FN': 5, 'TN': 0}
Answer_classifications: {'TP': 23, 'FP': 14, 'FN': 5, 'TN': 0}
Arguments: {'TP': 15, 'FP': 31, 'FN': 14, 'TN': 0}
Argument_classifications: {'TP': 13, 'FP': 33, 'FN': 14, 'TN': 0}


#Test case 28

In [None]:
# Test case 28: annotation and model output carry the same answer and argument
# text; only the label casing differs ("Barrier" vs "BARRIER").
annotated_answer_28 = [
    {"answer": "New departments that need time to mature and have all necessary equipment.",
     "classification": "Barrier",
     "arguments": [
         {"text": "It will absolutely take time and hard work to build a pattern for processing. It takes time for new departments to develop and mature. During this period, there may be a lack of experienced people, which can impact the quality and quantity of research output.",
          "classification": "PRO"},
     ]},
]

model_answer_28 = [
    {"answer": "New departments that need time to mature and have all necessary equipment.",
     "classification": "BARRIER",
     "arguments": [
         {"text": "It will absolutely take time and hard work to build a pattern for processing. It takes time for new departments to develop and mature. During this period, there may be a lack of experienced people, which can impact the quality and quantity of research output.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answer_28, model_answer_28, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 28")
display_metrics(metrics, "Updated Metrics")


Metrics for Test Case 28:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 28, 'FP': 10, 'FN': 5, 'TN': 0}
Answer_classifications: {'TP': 24, 'FP': 14, 'FN': 5, 'TN': 0}
Arguments: {'TP': 16, 'FP': 31, 'FN': 14, 'TN': 0}
Argument_classifications: {'TP': 14, 'FP': 33, 'FN': 14, 'TN': 0}


#Test case 29

In [None]:
# Test case 29: two argument-free answers with identical text on both sides.
# NOTE(review): the annotation labels them "solutions" while the model output
# uses "SOLUTION"; the recorded output for this cell counts both answer
# classifications as FP, so `compare` presumably does not treat these labels
# as equal -- confirm whether the annotation label should be normalised.
annotated_answers_29 = [
    {"answer": "Via partnership with different sectors such as industry, administration, NGO, etc.",
     "classification": "solutions",
     "arguments": []},
    {"answer": "give access to the University for all the stakeholders (companies, other universities, etc...)",
     "classification": "solutions",
     "arguments": []},
]

model_answers_29 = [
    {"answer": "Via partnership with different sectors such as industry, administration, NGO, etc.",
     "classification": "SOLUTION",
     "arguments": []},
    {"answer": "give access to the University for all the stakeholders (companies, other universities, etc...)",
     "classification": "SOLUTION",
     "arguments": []},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answers_29, model_answers_29, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 29")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 29:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 30, 'FP': 10, 'FN': 5, 'TN': 0}
Answer_classifications: {'TP': 24, 'FP': 16, 'FN': 5, 'TN': 0}
Arguments: {'TP': 16, 'FP': 31, 'FN': 14, 'TN': 0}
Argument_classifications: {'TP': 14, 'FP': 33, 'FN': 14, 'TN': 0}


#Test case 30

In [None]:
# Test case 30: the annotation has one Barriers answer with a PRO and a CON
# argument; the model output has three answers, only the first with an argument.
annotated_answers_30 = [
    {"answer": "Access to high quality students/ Postdoc in Morocco and Africa",
     "classification": "Barriers",
     "arguments": [
         {"text": "The Lydex experience shows this fact that it is very difficult to find good quality students and PostDocs",
          "classification": "PRO"},
         {"text": "In africa and in morocco, high quality students and postdocs are everywhere We shouldn’t blame students for our unsuccessful research.",
          "classification": "CON"},
     ]},
]

model_answers_30 = [
    {"answer": "The choice of highly motivated ambitious students since the recruitment stage and motivate them more",
     "classification": "SOLUTION",
     "arguments": [
         {"text": "Absolutely. Should put in place outreach activities to attract talented students quality students",
          "classification": "PRO"},
     ]},
    {"answer": "It is very difficult to find good quality students and PostDocs",
     "classification": "BARRIER",
     "arguments": []},
    {"answer": "We shouldn’t blame students for our unsuccessful research.",
     "classification": "BARRIER",
     "arguments": []},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answers_30, model_answers_30, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 30")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 30:
Answers: {'TP': 0, 'FP': 3, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 3, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 30, 'FP': 13, 'FN': 6, 'TN': 0}
Answer_classifications: {'TP': 24, 'FP': 19, 'FN': 6, 'TN': 0}
Arguments: {'TP': 16, 'FP': 32, 'FN': 16, 'TN': 0}
Argument_classifications: {'TP': 14, 'FP': 34, 'FN': 16, 'TN': 0}


#Test case 31

In [None]:
# Test case 31: the annotation has one answer with a single PRO argument; the
# model output has two answers, with two PRO arguments under the second.
annotated_answers_31 = [
    {"answer": "Lack of clear procedures for career development.",
     "classification": "Barriers",
     "arguments": [
         {"text": "The staff who do PhD do not have a vision of their future after 4 years and also they do not receive an annual bonus even if they keep their tasks in full or in part and also they do not benefit salary increase throughout the PhD period. Clear career development pathways help with strengthening the academic belonging to his/her institution.",
          "classification": "PRO"},
     ]},
]

model_answers_31 = [
    {"answer": "Lack of clear procedures for career development.",
     "classification": "Barriers",
     "arguments": []},
    {"answer": "For staff who do PhD at UM6P: many disadvantages belonging to UM6P.",
     "classification": "Barriers",
     "arguments": [
         {"text": "The staff who do PhD do not have a vision of their future after 4 years and also they do not receive an annual bonus even if they keep their tasks in full or in part and also they do not benefit salary increase throughout the PhD period.",
          "classification": "PRO"},
         {"text": "Clear career development pathways help with strengthening the academic belonging to his/her institution.",
          "classification": "PRO"},
     ]},
]

# Compare annotation vs. model output; the second result is rebound to
# `metrics` and shown as the running totals.
local_metrics, metrics = compare(
    annotated_answers_31, model_answers_31, metrics
)

display_metrics(local_metrics, "Metrics for Test Case 31")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 31:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 31, 'FP': 14, 'FN': 6, 'TN': 0}
Answer_classifications: {'TP': 25, 'FP': 20, 'FN': 6, 'TN': 0}
Arguments: {'TP': 16, 'FP': 34, 'FN': 17, 'TN': 0}
Argument_classifications: {'TP': 14, 'FP': 36, 'FN': 17, 'TN': 0}


# Test case 32

In [None]:
# Test case 32: reference annotation, followed by the model's extraction
annotated_answers_32 = [
    dict(
        answer="Trying to merge both the american/moroccan/french systems of promotion.",
        classification="Barrier",
        arguments=[],
    ),
    dict(
        answer="Research ethics",
        classification="Barrier",
        arguments=[
            dict(
                text="This should not pose a problem if all researchers are trained in the same research ethics from the moment they become involved and if sanctions are applied to those who do not respect this ethical.",
                classification="CON",
            ),
        ],
    ),
]

model_answers_32 = [
    dict(
        answer="Trying to merge both the american/moroccan/french systems of promotion.",
        classification="Barrier",
        arguments=[
            dict(
                text="This is counterproductive.",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="Research ethics",
        classification="Barrier",
        arguments=[
            dict(
                text="This should not pose a problem if all researchers are trained in the same research ethics from the moment they become involved and if sanctions are applied to those who do not respect this ethical.",
                classification="CON",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics, metrics = compare(annotated_answers_32, model_answers_32, metrics)

# Per-case counts first ...
display_metrics(local_metrics, "Metrics for Test Case 32")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 32:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 33, 'FP': 14, 'FN': 6, 'TN': 0}
Answer_classifications: {'TP': 27, 'FP': 20, 'FN': 6, 'TN': 0}
Arguments: {'TP': 17, 'FP': 35, 'FN': 17, 'TN': 0}
Argument_classifications: {'TP': 15, 'FP': 37, 'FN': 17, 'TN': 0}


# Test case 33

In [None]:
# Test case 33: reference annotation, followed by the model's extraction
annotated_answers_33 = [
    dict(
        answer="Do not make a difference between PhD student and Staff UM6P",
        classification="Solutions",
        arguments=[
            dict(
                text="respect for levels PhD students and staff have distinct resource needs",
                classification="CON",
            ),
        ],
    ),
]

model_answers_33 = [
    dict(
        answer="Do not make a difference between PhD student and Staff UM6P",
        classification="Solutions",
        arguments=[
            dict(
                text="respect for levels PhD students and staff have distinct resource needs",
                classification="CON",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics, metrics = compare(annotated_answers_33, model_answers_33, metrics)

# Per-case counts first ...
display_metrics(local_metrics, "Metrics for Test Case 33")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 33:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 34, 'FP': 14, 'FN': 6, 'TN': 0}
Answer_classifications: {'TP': 28, 'FP': 20, 'FN': 6, 'TN': 0}
Arguments: {'TP': 18, 'FP': 35, 'FN': 17, 'TN': 0}
Argument_classifications: {'TP': 16, 'FP': 37, 'FN': 17, 'TN': 0}


# Test case 34

In [None]:
# Test case 34: reference annotation, followed by the model's extraction
annotated_answers_34 = [
    dict(
        answer="Building a reputation which is a matter of work and time, taking by consideration the UM6P is a new born comparing to its peers in the fileds.",
        classification="Solutions",
        arguments=[
            dict(
                text="UM6P being a new university can limit the opportunity to publish in the best venues",
                classification="PRO",
            ),
            dict(
                text="No, the reputation comes from doing quality work, famous institutions often have a long history of research accomplishments, which may overshadow the limited track record of a new institutions.",
                classification="CON",
            ),
        ],
    ),
]

model_answers_34 = [
    dict(
        answer="UM6P being a new university can limit the opportunity to publish in the best venues",
        classification="BARRIER",
        arguments=[
            dict(
                text="The reputation or the image if UM6P costs the university a lot money in purchasing equipment and consumables. On average the cost to sell to Um6p is 30% higher than the other universities across Morocco. This increase project costs.",
                classification="CON",
            ),
            dict(
                text="No, the reputation comes from doing quality work famous institutions often have a long history of research accomplishments, which may overshadow the limited track record of a new institutions.",
                classification="CON",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics, metrics = compare(annotated_answers_34, model_answers_34, metrics)

# Per-case counts first ...
display_metrics(local_metrics, "Metrics for Test Case 34")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 34:
Answers: {'TP': 0, 'FP': 1, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 1, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 34, 'FP': 15, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 28, 'FP': 21, 'FN': 7, 'TN': 0}
Arguments: {'TP': 18, 'FP': 37, 'FN': 19, 'TN': 0}
Argument_classifications: {'TP': 16, 'FP': 39, 'FN': 19, 'TN': 0}


# Test case 35

In [None]:
# Test case 35: reference annotation, followed by the model's extraction
annotated_answers_35 = [
    dict(
        answer="By ensuring a better work environment, the talents would come themselves to join the university and contribute to its development.",
        classification="Solution",
        arguments=[
            dict(
                text="A good work environment can contribute to attracting good students, but it's not sufficient.",
                classification="CON",
            ),
        ],
    ),
    dict(
        answer="A safe space that provides freedom of research plus the financial support are the main attractions of great researchers. Having an environment of this nature will surely attract the best researchers to this ecosystem.",
        classification="Solution",
        arguments=[],
    ),
]

model_answers_35 = [
    dict(
        answer="By ensuring a better work environment the talents would come themselves to join the University and contribute to its development.",
        classification="Solution",
        arguments=[],
    ),
    dict(
        answer="A good work environment can contribute to attract good students but it's not sufficient.",
        classification="Solution",
        arguments=[],
    ),
    dict(
        answer="Safe space that provides freedom of research plus the financial support are the main attractions of great researchers. Having an environment of this nature will surely attract the best researchers to this ecosystem.",
        classification="Solution",
        arguments=[],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics, metrics = compare(annotated_answers_35, model_answers_35, metrics)

# Per-case counts first ...
display_metrics(local_metrics, "Metrics for Test Case 35")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 35:
Answers: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 36, 'FP': 16, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 30, 'FP': 22, 'FN': 7, 'TN': 0}
Arguments: {'TP': 18, 'FP': 37, 'FN': 20, 'TN': 0}
Argument_classifications: {'TP': 16, 'FP': 39, 'FN': 20, 'TN': 0}


# Test case 36

In [None]:
# Test case 36: reference annotation followed by the model's extraction.
#
# Class labels are written in the singular ("Solution") on both sides. The cell
# previously mixed "Solutions" and "Solution", and the recorded run scored the
# second answer as a classification false positive even though both sides name
# the same class.
# NOTE(review): compare() appears to match labels case-insensitively but not
# across singular/plural spellings -- confirm against its definition.
annotated_answers_36 = [
    {
        "answer": "Use the unique combination attractive traits of research and highlight the fulfilment felt while working a research project.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "The fact that UM6p gives a decent living for a PHD student is attractive in itself, but it is necessary to highlight the most attractive trait which is the fulfilment felt from working in a research project.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "This can be done by putting in place programs that connect professors with potential students",
        "classification": "Solution",
        "arguments": []
    }
]

model_answers_36 = [
    {
        "answer": "Use the unique combination attractive traits of research and highlight the fulfilment felt while working a research project.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "The fact that UM6p gives a decent living for a PHD student is attractive in itself, but it is necessary to highlight the most attractive trait which is the fulfilment felt from working in a research project.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "Putting in place programs that connect professors with potential students",
        "classification": "Solution",
        "arguments": []
    }
]

# Score this case: compare() returns the per-case counts and the updated running totals
local_metrics, metrics = compare(annotated_answers_36, model_answers_36, metrics)

# Display metrics for this test case
display_metrics(local_metrics, "Metrics for Test Case 36")

# Display the cumulative metrics across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 36:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 38, 'FP': 16, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 31, 'FP': 23, 'FN': 7, 'TN': 0}
Arguments: {'TP': 19, 'FP': 37, 'FN': 20, 'TN': 0}
Argument_classifications: {'TP': 17, 'FP': 39, 'FN': 20, 'TN': 0}


# Test case 37

In [None]:
# Test case 37: reference annotation, followed by the model's extraction
annotated_answers_37 = [
    dict(
        answer="Create a UM6P Analytics platform to allow all UM6P doctoral students to access any device and avoid customization of equipment.",
        classification="Solution",
        arguments=[
            dict(
                text="The more tools at the disposal of student the more application possibilities will emerge.",
                classification="PRO",
            ),
        ],
    ),
]

model_answers_37 = [
    dict(
        answer="Create a UM6P Analytics platform to allow all UM6P doctoral students to access any device and avoid customization of equipment.",
        classification="Solution",
        arguments=[
            dict(
                text="The more tools at the disposal of student the more application possibilities will amerge.",
                classification="PRO",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics, metrics = compare(annotated_answers_37, model_answers_37, metrics)

# Per-case counts first ...
display_metrics(local_metrics, "Metrics for Test Case 37")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 37:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 39, 'FP': 16, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 32, 'FP': 23, 'FN': 7, 'TN': 0}
Arguments: {'TP': 20, 'FP': 37, 'FN': 20, 'TN': 0}
Argument_classifications: {'TP': 18, 'FP': 39, 'FN': 20, 'TN': 0}


# Test case 38

In [None]:
# Test case 38: reference annotation followed by the model's extraction.
#
# The annotated class label is written in the singular ("Solution"). It was
# previously "Solutions", and the recorded run scored the model's "SOLUTION" as
# a classification false positive although the answer and class are the same.
# NOTE(review): compare() appears to match labels case-insensitively but not
# across singular/plural spellings -- confirm against its definition.
annotated_answers_38 = [
    {
        "answer": "Provide a safe space for scientific staff and PhD students to work and suggest ideas.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "The scientist needs to have the research facilities and a good environment to do his work properly.",
                "classification": "PRO"
            }
        ]
    }
]

model_answers_38 = [
    {
        "answer": "Provide a safe space for scientific staff and PhD students to work and suggest ideas.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "The scientist needs to have the research facilities and a good environment to do his work properly.",
                "classification": "PRO"
            }
        ]
    }
]

# Score this case: compare() returns the per-case counts and the updated running totals
local_metrics_38, metrics = compare(annotated_answers_38, model_answers_38, metrics)

# Display metrics for this test case
display_metrics(local_metrics_38, "Metrics for Test Case 38")

# Display the cumulative metrics across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 38:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 40, 'FP': 16, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 32, 'FP': 24, 'FN': 7, 'TN': 0}
Arguments: {'TP': 21, 'FP': 37, 'FN': 20, 'TN': 0}
Argument_classifications: {'TP': 19, 'FP': 39, 'FN': 20, 'TN': 0}


# Test case 39

In [None]:
# Test case 39: reference annotation followed by the model's extraction.
#
# The annotated class label is written in the singular ("Metric"). It was
# previously "Metrics", and the recorded run scored the model's "METRIC" on the
# matching answer as a classification false positive.
# NOTE(review): compare() appears to match labels case-insensitively but not
# across singular/plural spellings -- confirm against its definition.
annotated_answers_39 = [
    {
        "answer": "the number of job offers that our graduates receive",
        "classification": "Metric",
        "arguments": [
            {
                "text": "Number of jobs received reflects the value of UM6P within the labor market",
                "classification": "PRO"
            },
            {
                "text": "The quantity does not reflect the quality of the offers",
                "classification": "CON"
            },
            {
                "text": "Are these offers consistent with the field and level of studies?",
                "classification": "CON"
            }
        ]
    }
]

model_answers_39 = [
    {
        "answer": "the number of job offers that our graduates receive",
        "classification": "METRIC",
        "arguments": [
            {
                "text": "Number of jobs received reflects the value of UM6P within the labor market",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "Are these offers consistent with the field and level of studies?",
        "classification": "METRIC",
        "arguments": [
            {
                "text": "The quantity does not reflect the quality of the offers",
                "classification": "CON"
            }
        ]
    }
]

# Score this case: compare() returns the per-case counts and the updated running totals
local_metrics_39, metrics = compare(annotated_answers_39, model_answers_39, metrics)

# Display metrics for this test case
display_metrics(local_metrics_39, "Metrics for Test Case 39")

# Display the cumulative metrics across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 39:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 41, 'FP': 17, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 32, 'FP': 26, 'FN': 7, 'TN': 0}
Arguments: {'TP': 22, 'FP': 38, 'FN': 22, 'TN': 0}
Argument_classifications: {'TP': 20, 'FP': 40, 'FN': 22, 'TN': 0}


# Test case 40

In [None]:
# Test case 40: reference annotation, followed by the model's extraction
annotated_answers_40 = [
    dict(
        answer="Achieve international accreditation for all our programs (undergrad and post grad)",
        classification="Solution",
        arguments=[],
    ),
    dict(
        answer="Yes UM6P should follow the international universities courses and programs to have the international accreditation and also to have be able to do students exchange with different universities worldwide",
        classification="Solution",
        arguments=[],
    ),
]

model_answers_40 = [
    dict(
        answer="Achieve international accreditation for all our programs (undergrad and post grad)",
        classification="METRIC",
        arguments=[
            dict(
                text="important KPI for recognition of UM6P programs",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="UM6P should follow the international universities courses and programs to have the international accreditation and also to have be able to do students exchange with different universities worldwide",
        classification="SOLUTION",
        arguments=[],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics_40, metrics = compare(annotated_answers_40, model_answers_40, metrics)

# Per-case counts first ...
display_metrics(local_metrics_40, "Metrics for Test Case 40")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 40:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 43, 'FP': 17, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 33, 'FP': 27, 'FN': 7, 'TN': 0}
Arguments: {'TP': 22, 'FP': 39, 'FN': 22, 'TN': 0}
Argument_classifications: {'TP': 20, 'FP': 41, 'FN': 22, 'TN': 0}


# Test case 41

In [None]:
# Test case 41: reference annotation, followed by the model's extraction
annotated_answers_41 = [
    dict(
        answer="Implementing an international curriculum allows students to gain a broader understanding of global issues, different cultural perspectives, and diverse approaches to learning.",
        classification="Solution",
        arguments=[
            dict(
                text="It helps foster a more inclusive and globally aware student body, enhancing the overall quality of education.",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="Establishing partnerships with renowned international universities or institutions can demonstrate UM6Ps commitment to providing a global education. Collaborative research projects, joint degree programs, and faculty exchanges are potential measures of success in fostering international collaborations.",
        classification="Solution",
        arguments=[],
    ),
]

model_answers_41 = [
    dict(
        answer="Implementing an international curriculum allows students to gain a broader understanding of global issues, different cultural perspectives, and diverse approaches to learning.",
        classification="SOLUTION",
        arguments=[
            dict(
                text="It helps foster a more inclusive and globally aware student body, enhancing the overall quality of education.",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="Establishing partnerships with renowned international universities or institutions can demonstrate UM6Ps commitment to providing a global education.",
        classification="SOLUTION",
        arguments=[
            dict(
                text="Collaborative research projects, joint degree programs, and faculty exchanges are potential measures of success in fostering international collaborations.",
                classification="PRO",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics_41, metrics = compare(annotated_answers_41, model_answers_41, metrics)

# Per-case counts first ...
display_metrics(local_metrics_41, "Metrics for Test Case 41")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 41:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 45, 'FP': 17, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 35, 'FP': 27, 'FN': 7, 'TN': 0}
Arguments: {'TP': 23, 'FP': 40, 'FN': 22, 'TN': 0}
Argument_classifications: {'TP': 21, 'FP': 42, 'FN': 22, 'TN': 0}


# Test case 42

In [None]:
# Test case 42: reference annotation, followed by the model's extraction
annotated_answers_42 = [
    dict(
        answer="The lack of transparency and limited access to resources across different departments hinders student’s ability to benefit from the expertise and opportunities available in various areas.",
        classification="Barrier",
        arguments=[
            dict(
                text="By promoting transparency and facilitating cross-department collaboration, students can easily access the resources and support they need, enabling them to achieve their goals more effectively.",
                classification="PRO",
            ),
        ],
    ),
]

model_answers_42 = [
    dict(
        answer="The lack of transparency and limited access to resources across different departments hinders students' ability to benefit from the expertise and opportunities available in various areas.",
        classification="BARRIER",
        arguments=[],
    ),
    dict(
        answer="By promoting transparency and facilitating cross-department collaboration, students can easily access the resources and support they need, enabling them to achieve their goals more effectively.",
        classification="SOLUTION",
        arguments=[],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics_42, metrics = compare(annotated_answers_42, model_answers_42, metrics)

# Per-case counts first ...
display_metrics(local_metrics_42, "Metrics for Test Case 42")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 42:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 46, 'FP': 18, 'FN': 7, 'TN': 0}
Answer_classifications: {'TP': 36, 'FP': 28, 'FN': 7, 'TN': 0}
Arguments: {'TP': 23, 'FP': 40, 'FN': 23, 'TN': 0}
Argument_classifications: {'TP': 21, 'FP': 42, 'FN': 23, 'TN': 0}


# Test case 43

In [None]:
# Test case 43: reference annotation followed by the model's extraction.
#
# Annotated class labels are written in the singular ("Solution", "Barrier") so
# they use the same vocabulary as the model's "SOLUTION" / "BARRIER". They were
# previously "Solutions" / "Barriers", and the recorded run scored the one
# matched answer as a classification miss.
# NOTE(review): compare() appears to match labels case-insensitively but not
# across singular/plural spellings -- confirm against its definition.
annotated_answers_43 = [
    {
        "answer": "For UM6P to be successful in both teaching and student success, more students should be involved in decision-making. Only certain students are permitted to offer their opinions and contribute to decision-making. Not every student can contribute in the same way.",
        "classification": "Solution",
        "arguments": [
            {
                "text": "The lack of connection with students, especially given the volume of emails, makes it difficult for them to identify relevant opportunities.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "lack of communication challenge rather than an absence of opportunities.",
        "classification": "Barrier",
        "arguments": [
            {
                "text": "There is a program encouraging volunteering among UM6P students, to work with associations, and do some social work, the initiative is called SEP, we count today more than 40 students who participated UM6P offers many opportunities to students. There are departments whose activity is to help students launch clubs, social initiatives, and start-ups.",
                "classification": "PRO"
            }
        ]
    }
]

model_answers_43 = [
    {
        "answer": "More students should be involved in decision-making.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Only certain students are permitted to offer their opinions and contribute to decision-making.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "The lack of connection with students, especially given the volume of emails, makes it difficult for them to identify relevant opportunities.",
        "classification": "BARRIER",
        "arguments": []
    },
    {
        "answer": "Absence for opportunities for students to have social interactions and activities in the region.",
        "classification": "BARRIER",
        "arguments": []
    },
    {
        "answer": "The absence of an opportunity is an opportunity in itself to be created.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "There is a program encouraging volunteering among UM6P students, to work with associations, and do some social work, the initiative is called SEP, we count today more than 40 students who participated.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "UM6P offers many opportunities to students.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "There are departments whose activity is to help students launch clubs, social initiatives, and start-ups.",
                "classification": "PRO"
            }
        ]
    },
    {
        "answer": "Maybe this is a lack of communication challenge rather than an absence of opportunities.",
        "classification": "BARRIER",
        "arguments": []
    }
]

# Score this case: compare() returns the per-case counts and the updated running totals
local_metrics_43, metrics = compare(annotated_answers_43, model_answers_43, metrics)

# Display metrics for this test case
display_metrics(local_metrics_43, "Metrics for Test Case 43")

# Display the cumulative metrics across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 43:
Answers: {'TP': 1, 'FP': 5, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 6, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 3, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 3, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 47, 'FP': 23, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 36, 'FP': 34, 'FN': 8, 'TN': 0}
Arguments: {'TP': 23, 'FP': 43, 'FN': 25, 'TN': 0}
Argument_classifications: {'TP': 21, 'FP': 45, 'FN': 25, 'TN': 0}


# Test case 44

In [None]:
# Test case 44: reference annotation, followed by the model's extraction
annotated_answers_44 = [
    dict(
        answer="One idea to address the lack of communication and support in universities is to establish a student-led support team. This team would consist of dedicated students who are available to assist their peers in accessing various resources and providing guidance when needed.",
        classification="Solution",
        arguments=[
            dict(
                text="By creating such a team, students can benefit from improved communication channels and receive the necessary support to navigate their academic journey.",
                classification="PRO",
            ),
        ],
    ),
]

model_answers_44 = [
    dict(
        answer="One idea to address the lack of communication and support in universities is to establish a student-led support team.",
        classification="SOLUTION",
        arguments=[
            dict(
                text="This team would consist of dedicated students who are available to assist their peers in accessing various resources and providing guidance when needed.",
                classification="PRO",
            ),
            dict(
                text="By creating such a team, students can benefit from improved communication channels and receive the necessary support to navigate their academic journey.",
                classification="PRO",
            ),
        ],
    ),
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics_44, metrics = compare(annotated_answers_44, model_answers_44, metrics)

# Per-case counts first ...
display_metrics(local_metrics_44, "Metrics for Test Case 44")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")







Metrics for Test Case 44:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 48, 'FP': 23, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 37, 'FP': 34, 'FN': 8, 'TN': 0}
Arguments: {'TP': 24, 'FP': 44, 'FN': 25, 'TN': 0}
Argument_classifications: {'TP': 22, 'FP': 46, 'FN': 25, 'TN': 0}


# Test case 45

In [None]:
# Test case 45: eight short answers, none of which carries arguments.
# The annotation labels every answer "Solution"; the model labels every answer "METRIC".
annotated_answers_45 = [
    {"answer": title, "classification": "Solution", "arguments": []}
    for title in (
        "Graduate Entrepreneurship Rate",
        "Partnerships and Collaborations",
        "Student Competitions and Awards",
        "Alumni Engagement",
        "Research Publications and Citations",
        "Stakeholder Surveys",
        "Incubators and Accelerators",
        "Social Impact Ventures and Start-up Success",
    )
]

model_answers_45 = [
    {"answer": title, "classification": "METRIC", "arguments": []}
    for title in (
        "Graduate Entrepreneurship Rate",
        "Partnerships and Collaborations",
        "Student Competitions and Awards",
        "Alumni Engagement",
        "Research Publications and Citations",
        "Stakeholder Surveys",
        "Incubators and Accelerators",
        "Social Impact Ventures and Start-up Success",
    )
]

# Score this case; compare() hands back the per-case counts and the running totals
local_metrics_45, metrics = compare(annotated_answers_45, model_answers_45, metrics)

# Per-case counts first ...
display_metrics(local_metrics_45, "Metrics for Test Case 45")

# ... then the cumulative counts across all cases processed so far
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 45:
Answers: {'TP': 8, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 8, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 56, 'FP': 23, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 37, 'FP': 42, 'FN': 8, 'TN': 0}
Arguments: {'TP': 24, 'FP': 44, 'FN': 25, 'TN': 0}
Argument_classifications: {'TP': 22, 'FP': 46, 'FN': 25, 'TN': 0}


# Test case 46

In [None]:
# Test case 46 — reference annotation (three solutions; only the last has an argument)
annotated_answers_46 = [
    dict(answer="Develop the entrepreneurial spirit among students", classification="Solution", arguments=[]),
    dict(answer="Create classroom spaces to exchange ideas or projects", classification="Solution", arguments=[]),
    dict(
        answer="Facilitate financing for the different project leaders after having validated the business plan",
        classification="Solution",
        arguments=[
            dict(text="The proposal mainly reflects encouragement and not the measure of impact", classification="CON"),
        ],
    ),
]

# Test case 46 — model extraction (upper-case labels; the argument text ends with a period)
model_answers_46 = [
    dict(answer="Develop the entrepreneurial spirit among students", classification="SOLUTION", arguments=[]),
    dict(answer="Create classroom spaces to exchange ideas or projects", classification="SOLUTION", arguments=[]),
    dict(
        answer="Facilitate financing for the different project leaders after having validated the business plan",
        classification="SOLUTION",
        arguments=[
            dict(text="The proposal mainly reflects encouragement and not the measure of impact.", classification="CON"),
        ],
    ),
]

# Score test case 46: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_46, metrics) = compare(
    annotated_answers_46,
    model_answers_46,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_46, "Metrics for Test Case 46")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 46:
Answers: {'TP': 3, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 3, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 59, 'FP': 23, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 40, 'FP': 42, 'FN': 8, 'TN': 0}
Arguments: {'TP': 25, 'FP': 44, 'FN': 25, 'TN': 0}
Argument_classifications: {'TP': 23, 'FP': 46, 'FN': 25, 'TN': 0}


# Test case 47

In [None]:
# Test case 47 — reference annotation (two metrics; the second carries one long CON argument)
annotated_answers_47 = [
    dict(answer="Number of startups created and funded in the UM6P ecosystem", classification="Metrics", arguments=[]),
    dict(
        answer="Real and Impactful startups",
        classification="Metrics",
        arguments=[
            dict(
                text="Entrepreneurial mindset development is more important. Startups are not sufficient for entrepreneurship to raise funds to set up an industrial or service business with an income statement and an investment plan and real commercial development to ensure the viability of entrepreneurs ale in time Following up the startup more important of the number of creation, because every day thousand of startup born but 1 or 2 continues",
                classification="CON",
            ),
        ],
    ),
]

# Test case 47 — model extraction (splits the annotated argument into a third answer with two PRO arguments)
model_answers_47 = [
    dict(answer="Number of startups created and funded in the UM6P ecosystem", classification="Metrics", arguments=[]),
    dict(answer="Real and Impactful startups", classification="Metrics", arguments=[]),
    dict(
        answer="Entrepreneurial mindset development is more important.",
        classification="Solution",
        arguments=[
            dict(
                text="Startups are not sufficient for entrepreneurship to raise funds to set up an industrial or service business with an income statement and an investment plan and real commercial development to ensure the viability of entrepreneurs ale in time.",
                classification="PRO",
            ),
            dict(
                text="Following up the startup more important of the number of creation, because every day thousand of startup born but 1 or 2 continues.",
                classification="PRO",
            ),
        ],
    ),
]

# Score test case 47: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_47, metrics) = compare(
    annotated_answers_47,
    model_answers_47,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_47, "Metrics for Test Case 47")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 47:
Answers: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 61, 'FP': 24, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 42, 'FP': 43, 'FN': 8, 'TN': 0}
Arguments: {'TP': 25, 'FP': 46, 'FN': 26, 'TN': 0}
Argument_classifications: {'TP': 23, 'FP': 48, 'FN': 26, 'TN': 0}


# Test case 48

In [None]:
# Test case 48 — reference annotation (one metric with one PRO and two CON arguments)
annotated_answers_48 = [
    dict(
        answer="Nb of startup launched in a period of time",
        classification="Metrics",
        arguments=[
            dict(text="That shows how dynamic the entrepreneurial ecosystem is.", classification="PRO"),
            dict(
                text="Other factors such as the quality and viability of start-ups and their survival and growth rate are more relevant. Number but also thematic and business plan.",
                classification="CON",
            ),
            dict(
                text="The number of startups, an insufficient criterion, is it viable over time, economically, are we addressing an essential issue for Morocco, an example of sustainable development.",
                classification="CON",
            ),
        ],
    ),
]

# Test case 48 — model extraction (turns the two CON arguments into separate answers)
model_answers_48 = [
    dict(
        answer="Nb of startup launched in a period of time",
        classification="Metrics",
        arguments=[
            dict(text="That shows how dynamic the entrepreneurial ecosystem is.", classification="PRO"),
        ],
    ),
    dict(
        answer="Other factors such as the quality and viability of start-ups and their survival and growth rate are more relevant.",
        classification="Metrics",
        arguments=[
            dict(text="Number but also thematic and business plan.", classification="PRO"),
        ],
    ),
    dict(
        answer="The number of startups is an insufficient criterion; is it viable over time, economically, are we addressing an essential issue for Morocco, an example of sustainable development",
        classification="METRIC",
        arguments=[],
    ),
]

# Score test case 48: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_48, metrics) = compare(
    annotated_answers_48,
    model_answers_48,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_48, "Metrics for Test Case 48")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 48:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 62, 'FP': 26, 'FN': 8, 'TN': 0}
Answer_classifications: {'TP': 43, 'FP': 45, 'FN': 8, 'TN': 0}
Arguments: {'TP': 26, 'FP': 47, 'FN': 28, 'TN': 0}
Argument_classifications: {'TP': 24, 'FP': 49, 'FN': 28, 'TN': 0}


# Test case 49

In [None]:
# Test case 49 — reference annotation (three stand-alone metrics, no arguments)
annotated_answers_49 = [
    dict(
        answer="To measure UM6P success in entrepreneurship, we need to see if our startups are able to scale up to industrialization and being able to connect with other ecosystems (Moroccan & African ones)",
        classification="Metric",
        arguments=[],
    ),
    dict(answer="This is the major indicator: turnover out of the OCP ecosystem", classification="Metric", arguments=[]),
    dict(
        answer="Scale up as well as startup which can long last over the longer periods of time",
        classification="Metric",
        arguments=[],
    ),
]

# Test case 49 — model extraction (the second annotated metric appears as a PRO argument of the first)
model_answers_49 = [
    dict(
        answer="To measure UM6P success in entrepreneurship, we need to see if our startups are able to scale up to industrialization and being able to connect with other ecosystems (Moroccan & African ones)",
        classification="METRIC",
        arguments=[
            dict(text="This is the major indicator: torn over out of the OCP ecosystem", classification="PRO"),
        ],
    ),
    dict(
        answer="Scale up as well as startup which can long last over the longer periods of time",
        classification="METRIC",
        arguments=[],
    ),
]

# Score test case 49: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_49, metrics) = compare(
    annotated_answers_49,
    model_answers_49,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_49, "Metrics for Test Case 49")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 49:
Answers: {'TP': 2, 'FP': 0, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 64, 'FP': 26, 'FN': 9, 'TN': 0}
Answer_classifications: {'TP': 45, 'FP': 45, 'FN': 9, 'TN': 0}
Arguments: {'TP': 26, 'FP': 48, 'FN': 28, 'TN': 0}
Argument_classifications: {'TP': 24, 'FP': 50, 'FN': 28, 'TN': 0}


# Test case 50

In [None]:
# Test case 50 — reference annotation (one solution with a PRO and a CON argument)
annotated_answers_50 = [
    dict(
        answer="Creating a local ecosystem is the best way to measure success in terms of entrepreneurship impact.",
        classification="Solution",
        arguments=[
            dict(
                text="I sense a massive interest in numbers like the total number of startups incubated and the amount of budget invested. I urge the community in this exercise to consider those as vanity metrics. The real, sustainable value is creating an ecosystem, and one of the best ways to measure our success is through measuring the interactions across the ecosystem we are creating.",
                classification="PRO",
            ),
            dict(
                text="This idea is important so that we don't fall into the trap of thinking we are successful while our success is fragmented through some numbers. Established ecosystems in Silicon Valley, for example, can rely on total numbers of investments and incubated startups. However, in our case, creating a solid ecosystem in Ben Guerir is the most important step ahead, even at the price of lower numbers in investment.",
                classification="CON",
            ),
        ],
    ),
]

# Test case 50 — model extraction (three answers, each with a single PRO argument)
model_answers_50 = [
    dict(
        answer="Creation a Local entrepreneurship Ecosystem",
        classification="SOLUTION",
        arguments=[
            dict(
                text="Creating a local ecosystem is the best way to measure success in terms of entrepreneurship impact",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="Creating a local ecosystem is the best way to measure success in terms of entrepreneurship impact",
        classification="METRIC",
        arguments=[
            dict(
                text="I sense a massive interest in numbers like the total number of startups incubated and the amount of budget invested, I urge the community in this exercise to consider those as vanity metrics, the real, sustainable( in the time) value is creating an ecosystem and one of the best ways to measure our success in this is through measuring the interactions across the ecosystem we are creating",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="Creating a solid ecosystem in Bengeurir is the most important and step ahead, even at the price of lower number in investments",
        classification="SOLUTION",
        arguments=[
            dict(
                text="this idea is important so that we don't fall into the trap of thinking we are successful while our success is fragmented through some numbers, established ecosystems in silicon valley for example can rely on total numbers of investments and incubated startups, however in our case creating a solid ecosystem in Bengeurir is the most important and step ahead, even at the price of lower number in investments",
                classification="PRO",
            ),
        ],
    ),
]

# Score test case 50: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_50, metrics) = compare(
    annotated_answers_50,
    model_answers_50,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_50, "Metrics for Test Case 50")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 50:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 3, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 65, 'FP': 28, 'FN': 9, 'TN': 0}
Answer_classifications: {'TP': 45, 'FP': 48, 'FN': 9, 'TN': 0}
Arguments: {'TP': 27, 'FP': 50, 'FN': 29, 'TN': 0}
Argument_classifications: {'TP': 25, 'FP': 52, 'FN': 29, 'TN': 0}


# Test case 51

In [None]:
# Test case 51 — reference annotation (one metric with two CON arguments, then two solutions)
annotated_answers_51 = [
    dict(
        answer="Number of IPs based on which startups are created.",
        classification="Metrics",
        arguments=[
            dict(
                text="the number of IPs my be a vanity metric that dosent represnt the vlaue of the impact",
                classification="CON",
            ),
            dict(text="IPs are important mybe hard to translate to value", classification="CON"),
        ],
    ),
    dict(answer="Value creation: jobs, increase competitiveness", classification="Solution", arguments=[]),
    dict(
        answer="Savings to industriels using solutions offered by researchers and engineers from UM6P",
        classification="Solution",
        arguments=[],
    ),
]

# Test case 51 — model extraction (same answers, last two in swapped order and labelled METRIC)
model_answers_51 = [
    dict(
        answer="Number of IPs based on which startups are created",
        classification="Metrics",
        arguments=[
            dict(
                text="the number of IPs my be a vanity metric that dosent represnt the vlaue of the impact",
                classification="CON",
            ),
            dict(text="IPs are important mybe hard to translate to value", classification="CON"),
        ],
    ),
    dict(
        answer="Savings to industriels using solutions offered by researchers and engineers from UM6P",
        classification="METRIC",
        arguments=[],
    ),
    dict(answer="Value creation: jobs, increase competitiveness", classification="METRIC", arguments=[]),
]

# Score test case 51: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_51, metrics) = compare(
    annotated_answers_51,
    model_answers_51,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_51, "Metrics for Test Case 51")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 51:
Answers: {'TP': 3, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 68, 'FP': 28, 'FN': 9, 'TN': 0}
Answer_classifications: {'TP': 46, 'FP': 50, 'FN': 9, 'TN': 0}
Arguments: {'TP': 29, 'FP': 50, 'FN': 29, 'TN': 0}
Argument_classifications: {'TP': 27, 'FP': 52, 'FN': 29, 'TN': 0}


# Test case 52

In [None]:
# Test case 52 — reference annotation (one metric with a PRO and a CON argument)
annotated_answers_52 = [
    dict(
        answer="Number of listed companies generated via UM6P that are quoted on a stock exchange.",
        classification="Metric",
        arguments=[
            dict(
                text="If UM6P creates companies and contributes directly to GDP in Morocco and Africa, it will have a social impact and improve livelihoods, working towards zero poverty and other SDGs.",
                classification="PRO",
            ),
            dict(
                text="This number is far-fetched, and we will have to wait a couple of years to start monitoring this number and measure our success according to it. What we need right now is another metric to measure our progress towards this goal.",
                classification="CON",
            ),
        ],
    ),
]

# Test case 52 — model extraction (the CON argument becomes a second answer with its own argument)
model_answers_52 = [
    dict(
        answer="Number of listed companies generated via UM6P that are quoted on a stock exchange.",
        classification="Metric",
        arguments=[
            dict(
                text="If UM6P create companies and contribute directly to GDP in Morocco and Africa hence it will have a social impact and improving livelihood towards zero poverty and other SDGs.",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="what we need right now is another number to measure our progress towards this number.",
        classification="Metric",
        arguments=[
            dict(
                text="This number is far fetch and we will have to wait for couple of years to be able to start monitoring this number and measure our success according to it.",
                classification="CON",
            ),
        ],
    ),
]

# Score test case 52: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_52, metrics) = compare(
    annotated_answers_52,
    model_answers_52,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_52, "Metrics for Test Case 52")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 52:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 69, 'FP': 29, 'FN': 9, 'TN': 0}
Answer_classifications: {'TP': 47, 'FP': 51, 'FN': 9, 'TN': 0}
Arguments: {'TP': 30, 'FP': 51, 'FN': 30, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 53, 'FN': 30, 'TN': 0}


# Test case 53

In [None]:
# Test case 53 — reference annotation (one metric with one PRO and two CON arguments)
annotated_answers_53 = [
    dict(
        answer="To be successful, our impact should be seen first on the region where we are (Benguerir). We cannot target Morocco & Africa without having a visible impact on the Rehamna region.",
        classification="Metric",
        arguments=[
            dict(
                text="If we fail to impact the local community in Rhamna, then we will definitely fail to impact any larger scale. Creating the local ecosystem in Benguerir is key, and progress will be based on it.",
                classification="PRO",
            ),
            dict(
                text="Our impact should go beyond Rhamna. Indeed, Rhamna is a living lab, but developed solutions should target other regions, especially since UM6P is now present in different cities and continents. A business by nature obeys a market.",
                classification="CON",
            ),
            dict(
                text="Of course, the Rhamna region must benefit from development if possible, but a business first corresponds to a market by nature, allowing marketing to define the most appropriate market whether local or not.",
                classification="CON",
            ),
        ],
    ),
]

# Test case 53 — model extraction (four answers, none with arguments)
model_answers_53 = [
    dict(
        answer="our impact should be seen first on the region where we are (Benguerir). We cannot target Morocco & Africa without having a visible impact on rehamna region. If we fail to impact the local community in Rhamna then we will definately fail to impact any larger scale, creating the local ecosystem in bengeurir is key and progress will be based on it.",
        classification="Metric",
        arguments=[],
    ),
    dict(
        answer="Our impact should go beyond Rhamna. Indeed Rhamna is a living lab, but developped solutions should target other regions especialy that the UM6P is now on different cities and continents.",
        classification="SOLUTION",
        arguments=[],
    ),
    dict(answer="A business by nature obeys a market.", classification="SOLUTION", arguments=[]),
    dict(
        answer="a business first corresponds to a market by nature for marketing to define the most appropriate market whether local or not.",
        classification="SOLUTION",
        arguments=[],
    ),
]

# Score test case 53: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_53, metrics) = compare(
    annotated_answers_53,
    model_answers_53,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_53, "Metrics for Test Case 53")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 53:
Answers: {'TP': 1, 'FP': 3, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 3, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 3, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 3, 'TN': 0}

Updated Metrics:
Answers: {'TP': 70, 'FP': 32, 'FN': 9, 'TN': 0}
Answer_classifications: {'TP': 48, 'FP': 54, 'FN': 9, 'TN': 0}
Arguments: {'TP': 30, 'FP': 51, 'FN': 33, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 53, 'FN': 33, 'TN': 0}


# Test case 54

In [None]:
# Test case 54 — reference annotation (one metric with a single CON argument)
annotated_answers_54 = [
    dict(
        answer="Number of smallholder farmers impacted. The biggest college at UM6P is SAES, hence the direct impact on smallholders in Morocco and Africa should be a crucial KPI to measure the university's impact.",
        classification="Metric",
        arguments=[
            dict(
                text="We should set the number of farmers as a KPI and also measure how we changed the farmer's livelihood. The number is good, but the impact at the level of each farmer is even more important. Small producers need to see a significant impact.",
                classification="CON",
            ),
        ],
    ),
]

# Test case 54 — model extraction (four answers; the first two carry one PRO argument each)
model_answers_54 = [
    dict(
        answer="Number of smallholder farmers impacted",
        classification="METRIC",
        arguments=[
            dict(
                text="Biggest college at UM6P is SAES, hence the direct impact on smallholders in Morocco and Africa should be a crucial KPI to measure the university impact.",
                classification="PRO",
            ),
        ],
    ),
    dict(
        answer="We should set the number of farmers as a KPI and also how did we change the farmers livelihood?",
        classification="METRIC",
        arguments=[
            dict(
                text="The number is good, the impact at the level of each farmer is even more important.",
                classification="PRO",
            ),
        ],
    ),
    dict(answer="Small producers what a significant impact.", classification="METRIC", arguments=[]),
    dict(
        answer="50% of the African continent works in the agricultural sector from production to sale on the markets...sometimes small technological improvements human social educational and of course climatic and average soils of substances and plant production and humans have an exponential impact on the future of the continent",
        classification="SOLUTION",
        arguments=[],
    ),
]

# Score test case 54: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_54, metrics) = compare(
    annotated_answers_54,
    model_answers_54,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_54, "Metrics for Test Case 54")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 54:
Answers: {'TP': 0, 'FP': 4, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 4, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 70, 'FP': 36, 'FN': 10, 'TN': 0}
Answer_classifications: {'TP': 48, 'FP': 58, 'FN': 10, 'TN': 0}
Arguments: {'TP': 30, 'FP': 53, 'FN': 34, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 55, 'FN': 34, 'TN': 0}


# Test case 55

In [None]:
# Test case 55 — reference annotation (one metric with a single PRO argument)
annotated_answers_55 = [
    dict(
        answer="The impact or possible future impact of my research, projects, and actions, what actions I am taking or will take in the future that could have an impact",
        classification="Metrics",
        arguments=[
            dict(
                text="Most research done in Morocco has no real impact in society which is a waste of money but Um6p must be different. Action based research and project including give a PhD degree based on relevant startup and give points on KPI to teachers for enabling startups scientific basis and innovation centric.",
                classification="PRO",
            ),
        ],
    ),
]

# Test case 55 — model extraction (two solutions lifted from the annotated argument text)
model_answers_55 = [
    dict(answer="give a PhD degree based on relevant startup", classification="SOLUTION", arguments=[]),
    dict(
        answer="give points on KPI to teachers for enabling startups scientific basis and innovation centric",
        classification="SOLUTION",
        arguments=[],
    ),
]

# Score test case 55: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_55, metrics) = compare(
    annotated_answers_55,
    model_answers_55,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_55, "Metrics for Test Case 55")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 55:
Answers: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 70, 'FP': 38, 'FN': 11, 'TN': 0}
Answer_classifications: {'TP': 48, 'FP': 60, 'FN': 11, 'TN': 0}
Arguments: {'TP': 30, 'FP': 53, 'FN': 35, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 55, 'FN': 35, 'TN': 0}


# Test case 56

In [None]:
# Test case 56 — reference annotation (one solution with a single CON argument)
annotated_answers_56 = [
    dict(
        answer="Diversity : Diversity to have a holistic approach to the problems to be solved",
        classification="SOLUTION",
        arguments=[
            dict(
                text="Diversity is the problem, because we should know the problem and find a solution not just speak about diversity and loose time and ideas",
                classification="CON",
            ),
        ],
    ),
]

# Test case 56 — model extraction (splits both the annotated answer and its argument in two)
model_answers_56 = [
    dict(
        answer="Diversity",
        classification="SOLUTION",
        arguments=[
            dict(text="Diversity to have a holistic approach to the problems to be solved", classification="PRO"),
        ],
    ),
    dict(
        answer="Diversity is the problem",
        classification="BARRIER",
        arguments=[
            dict(
                text="because we should know the problem and find a solution not just speak about diversity and loose time and ideas",
                classification="CON",
            ),
        ],
    ),
]

# Score test case 56: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_56, metrics) = compare(
    annotated_answers_56,
    model_answers_56,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_56, "Metrics for Test Case 56")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 56:
Answers: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 70, 'FP': 40, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 48, 'FP': 62, 'FN': 12, 'TN': 0}
Arguments: {'TP': 30, 'FP': 55, 'FN': 36, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 57, 'FN': 36, 'TN': 0}


# Test case 57

In [None]:
# Test case 57 — reference annotation (a single barrier, no arguments)
annotated_answers_57 = [
    dict(answer="Lack of skilled monitoring committee to follow-up ideas", classification="Barrier", arguments=[]),
]

# Test case 57 — model extraction (the same barrier plus an extra solution with one PRO argument)
model_answers_57 = [
    dict(answer="Lack of skilled monitoring committee to follow-up ideas", classification="BARRIER", arguments=[]),
    dict(
        answer="Monitoring is absolute necessary in every stages of idea and startup Mentorant experiences",
        classification="SOLUTION",
        arguments=[
            dict(
                text="Senior business experience is important to guide young people, real business leaders who have managed or set up a business.",
                classification="PRO",
            ),
        ],
    ),
]

# Score test case 57: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_57, metrics) = compare(
    annotated_answers_57,
    model_answers_57,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_57, "Metrics for Test Case 57")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 57:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 1, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 71, 'FP': 41, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 49, 'FP': 63, 'FN': 12, 'TN': 0}
Arguments: {'TP': 30, 'FP': 56, 'FN': 36, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 58, 'FN': 36, 'TN': 0}


# Test case 58

In [None]:
# Test case 58 — reference annotation: two barriers, no arguments
annotated_answers_58 = [
    dict(answer=barrier_text, classification="BARRIER", arguments=[])
    for barrier_text in (
        "projects and collaborations made focus only on research, no impact or entrepreneurship deliverables are considered",
        "At um6p we believe too much that the researcher is the best manager the best salesman the best entrepreneur state of mind to change to have the diversity of profiles and skills.",
    )
]

# Test case 58 — model extraction: the same two barriers, built as separate objects
model_answers_58 = [
    dict(answer=barrier_text, classification="BARRIER", arguments=[])
    for barrier_text in (
        "projects and collaborations made focus only on research, no impact or entrepreneurship deliverables are considered",
        "At um6p we believe too much that the researcher is the best manager the best salesman the best entrepreneur state of mind to change to have the diversity of profiles and skills.",
    )
]

# Score test case 58: compare() returns this case's counts and the running totals.
# NOTE(review): `metrics` is rebound here, so re-running the cell presumably
# counts this case twice — confirm.
(local_metrics_58, metrics) = compare(
    annotated_answers_58,
    model_answers_58,
    metrics,
)

# Show the per-case counts, then the cumulative counts.
display_metrics(local_metrics_58, "Metrics for Test Case 58")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 58:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 73, 'FP': 41, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 51, 'FP': 63, 'FN': 12, 'TN': 0}
Arguments: {'TP': 30, 'FP': 56, 'FN': 36, 'TN': 0}
Argument_classifications: {'TP': 28, 'FP': 58, 'FN': 36, 'TN': 0}


# Test case 59

In [None]:
# Test case 59: annotation and model share both answer texts and the single
# PRO argument; they differ on the label of the second answer.
# NOTE(review): the annotated labels are spelled "Barrier" while the model
# labels are upper-case. The saved output for this cell
# (Answer_classifications TP 1 / FP 1) suggests `compare` ignores case -
# confirm before relying on it.
annotated_answers_59 = [
    {
        "answer": "Not fully leveraging the potential of its people.",
        "classification": "Barrier",
        "arguments": [],
    },
    {
        "answer": "Leveraging People management",
        "classification": "Barrier",
        "arguments": [
            {
                "text": (
                    "To improve the potential of an entrepreneur, in particular a startup "
                    "researcher, management is required and even management by "
                    "experienced business leaders."
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_59 = [
    {
        "answer": "Not fully leveraging the potential of its people.",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": "Leveraging People management",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "To improve the potential of an entrepreneur, in particular a startup "
                    "researcher, management is required and even management by "
                    "experienced business leaders."
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_59, metrics = compare(
    annotated_answers_59,
    model_answers_59,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_59, "Metrics for Test Case 59")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 59:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 75, 'FP': 41, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 52, 'FP': 64, 'FN': 12, 'TN': 0}
Arguments: {'TP': 31, 'FP': 56, 'FN': 36, 'TN': 0}
Argument_classifications: {'TP': 29, 'FP': 58, 'FN': 36, 'TN': 0}


# Test case 60

In [None]:
# Test case 60: the annotation has one BARRIER answer carrying a single long
# PRO argument. The model keeps the answer (without arguments) and turns
# sentences of that argument into two extra SOLUTION answers with three PRO
# arguments between them.
# The "&quot" fragments in the annotated text are part of the fixture data and
# are kept verbatim.
annotated_answers_60 = [
    {
        "answer": "Transend Political barriers and not having direct impact on society",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "At the opposit of other universities locally, UM6P has a unique business "
                    "model where it does have the capacity to connect research to society "
                    "directly. Hence, it can convey its research and knoweledge to direct "
                    "action. Just to illustrate this idea, Sustainable Agriculture College, "
                    "instead of only doing research it may have its own scalable production "
                    "farm that can innovate in all aspect of national and african "
                    "agriculture. In such model, it is research by proving as the "
                    "&quotlearning by doing&quot and UM6P can then streamline research ideas "
                    "into real life, build its credibility and redifine excellence by "
                    "pushing the boundaries of possible. E.g.&quot Les Domaines "
                    "UM6P&quotas a leader in agricol production in Morocco and having farms "
                    "in many African countries, employing thousands, generating millions "
                    "and providing food security to thousands...(that was just an example)"
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_60 = [
    {
        "answer": "Transend Political barriers and not having direct impact on society",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "UM6P has a unique business model where it does have the capacity to "
            "connect research to society directly."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Hence, it can convey its research and knoweledge to direct action.",
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": (
            "Sustainable Agriculture College, instead of only doing research it "
            "may have its own scalable production farm that can innovate in all "
            "aspect of national and african agriculture."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "In such model, it is research by proving as the \"learning by doing\" "
                    "and UM6P can then streamline research ideas into real life, build its "
                    "credibility and redifine excellence by pushing the boundaries of "
                    "possible."
                ),
                "classification": "PRO",
            },
            {
                "text": (
                    "Les Domaines UM6P a leader in agricol production in Morocco and having "
                    "farms in many African countries, employing thousands, generating "
                    "millions and providing food security to thousands...(that was just an "
                    "example)"
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_60, metrics = compare(
    annotated_answers_60,
    model_answers_60,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_60, "Metrics for Test Case 60")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 60:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 3, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 3, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 76, 'FP': 43, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 53, 'FP': 66, 'FN': 12, 'TN': 0}
Arguments: {'TP': 31, 'FP': 59, 'FN': 37, 'TN': 0}
Argument_classifications: {'TP': 29, 'FP': 61, 'FN': 37, 'TN': 0}


# Test case 61

In [None]:
# Test case 61: the annotation has one BARRIER answer with two PRO arguments.
# The model keeps the answer without arguments and builds two extra answers
# (one BARRIER, one SOLUTION) out of the argument sentences. The model text
# also spells "relevant" / "entrepreneurship" where the annotation has
# "relevent" / "enterpreneuship".
annotated_answers_61 = [
    {
        "answer": (
            "Researchers who think that entrepreneurship is a waste of time and "
            "can ruin their career"
        ),
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "Research with no added value to people will remain only a paper, "
                    "there should be real impact financially and socially on livelihood. "
                    "Action based research translation into Startup researcher are not "
                    "made for entrepreneurship and they should be open to collaborate "
                    "with business people."
                ),
                "classification": "PRO",
            },
            {
                "text": (
                    "Having an entrepreneurship mindset keeps us always relevent. "
                    "Careers are better enhanced by having an enterpreneuship mindset."
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_61 = [
    {
        "answer": (
            "Researchers who think that entrepreneurship is a waste of time and "
            "can ruin their career"
        ),
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "Research with no added value to people will remain only a paper, "
            "there should be real impact financially and socially on livelihood"
        ),
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "Action based research translation into Startup researcher are not "
                    "made for entrepreneurship and they should be open to collaborate "
                    "with business people."
                ),
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": "Having an entrepreneurship mindset keeps us always relevant",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Careers are better enhanced by having an entrepreneurship mindset.",
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_61, metrics = compare(
    annotated_answers_61,
    model_answers_61,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_61, "Metrics for Test Case 61")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 61:
Answers: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 2, 'TN': 0}

Updated Metrics:
Answers: {'TP': 77, 'FP': 45, 'FN': 12, 'TN': 0}
Answer_classifications: {'TP': 54, 'FP': 68, 'FN': 12, 'TN': 0}
Arguments: {'TP': 31, 'FP': 61, 'FN': 39, 'TN': 0}
Argument_classifications: {'TP': 29, 'FP': 63, 'FN': 39, 'TN': 0}


# Test case 62

In [None]:
# Test case 62: the annotation has four BARRIER answers without arguments.
# The model returns eight answers (BARRIER and SOLUTION): some split an
# annotated answer into parts, and some answers and all arguments have no
# counterpart in the annotation.
annotated_answers_62 = [
    {
        "answer": (
            "Not daring enough! In a dynamic and fast changing world, UM6P should "
            "have an agile, yet, efficient approach to dare exploring new "
            "frontiers and ideas. However, it should have to ssess quickly and "
            "set up correction measures when necessary."
        ),
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": "Researchers are more career driven than impact oriented.",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "Failing to embrace and translate collective intelligence from a work "
            "paradigm to a day to day practice,,More interdepartmental "
            "collaboration and sharing of research and practices with everyone."
        ),
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "Level of communication, make participation and integration of "
            "entrepreneurship more fluid and simpler. especially the files, "
            "The mentality, this is good enough, no attention to details."
        ),
        "classification": "BARRIER",
        "arguments": [],
    },
]

model_answers_62 = [
    {
        "answer": "Not daring enough!",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "However, it should have to assess quickly and set up correction "
                    "measures when necessary."
                ),
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": "Non daring mindsets, fear of adventures and risks, comfort zone laziness",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": "Right people in the area of Entrepreneurship is the need of the hour",
        "classification": "SOLUTION",
        "arguments": [],
    },
    {
        "answer": "Researchers are more career driven than impact oriented",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "Failing to embrace and translate collective intelligence from a work "
            "paradigm to a day to day practice"
        ),
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": (
            "More interdepartmental collaboration and sharing of research and "
            "practices with everyone"
        ),
        "classification": "SOLUTION",
        "arguments": [],
    },
    {
        "answer": (
            "Level of communication, make participation and integration of "
            "entrepreneurship more fluid and simpler, especially the files to "
            "submit"
        ),
        "classification": "SOLUTION",
        "arguments": [],
    },
    {
        "answer": "The mentality, this is good enough, no attention to details",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": "The product we deliver is not finished but you can customize it as you please",
                "classification": "CON",
            },
            {
                "text": (
                    "The product should be sold finished, ready to use, therefore all "
                    "the cost should be included and quantified."
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_62, metrics = compare(
    annotated_answers_62,
    model_answers_62,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_62, "Metrics for Test Case 62")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 62:
Answers: {'TP': 3, 'FP': 5, 'FN': 1, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 6, 'FN': 1, 'TN': 0}
Arguments: {'TP': 0, 'FP': 3, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 3, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 80, 'FP': 50, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 56, 'FP': 74, 'FN': 13, 'TN': 0}
Arguments: {'TP': 31, 'FP': 64, 'FN': 39, 'TN': 0}
Argument_classifications: {'TP': 29, 'FP': 66, 'FN': 39, 'TN': 0}


# Test case 63

In [None]:
# Test case 63: the model reproduces both annotated BARRIER answers but
# attaches two PRO arguments to the first one; the annotation has none.
annotated_answers_63 = [
    {
        "answer": "selfishness : we need to develop the collective spirit of work",
        "classification": "BARRIER",
        "arguments": [],
    },
    {
        "answer": "Academic Ego is a critical factor in hindrance of Entrepreneurship.",
        "classification": "BARRIER",
        "arguments": [],
    },
]

model_answers_63 = [
    {
        "answer": "selfishness : we need to develop the collective spirit of work",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "reseachers should stop looking only at their CV because an entire "
                    "community is waiting for their outputs based on sciences and "
                    "experiments."
                ),
                "classification": "PRO",
            },
            {
                "text": (
                    "A lot specially by people in research fields we have lot of expert "
                    "but everyone keep what he have for him self this what others called "
                    "selfishness."
                ),
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": "Academic Ego is a critical factor in hindrance of Entrepreneurship.",
        "classification": "BARRIER",
        "arguments": [],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_63, metrics = compare(
    annotated_answers_63,
    model_answers_63,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_63, "Metrics for Test Case 63")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 63:
Answers: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 82, 'FP': 50, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 58, 'FP': 74, 'FN': 13, 'TN': 0}
Arguments: {'TP': 31, 'FP': 66, 'FN': 39, 'TN': 0}
Argument_classifications: {'TP': 29, 'FP': 68, 'FN': 39, 'TN': 0}


# Test case 64

In [None]:
# Test case 64: the annotation has one BARRIER answer with a PRO and a CON
# argument. The model labels that answer SOLUTION, keeps the PRO argument
# (with an added full stop), and promotes the first sentence of the CON
# argument to a separate BARRIER answer.
annotated_answers_64 = [
    {
        "answer": (
            "To overcome barriers to success in research, we should find "
            "applications for theoretical research."
        ),
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "Applications of theoretical research increase its societal impact "
                    "This is important for impact."
                ),
                "classification": "PRO",
            },
            {
                "text": (
                    "Prioritizing applications may limit pure, exploratory theoretical "
                    "research Starting from the need to establish research projects in "
                    "order to ensure there are applications of the research findings."
                ),
                "classification": "CON",
            },
        ],
    },
]

model_answers_64 = [
    {
        "answer": (
            "To overcome barriers to success in research, we should find "
            "applications for theoretical research."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "Applications of theoretical research increase its societal impact. "
                    "This is important for impact."
                ),
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": "Prioritizing applications may limit pure, exploratory theoretical research.",
        "classification": "BARRIER",
        "arguments": [
            {
                "text": (
                    "Starting from the need to establish research projects in order to "
                    "ensure there are applications of the research findings."
                ),
                "classification": "CON",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_64, metrics = compare(
    annotated_answers_64,
    model_answers_64,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_64, "Metrics for Test Case 64")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 64:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 0, 'FP': 2, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 1, 'FN': 1, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 1, 'FN': 1, 'TN': 0}

Updated Metrics:
Answers: {'TP': 83, 'FP': 51, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 58, 'FP': 76, 'FN': 13, 'TN': 0}
Arguments: {'TP': 32, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 30, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 65

In [None]:
# Test case 65: the model reproduces the annotated SOLUTION answer (lower-case
# first letter, trailing full stop on the argument) and adds one extra METRIC
# answer that the annotation does not contain.
annotated_answers_65 = [
    {
        "answer": "Working with tangible models",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "The practical application of the theories found in research, and the "
                    "thinking and execution of our researchers, are the best methods for "
                    "ensuring success in different sectors"
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_65 = [
    {
        "answer": "working with tangible models",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "The practical application of the theories found in research, and the "
                    "thinking and execution of our researchers, are the best methods for "
                    "ensuring success in different sectors."
                ),
                "classification": "PRO",
            },
        ],
    },
    {
        "answer": "the adoption of our research topics by others",
        "classification": "METRIC",
        "arguments": [],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_65, metrics = compare(
    annotated_answers_65,
    model_answers_65,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_65, "Metrics for Test Case 65")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 65:
Answers: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 1, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 84, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 59, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 33, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 31, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 66

In [None]:
# Test case 66: the model answer drops the leading "In my opinion, " from the
# annotated answer; label and argument are the same on both sides.
annotated_answers_66 = [
    {
        "answer": (
            "In my opinion, to push research further, we should diversify subjects "
            "and invest as much as possible in the world of research."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "The more we have the chance to dig, the more information we get.",
                "classification": "PRO",
            },
        ],
    },
]

model_answers_66 = [
    {
        "answer": (
            "to push research further, we should diversify subjects and invest as "
            "much as possible in the world of research."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "The more we have the chance to dig, the more information we get.",
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_66, metrics = compare(
    annotated_answers_66,
    model_answers_66,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_66, "Metrics for Test Case 66")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 66:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 85, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 60, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 34, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 32, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 68

In [None]:
# Test case 68: one SOLUTION answer with one PRO argument; the model output
# is identical to the annotation.
annotated_answers_68 = [
    {
        "answer": (
            "Providing the students with clear understanding of how the corporate "
            "and/or the research world are, and how they could provide an added "
            "value with their abilities and knowledge."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "This prepares the students for the real world.",
                "classification": "PRO",
            },
        ],
    },
]

model_answers_68 = [
    {
        "answer": (
            "Providing the students with clear understanding of how the corporate "
            "and/or the research world are, and how they could provide an added "
            "value with their abilities and knowledge."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "This prepares the students for the real world.",
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_68, metrics = compare(
    annotated_answers_68,
    model_answers_68,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_68, "Metrics for Test Case 68")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 68:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 86, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 61, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 35, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 33, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 69

In [None]:
# Test case 69: one SOLUTION answer with two PRO arguments; the model output
# is identical to the annotation.
annotated_answers_69 = [
    {
        "answer": (
            "Gamify the teaching experience when the subject nature allows it to "
            "engage students and make learning fun through challenges with "
            "rewards and achievements."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Gamification enhances engagement and learning effectiveness.",
                "classification": "PRO",
            },
            {
                "text": (
                    "42 network is proof that the gamification of the learning experience "
                    "is a great model."
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_69 = [
    {
        "answer": (
            "Gamify the teaching experience when the subject nature allows it to "
            "engage students and make learning fun through challenges with "
            "rewards and achievements."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Gamification enhances engagement and learning effectiveness.",
                "classification": "PRO",
            },
            {
                "text": (
                    "42 network is proof that the gamification of the learning experience "
                    "is a great model."
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_69, metrics = compare(
    annotated_answers_69,
    model_answers_69,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_69, "Metrics for Test Case 69")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 69:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 2, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 87, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 62, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 37, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 35, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 70

In [None]:
# Test case 70: one SOLUTION answer with one PRO argument; the model output
# is identical to the annotation.
annotated_answers_70 = [
    {
        "answer": "Implement cohesive teaching guidelines.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Students emulate structured, effective teacher behavior.",
                "classification": "PRO",
            },
        ],
    },
]

model_answers_70 = [
    {
        "answer": "Implement cohesive teaching guidelines.",
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": "Students emulate structured, effective teacher behavior.",
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_70, metrics = compare(
    annotated_answers_70,
    model_answers_70,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_70, "Metrics for Test Case 70")
display_metrics(metrics, "Updated Metrics")



Metrics for Test Case 70:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 88, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 63, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 38, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 36, 'FP': 69, 'FN': 40, 'TN': 0}


# Test case 71

In [None]:
# Test case 71: one SOLUTION answer with one PRO argument; the model output
# is identical to the annotation.
# NOTE(review): the saved notebook shows no output under this cell, and the
# cumulative totals printed after test case 72 equal the totals after test
# case 70 plus exactly one case. This case therefore appears to be missing
# from the saved running totals - restart the kernel and run all cells in
# order before quoting them.
annotated_answers_71 = [
    {
        "answer": (
            "To improve teaching and student life at UM6P, we can promote "
            "international student exchange programs."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "This will cultivate a diverse and globally conscious student community, "
                    "thereby elevating the standard of education."
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_71 = [
    {
        "answer": (
            "To improve teaching and student life at UM6P, we can promote "
            "international student exchange programs."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "This will cultivate a diverse and globally conscious student community, "
                    "thereby elevating the standard of education."
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_71, metrics = compare(
    annotated_answers_71,
    model_answers_71,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_71, "Metrics for Test Case 71")
display_metrics(metrics, "Updated Metrics")


# Test case 72

In [None]:
# Test case 72: one SOLUTION answer with one PRO argument; the model output
# is identical to the annotation.
annotated_answers_72 = [
    {
        "answer": (
            "One idea to address the lack of communication and support in "
            "universities is to establish a student-led support team. This team "
            "would consist of dedicated students who are available to assist "
            "their peers in accessing various resources and providing guidance "
            "when needed."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "By creating such a team, students can benefit from improved "
                    "communication channels and receive the necessary support to "
                    "navigate their academic journey more effectively."
                ),
                "classification": "PRO",
            },
        ],
    },
]

model_answers_72 = [
    {
        "answer": (
            "One idea to address the lack of communication and support in "
            "universities is to establish a student-led support team. This team "
            "would consist of dedicated students who are available to assist "
            "their peers in accessing various resources and providing guidance "
            "when needed."
        ),
        "classification": "SOLUTION",
        "arguments": [
            {
                "text": (
                    "By creating such a team, students can benefit from improved "
                    "communication channels and receive the necessary support to "
                    "navigate their academic journey more effectively."
                ),
                "classification": "PRO",
            },
        ],
    },
]

# Score the case: the result is unpacked into this case's counts and the
# rebound running `metrics`.
local_metrics_72, metrics = compare(
    annotated_answers_72,
    model_answers_72,
    metrics,
)

# Show the per-case counts first, then the cumulative ones.
display_metrics(local_metrics_72, "Metrics for Test Case 72")
display_metrics(metrics, "Updated Metrics")




Metrics for Test Case 72:
Answers: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Answer_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Arguments: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}
Argument_classifications: {'TP': 1, 'FP': 0, 'FN': 0, 'TN': 0}

Updated Metrics:
Answers: {'TP': 89, 'FP': 52, 'FN': 13, 'TN': 0}
Answer_classifications: {'TP': 64, 'FP': 77, 'FN': 13, 'TN': 0}
Arguments: {'TP': 39, 'FP': 67, 'FN': 40, 'TN': 0}
Argument_classifications: {'TP': 37, 'FP': 69, 'FN': 40, 'TN': 0}
