In [85]:
import json

# Segment of the video (in seconds) that the model is asked to describe.
start=600
end=900

# JSON Schema, kept as a JSON-formatted string, describing the object the model must return.
schema="""{
    "description": "A structured schema to represent detailed information from a video or text analysis",
    "type": "object",
    "properties": {
        "Category": {
            "type": "string",
            "description": "The category or general type of the content"
        },
        "DetailedDescriptionOfEventsAndConversations": {
            "type": "string",
            "description": "A detailed textual description of the events and conversations in the content"
        },
        "BrandsCompanyNamesLogos": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "A list of brands, company names, or logos appearing or mentioned in the content"
        },
        "KeyLocationsAndScenes": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "A list of key locations and scenes appearing or mentioned in the content"
        },
        "KeyThemes": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "A list of key themes discussed or portrayed in the content"
        },
        "PeopleAppearingAndMentioned": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "A list of people who appear or are mentioned in the content"
        }
    },
    "required": [
        "Category",
        "DetailedDescriptionOfEventsAndConversations",
        "BrandsCompanyNamesLogos",
        "KeyLocationsAndScenes",
        "KeyThemes",
        "PeopleAppearingAndMentioned"
    ]
}"""


VAR_VIDEO_SEGMENT=f"Your task is to provide a comprehensive description of this video from segment {start} seconds to {end} seconds.\n"
VAR_INSTRUCTIONS= """To complete the task you need to follow these steps:\n
                           No greetings, closing remarks, or additional comments. Begin immediately with the video analysis and provide only the requested information in the specified format.\n
                           Identify all instances of visual product placement. Pay close attention to background details and items held by the characters. List each product placement with the following
                            information: Brand name, product name (if applicable), and a brief description. Include information about product placement into the description generated for the video\n
                           Create a transcript of all the speeches, dialogs, narration.\n
                           Scrupulously examine each scene for any and all visible brand names, logos, and products. Even if a product appears briefly or in the background, it should be included.\n"""

VAR_CONSTRAINTS= """Describe the video content objectively, avoiding any subjective opinions or assumptions.\n
                           Specify who is saying what. If a person talking can be seen, specify their name and/or occupation. If it is voice behind the scenes, then describe it as a narrator.\n
                           Be specific when describing. Include all the information that is shown or given.\n
                           Do not show timestamps.\n
                           If an unidentified person is shown in the video first, but then their name is mentioned later in the video, make sure to mention their name in the description from the start.\n
                           """

# `schema` is already JSON text, so it is parsed and re-serialized to embed it compactly.
# Calling json.dumps() directly on the string would wrap it in quotes and escape every
# quote/newline, so the model would receive an escaped string literal instead of the schema.
VAR_STRUCTURE= f"""Organize the description with the following properties, and give a valid json file with JSON schema.<JSONSchema>{json.dumps(json.loads(schema))}</JSONSchema>:
                       \n**Category**\n
                       \n**DetailedDescriptionOfEventsAndConversations**\n
                       \n**BrandsCompanyNamesLogos**\n
                       \n**KeyLocationsAndScenes**\n
                       \n**KeyThemes**\n
                       \n**PeopleAppearingAndMentioned**\n 
                 """ 

# The category list and the per-category bullets below use the same spelling for each category.
VAR_CONDITIONS = """Identify a video as one of these categories: News, TV Shows, Live Sports Events, News Analyses. \n
                       When describing the DetailedDescriptionOfEventsAndConversations, consider the following instructions for specific video types:\n
                       * **News:** Pay close attention to transitions, graphics, and on-screen text.\n
                       * **TV Shows:** Describe facial expressions, body language, appearances, and overall mood.\n
                       * **Live Sports Events:** Focus on key moments, like goals or fouls, and describe the overall flow and momentum of the game.\n
                       * **News Analyses:** Identify different perspectives, arguments, and supporting evidence.\n
                       Make sure to mention people's names in the DetailedDescriptionOfEventsAndConversations and in PeopleAppearingAndMentioned as well as any other information about them like their age, occupation, location, etc. \n"""

# The quotes around Melissa's speech are written as \\" so the prompt text contains \" and the
# example is itself valid JSON (a plain \" inside this non-raw string collapses to a bare quote).
# NOTE(review): the example uses "TV Show" while the category list above says "TV Shows" — confirm
# which label downstream consumers expect.
VAR_EXAMPLE = """Follow this example for the format of the output:\n
              {
                "Category": "TV Show",
                "DetailedDescriptionOfEventsAndConversations": "The video starts with a man sitting at a dining table, reading a letter. Two Fiji bottles are visible on the benchtop. He has short, light brown hair and a beard. His name is Harrison. The scene changes to Melissa. Melissa says: \\"I'm Melissa, and I'm a hairdresser. I'm 41 years old, and I'm from Sydney.\\"",
                "BrandsCompanyNamesLogos": ["Lacoste", "Fiji"],
                "KeyLocationsAndScenes": ["Apartment"],
                "KeyThemes": ["Marriage"],
                "PeopleAppearingAndMentioned": [
                "Harrison, 32, Builder, NSW",
                "Melissa, 41, Hairdresser, NSW"
                ]
            }
               """
  
    
# Final prompt: segment, instructions, constraints, output structure, category rules, example.
video_description_prompt=VAR_VIDEO_SEGMENT+VAR_INSTRUCTIONS+VAR_CONSTRAINTS+VAR_STRUCTURE+VAR_CONDITIONS+VAR_EXAMPLE
 

In [86]:
import base64
import time
import typing
import math
import numpy as np

from google.cloud import aiplatform
from google.protobuf import struct_pb2

#libraries to generate image summaries
from vertexai.vision_models import Video
from vertexai.vision_models import VideoSegmentConfig
from vertexai.vision_models import MultiModalEmbeddingModel
from vertexai.language_models import TextEmbeddingModel
from vertexai.vision_models import Image as vision_model_Image
from vertexai.preview.generative_models import (
    Content,
    GenerationConfig,
    GenerationResponse,
    GenerativeModel,
    Image,
    Part as GenerativeModelPart,
    HarmBlockThreshold,
    HarmCategory,
)
from typing import Any, Dict, List, Literal, Optional, Union

from moviepy.editor import VideoFileClip

# Time window of the video to analyse, taken from the `start` / `end` values set earlier.
segment_window = {
    "start_offset": {"seconds": start, "nanos": 0},
    "end_offset": {"seconds": end, "nanos": 0},
}

# Part 1: the video file in Cloud Storage, restricted to the window above.
video_part = {
    "file_data": {
        "mime_type": "video/mp4",
        "file_uri": "gs://raw_nine_files/vlt_video_extract/MAAT/MAAT2024_1_A_HBB.mp4",
    },
    "video_metadata": segment_window,
}

# Part 2: the textual description prompt.
prompt_part = {"text": video_description_prompt}

# Single user turn carrying both parts, in the dict form passed to generate_content().
contents = [{"role": "user", "parts": [video_part, prompt_part]}]


In [3]:

# First model under test: Gemini 1.5 Pro, asked to respond with a JSON MIME type.
generative_multimodal_model = GenerativeModel("gemini-1.5-pro-002")
generation_config = GenerationConfig(
    temperature=1,
    top_k=40,
    top_p=0.95,
    max_output_tokens=8192,
    response_mime_type='application/json',
)

# Use BLOCK_NONE for each of the four harm categories listed here.
safety_settings = {
    harm_category: HarmBlockThreshold.BLOCK_NONE
    for harm_category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

model_response = generative_multimodal_model.generate_content(
    contents,
    safety_settings=safety_settings,
    generation_config=generation_config,
)

In [4]:
# Text of the first part of the first candidate; as the cell's last expression it is shown as a string repr.
model_response.candidates[0].content.parts[0].text

'```json\n{\n  "Category": "TV Show",\n  "DetailedDescriptionOfEventsAndConversations": "The video begins with Jayden, a 26-year-old professional kickboxer from Queensland, arriving at a gathering. He greets Tim, Richard, and John. Richard introduces himself to Jayden. Richard asks Jayden what he does for a living. Jayden says that kickboxing is his full-time job, training twice a day, six days a week. Jayden explains that he\'s not what he appears to be, enjoying snuggles and back tickles when he\'s not fighting. He adds that he\'s a romantic and doesn\'t have a specific type, looking for someone with a good heart. Lucinda, a 43-year-old MC and wedding celebrant from New South Wales, arrives at the same gathering, greeting the other women: Tori and Natasha. Lucinda says that she\'s looking for a confident man, a great cook who is funny and generous of spirit. She unfolds a scroll, revealing her detailed list of desired qualities in a partner, including “handsome” and “spunky,” prompti

In [5]:
# Print the same text so that newlines are rendered instead of shown as escape sequences.
print(model_response.candidates[0].content.parts[0].text)

```json
{
  "Category": "TV Show",
  "DetailedDescriptionOfEventsAndConversations": "The video begins with Jayden, a 26-year-old professional kickboxer from Queensland, arriving at a gathering. He greets Tim, Richard, and John. Richard introduces himself to Jayden. Richard asks Jayden what he does for a living. Jayden says that kickboxing is his full-time job, training twice a day, six days a week. Jayden explains that he's not what he appears to be, enjoying snuggles and back tickles when he's not fighting. He adds that he's a romantic and doesn't have a specific type, looking for someone with a good heart. Lucinda, a 43-year-old MC and wedding celebrant from New South Wales, arrives at the same gathering, greeting the other women: Tori and Natasha. Lucinda says that she's looking for a confident man, a great cook who is funny and generous of spirit. She unfolds a scroll, revealing her detailed list of desired qualities in a partner, including “handsome” and “spunky,” prompting laught

In [6]:
import json

# Take the model's text and remove only the Markdown code fence wrapped around it.
# A global replace of ``` would also delete backticks that occur inside the JSON payload itself.
json_string = model_response.candidates[0].content.parts[0].text.strip()
if json_string.startswith('```'):
    json_string = json_string[3:]
    # Optional language tag directly after the opening fence (e.g. ```json).
    if json_string[:4].lower() == 'json':
        json_string = json_string[4:]
if json_string.endswith('```'):
    json_string = json_string[:-3]
json_string = json_string.strip()

# Convert the JSON string to a Python dictionary; strict=False tolerates control
# characters (such as raw newlines) inside string values.
parsed_json = json.loads(json_string, strict=False)

parsed_json
 

{'Category': 'TV Show',
 'DetailedDescriptionOfEventsAndConversations': "The video begins with Jayden, a 26-year-old professional kickboxer from Queensland, arriving at a gathering. He greets Tim, Richard, and John. Richard introduces himself to Jayden. Richard asks Jayden what he does for a living. Jayden says that kickboxing is his full-time job, training twice a day, six days a week. Jayden explains that he's not what he appears to be, enjoying snuggles and back tickles when he's not fighting. He adds that he's a romantic and doesn't have a specific type, looking for someone with a good heart. Lucinda, a 43-year-old MC and wedding celebrant from New South Wales, arrives at the same gathering, greeting the other women: Tori and Natasha. Lucinda says that she's looking for a confident man, a great cook who is funny and generous of spirit. She unfolds a scroll, revealing her detailed list of desired qualities in a partner, including “handsome” and “spunky,” prompting laughter from her 

In [7]:
# Save the fence-stripped JSON text of the first model run.
file_name = "output_model1.txt"
text_to_export = json_string

with open(file_name, "w") as out_file:
    out_file.write(text_to_export)

print(f"The text has been successfully exported to {file_name}")

The text has been successfully exported to output_model1.txt


In [8]:
# Save the untouched response text of the first model run.
file_name = "output_model_original_text1.txt"
text_to_export = model_response.candidates[0].content.parts[0].text

with open(file_name, "w") as out_file:
    out_file.write(text_to_export)

print(f"The text has been successfully exported to {file_name}")

The text has been successfully exported to output_model_original_text1.txt


### Let's test with another model


In [9]:
# Second model under test: Gemini 1.5 Flash, same prompt and sampling parameters.
# response_mime_type is not set for this run.
generative_multimodal_model = GenerativeModel("gemini-1.5-flash-002")
generation_config = GenerationConfig(
    temperature=1,
    top_k=40,
    top_p=0.95,
    max_output_tokens=8192,
)

# Use BLOCK_NONE for each of the four harm categories listed here.
safety_settings = {
    harm_category: HarmBlockThreshold.BLOCK_NONE
    for harm_category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

model_response = generative_multimodal_model.generate_content(
    contents,
    safety_settings=safety_settings,
    generation_config=generation_config,
)

In [10]:
# Take the model's text and remove only the Markdown code fence wrapped around it.
# A global replace of ``` would also delete backticks that occur inside the JSON payload itself.
json_string = model_response.candidates[0].content.parts[0].text.strip()
if json_string.startswith('```'):
    json_string = json_string[3:]
    # Optional language tag directly after the opening fence (e.g. ```json).
    if json_string[:4].lower() == 'json':
        json_string = json_string[4:]
if json_string.endswith('```'):
    json_string = json_string[:-3]
json_string = json_string.strip()

# Convert the JSON string to a Python dictionary; strict=False tolerates control
# characters (such as raw newlines) inside string values.
parsed_json = json.loads(json_string, strict=False)

parsed_json

{'Category': 'TV Show',
 'DetailedDescriptionOfEventsAndConversations': "The video starts with two women seated at a table. Lucinda is wearing a pink sparkly jacket and a matching pink skirt. She says: 'So, Tori and Nat, what's on the manifesto?' Nat is wearing a red floral dress. Tori is wearing a floral patterned long-sleeved dress.  Lucinda continues:  'The man, of first down.' Nat replies: 'I mean, yeah, yeah. I don't want him to have a big ego.' Lucinda says: 'Yeah.  And it doesn't have a big ego.  Confident but with nothing to prove.' Tori continues: 'Correct.' Lucinda says: 'I want a nerdy guy.'  Nat replies: 'You're a bit of an intellect, and you need somebody to challenge you on that level.'  Lucinda says: 'Yeah, I love nerds.' She smiles.  The scene changes to a view of a city with buildings lit up at night.  A voice over says:  'Love Lucinda. Here's another one.'  There's a scene change to the three women seated at a table. Lucinda says: 'Yay.' The scene changes to a view of

In [11]:
# Save the fence-stripped JSON text of the second model run.
file_name = "output_model2.txt"
text_to_export = json_string

with open(file_name, "w") as out_file:
    out_file.write(text_to_export)

print(f"The text has been successfully exported to {file_name}")

The text has been successfully exported to output_model2.txt


In [12]:
# Save the untouched response text of the second model run.
file_name = "output_model_original_text2.txt"
text_to_export = model_response.candidates[0].content.parts[0].text

with open(file_name, "w") as out_file:
    out_file.write(text_to_export)

print(f"The text has been successfully exported to {file_name}")

The text has been successfully exported to output_model_original_text2.txt


### Compare the results of the two models

In [87]:
# Reload both saved responses and re-serialize them as compact JSON strings for the autorater.
# strict=False matches how the responses were parsed before being saved, so a payload that
# parsed there (e.g. with raw newlines inside a string value) does not fail here.
with open('output_model1.txt', 'r') as file:
    response1 = json.dumps(json.load(file, strict=False))
with open('output_model2.txt', 'r') as file:
    response2 = json.dumps(json.load(file, strict=False))


In [138]:
def get_autorater_response(metric_prompt: list) -> dict:
    """Send ``metric_prompt`` to the Gemini autorater and return its parsed JSON answer.

    The autorater is constrained to a response schema with one object per rated
    property, each holding a numeric ``score`` and a string ``explanation``.

    Args:
        metric_prompt: Prompt content passed unchanged to ``generate_content``.

    Returns:
        The decoded JSON object from the first part of the first candidate, or an
        empty dict when the response has no candidate, no parts, or no text.
        The result is also printed.
    """
    rated_properties = [
        "Category",
        "Detailed Description Of Events And Conversations",
        "Brands, CompanyNames, and Logos",
        "KeyLocations And Scenes",
        "Key Themes",
        "People Appearing And Mentioned",
    ]

    # Every property is rated with the same {score, explanation} object.
    metric_response_schema = {
        "type": "OBJECT",
        "properties": {
            property_name: {
                "type": "OBJECT",
                "properties": {
                    "score": {"type": "NUMBER"},
                    "explanation": {"type": "STRING"},
                },
                "required": ["score", "explanation"],
            }
            for property_name in rated_properties
        },
        "required": list(rated_properties),
    }

    unblocked_categories = (
        HarmCategory.HARM_CATEGORY_UNSPECIFIED,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
    )
    autorater = GenerativeModel(
        "gemini-1.5-pro",
        generation_config=GenerationConfig(
            response_mime_type="application/json",
            response_schema=metric_response_schema,
        ),
        safety_settings={
            harm_category: HarmBlockThreshold.BLOCK_NONE
            for harm_category in unblocked_categories
        },
    )

    response = autorater.generate_content(metric_prompt)

    # Walk down to the first text part, tolerating missing candidates/content/parts.
    first_text = None
    if response.candidates and len(response.candidates) > 0:
        first_candidate = response.candidates[0]
        has_parts = (
            first_candidate.content
            and first_candidate.content.parts
            and len(first_candidate.content.parts) > 0
        )
        if has_parts:
            first_text = first_candidate.content.parts[0].text

    response_json = json.loads(first_text) if first_text else {}
    print(response_json)
    return response_json

def custom_coverage_fn(instance):
    """Ask the autorater to compare two model responses for coverage of a video segment.

    The prompt is sent as a single user turn that contains the evaluation instructions,
    the original user prompt (when the instance has one), the video itself as a
    ``file_data`` part limited to the referenced segment, and both generated responses.
    Passing the video as a file part matters: a ``gs://`` URI given only as text is just
    a string to the model, so it could not look at the footage it is asked to judge.

    Args:
        instance: Mapping-like row (dict or pandas Series) with ``response_A``,
            ``response_B`` and ``reference``. ``reference`` holds ``video_uri`` and
            ``video_metadata`` (a dict or its JSON string) and may hold ``mime_type``
            (defaults to ``"video/mp4"``). ``instruction`` is included when present.

    Returns:
        Whatever ``get_autorater_response`` returns for the assembled prompt.

    Raises:
        KeyError: If a required field is missing from ``instance``.
        json.JSONDecodeError: If ``video_metadata`` is a string that is not valid JSON.
    """
    reference = instance["reference"]
    video_uri = reference["video_uri"]
    video_metadata = reference["video_metadata"]
    response_A = instance["response_A"]
    response_B = instance["response_B"]
    instruction = instance.get("instruction", "")

    # The segment offsets may arrive serialized; the file part needs them as a mapping.
    if isinstance(video_metadata, str):
        segment_metadata = json.loads(video_metadata)
    else:
        segment_metadata = video_metadata

    video_part = {
        "file_data": {
            "mime_type": reference.get("mime_type", "video/mp4"),
            "file_uri": video_uri,
        }
    }
    if segment_metadata:
        video_part["video_metadata"] = segment_metadata

    # Property names below match the keys of the autorater's response schema, and the
    # scoring step refers to the full 1-6 rubric.
    eval_instruction_template = """

  # Instruction
  You are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models. We will provide you with the user prompt, the video and the segment for which the responses were generated, and the AI-generated responses of model A and model B.
  You should first read the user input carefully for analyzing the task, then look into the video segment, and then evaluate the quality of the responses based on the Criteria provided in the Evaluation section below.
  You will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step by step explanations for your rating, and only choose ratings from the Rating Rubric.

  # Evaluation
  ## Metric Definition
  You will be assessing coverage, which measures the ability to provide a detailed response based on the given video segment and requested properties.

  ## Criteria
  Coverage: It is the quality of capturing all required detail for each requested property.
  In the context of video content capturing, it refers to the way that all the details of the following properties are captured and presented thoroughly:

  - Category
  - Detailed Description Of Events And Conversations
  - Brands, CompanyNames, and Logos
  - KeyLocations And Scenes
  - Key Themes
  - People Appearing And Mentioned

  These AI-generated responses will be used for data retrieval. So, they have to capture all the details of a scene. The rating has to be determined for each property separately.

  ## Rating Rubric
  1: (Model A Perfectly Aligned) model A has perfectly captured the most accurate content but model B could not
  2: (Model B Perfectly Aligned) model B has perfectly captured the most accurate content but model A could not
  3: (Model A and B Aligned) both models could capture the content accurately
  4: (Model A and B Not Aligned) neither of the two models could capture the content accurately
  5: (Model A and B Slightly Aligned - A is more accurate) Both models could capture the content partially but model A is more accurate
  6: (Model A and B Slightly Aligned - B is more accurate) Both models could capture the content partially but model B is more accurate

  ## Evaluation Steps
  STEP 1: Assess User Instruction: Carefully read the user input prompt to understand the user's request and requested information.
  STEP 2: Analyze Video Segment: Examine the video segment for each requested property to make sure all the requested information is captured in detail.
  STEP 3: Evaluate Accuracy: For each requested property, check if the generated response correctly identifies the information and details shown in the video segment.
  STEP 4: Identify Inconsistencies: For each requested property, look for any discrepancies or missed details between the video segment and the captured information in the AI-generated responses. For example, if any information is missed, or not captured right, for each property separately.
  STEP 5: Determine Score: Based on the previous steps, assign a score using the 1-6 rubric. Consider the severity of any inconsistencies and their potential impact on the data retrieval.
  """

    # Assemble the parts of the single user turn, in reading order.
    parts = [{"text": eval_instruction_template}]
    if isinstance(instruction, str) and instruction:
        parts.append({"text": f"USER PROMPT:\n{instruction}"})
    parts.append(
        {"text": f"VIDEO URI: {video_uri}\nVIDEO METADATA: {video_metadata}\nVIDEO SEGMENT:"}
    )
    parts.append(video_part)
    parts.append({"text": f"GENERATED RESPONSE MODEL A:\n{response_A}"})
    parts.append({"text": f"GENERATED RESPONSE MODEL B:\n{response_B}"})

    evaluation_prompt = [{"role": "user", "parts": parts}]

    # get_autorater_response already prints the parsed result, so it is not printed again here.
    evaluation_response = get_autorater_response(evaluation_prompt)
    return evaluation_response
       
    


In [154]:
import pandas as pd
# Reference information for the autorater: which video was described and which segment of it.
context = [
    dict(
        video_uri="gs://raw_nine_files/vlt_video_extract/MAAT/MAAT2024_1_A_HBB.mp4",
        video_metadata=json.dumps(
            {
                "start_offset": {"seconds": start, "nanos": 0},
                "end_offset": {"seconds": end, "nanos": 0},
            },
            indent=2,
        ),
    )
]

# One-row evaluation set: the prompt, both candidate responses and the video reference.
eval_dataset = pd.DataFrame(
    dict(
        instruction=video_description_prompt,
        context=[None],
        response_A=response1,
        response_B=response2,
        reference=context,
    )
)

In [155]:
# Call custom_coverage_fn once per row (axis=1) and keep the returned value in 'evaluations'.
eval_dataset['evaluations']=eval_dataset.apply(custom_coverage_fn,axis=1)

{'Category': {'score': 3, 'explanation': 'Both Model A and B correctly identified the category as "TV Show".'}, 'Detailed Description Of Events And Conversations': {'score': 5, 'explanation': 'Both Model A and Model B captured the overall events correctly but Model A is slightly better in terms of detailed description of events and conversation.'}, 'Brands, CompanyNames, and Logos': {'score': 1, 'explanation': 'Only Model A was able to correctly capture the brand name "Fiji" while Model B missed it. '}, 'KeyLocations And Scenes': {'score': 5, 'explanation': 'Model A is slightly better because it more accurately identified the key location as "Sydney Harbour" instead of just "Modern living room with city view". Both correctly identified the "boxing gym".'}, 'Key Themes': {'score': 3, 'explanation': 'Both models are aligned because they identify the correct theme such as dating, relationship, and first impression.'}, 'People Appearing And Mentioned': {'score': 5, 'explanation': 'Both Mod

In [156]:
# Display the evaluation table, which now includes the 'evaluations' column.
eval_dataset

Unnamed: 0,instruction,context,response_A,response_B,reference,evaluations
0,Your task is to provide a comprehensive descri...,,"{""Category"": ""TV Show"", ""DetailedDescriptionOf...","{""Category"": ""TV Show"", ""DetailedDescriptionOf...",{'video_uri': 'gs://raw_nine_files/vlt_video_e...,"{'Category': {'score': 3, 'explanation': 'Both..."


In [150]:
# Pull out the autorater result stored under index label 0 and display it.
evaluations=eval_dataset['evaluations'][0]
evaluations

{'Category': {'score': 3,
  'explanation': 'Both models correctly identify the category as a TV show, likely a dating or reality show based on the content.'},
 'Detailed Description Of Events And Conversations': {'score': 5,
  'explanation': 'Model A captures more details of the conversations and events in the video segment. It mentions more names, interactions, and specifics about the conversation topics. Model B provides a decent overview but misses out on the nuances captured by model A.'},
 'Brands, CompanyNames, and Logos': {'score': 1,
  'explanation': 'Model A correctly identifies a "Fiji" bottle, likely water, which is visible in the video segment. Model B fails to identify any brands.'},
 'KeyLocations And Scenes': {'score': 5,
  'explanation': 'Model A provides a more comprehensive list of key locations and scenes, including a "Sydney Harbour" backdrop and specific details like a "Boxing gym" and "Interview room." Model B is partially accurate but less detailed in its descrip

In [157]:
import pandas as pd

# Property names rated by the autorater; each becomes a "<name>_score" / "<name>_explanation" column pair.
keys = [
    'Category', 'Detailed Description Of Events And Conversations', 
    'Brands, CompanyNames, and Logos', 'KeyLocations And Scenes', 
    'Key Themes', 'People Appearing And Mentioned'
]

def flatten_evaluations(row):
    """Flatten one autorater result into a single-level dict of score/explanation columns.

    Args:
        row: Dict keyed by the names in ``keys``, each value a dict with ``score`` and
            ``explanation``. An empty dict, a non-dict value, or a missing property is
            tolerated because the autorater call can come back without a usable answer.

    Returns:
        Dict with ``"<property>_score"`` and ``"<property>_explanation"`` entries for every
        name in ``keys`` (lower-cased, spaces replaced by underscores). Values that are
        not available are ``None``.
    """
    evaluation = row if isinstance(row, dict) else {}
    flattened_data = {}
    for key in keys:
        column_prefix = key.lower().replace(' ', '_')
        # A property may be absent or empty; fall back to an empty verdict.
        verdict = evaluation.get(key) or {}
        flattened_data[f"{column_prefix}_score"] = verdict.get('score')
        flattened_data[f"{column_prefix}_explanation"] = verdict.get('explanation')
    return flattened_data

# Turn every autorater result into a flat dict of score/explanation entries.
flattened_df = eval_dataset['evaluations'].apply(flatten_evaluations)

# Expand those dicts into columns and attach them to the evaluation table.
evaluation_columns = pd.json_normalize(flattened_df)
eval_dataset_df = eval_dataset.join(evaluation_columns)

# Show the combined table.
eval_dataset_df


Unnamed: 0,instruction,context,response_A,response_B,reference,evaluations,category_score,category_explanation,detailed_description_of_events_and_conversations_score,detailed_description_of_events_and_conversations_explanation,"brands,_companynames,_and_logos_score","brands,_companynames,_and_logos_explanation",keylocations_and_scenes_score,keylocations_and_scenes_explanation,key_themes_score,key_themes_explanation,people_appearing_and_mentioned_score,people_appearing_and_mentioned_explanation
0,Your task is to provide a comprehensive descri...,,"{""Category"": ""TV Show"", ""DetailedDescriptionOf...","{""Category"": ""TV Show"", ""DetailedDescriptionOf...",{'video_uri': 'gs://raw_nine_files/vlt_video_e...,"{'Category': {'score': 3, 'explanation': 'Both...",3,Both Model A and B correctly identified the ca...,5,Both Model A and Model B captured the overall ...,1,Only Model A was able to correctly capture the...,5,Model A is slightly better because it more acc...,3,Both models are aligned because they identify ...,5,Both Model A and B correctly identified the pe...


In [159]:
# Write the flattened comparison table to 'comparision.csv' (file name kept as originally spelled).
eval_dataset_df.to_csv('comparision.csv')