In [1]:
#!pip install ego4d

In [2]:
#!ego4d --output_directory="./data/ego4d" --datasets full_scale --benchmarks goalstep --video_uids ac582760-09b1-4a6e-be08-f19f9bf5dfcb

In [None]:
#!ffmpeg -i /scratch/rl3725/llms_experiments/data/ego4d/v2/full_scale/51224e32-3d6c-4148-9eea-7b73da751f25.mp4 -c:v libx264 -crf 23 -preset medium -r 30 -c:a copy  /scratch/rl3725/llms_experiments/data/ego4d/v2/full_scale/51224e32-3d6c-4148-9eea-7b73da751f25_mod.mp4

In [3]:
import json
from os.path import join
from torchmetrics.text import BLEUScore
from torchmetrics.text.rouge import ROUGEScore

# Directory (relative to the notebook's working directory) that holds the
# GoalStep annotation JSON files loaded by the cells below.
# NOTE(review): the commented-out download command near the top of the notebook
# writes to "./data/ego4d" -- confirm this path matches where the annotations live.
FOLDER_PATH = "./ego4d/annotations"

def read_data(file_path):
    """Load a JSON file and return its decoded content.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
    
    

def agregate_steps(target_video, data=None):
    """Join the step descriptions of one video into a single numbered string.

    Args:
        target_video: ``video_uid`` of the video to look up.
        data: Annotation dict with a ``"videos"`` list. When omitted, the
            notebook-level ``train_data`` is used, so existing calls keep
            working unchanged.

    Returns:
        A string of the form ``"1. First step. 2. Second step"`` built from
        each segment's capitalized ``"step_description"``, or ``""`` when no
        video has the requested uid (or the video has no segments).
    """
    if data is None:
        # Backward-compatible default: the training split loaded by the notebook.
        data = train_data

    steps = ""
    for video in data["videos"]:
        if video["video_uid"] == target_video:
            # Numbering starts at 1; if several entries share the uid, the last one wins.
            steps = ". ".join(
                f"{i}. {segment['step_description'].capitalize()}"
                for i, segment in enumerate(video["segments"], 1)
            )

    return steps

def get_summary(target_video, data=None):
    """Join the summary items of one video into a single numbered string.

    This is the only definition of ``get_summary``; an earlier duplicate that
    returned an unassigned local when no video matched has been removed.

    Args:
        target_video: ``video_uid`` of the video to look up.
        data: Annotation dict with a ``"videos"`` list. When omitted, the
            notebook-level ``train_data`` is used, so existing calls keep
            working unchanged.

    Returns:
        A string of the form ``"1: First item. 2: Second item"`` built from
        the capitalized entries of the video's ``"summary"`` list, or ``""``
        when no video has the requested uid (or the summary is empty).
    """
    if data is None:
        # Backward-compatible default: the training split loaded by the notebook.
        data = train_data

    summary = ""
    for video in data["videos"]:
        if video["video_uid"] == target_video:
            # Numbering starts at 1; if several entries share the uid, the last one wins.
            summary = ". ".join(
                f"{i}: {item.capitalize()}"
                for i, item in enumerate(video["summary"], 1)
            )

    return summary

def get_goal(target_video, data=None):
    """Return the capitalized goal description of one video.

    Args:
        target_video: ``video_uid`` of the video to look up.
        data: Annotation dict with a ``"videos"`` list. When omitted, the
            notebook-level ``train_data`` is used, so existing calls keep
            working unchanged.

    Returns:
        The video's ``"goal_description"`` passed through ``str.capitalize``,
        or ``""`` when no video has the requested uid.
    """
    if data is None:
        # Backward-compatible default: the training split loaded by the notebook.
        data = train_data

    goal = ""
    for video in data["videos"]:
        if video["video_uid"] == target_video:
            # If several entries share the uid, the last one wins.
            goal = video["goal_description"].capitalize()

    return goal

def calculate_bleu(pred, target):
    """Score ``pred`` against ``target`` with a 1-gram ``BLEUScore``.

    ``pred`` is passed as a one-element list and ``target`` as the single
    reference of that prediction. The resulting score is converted to a
    Python number and rounded to three decimals.
    """
    unigram_bleu = BLEUScore(n_gram=1)
    predictions = [pred]
    references = [[target]]
    raw_score = unigram_bleu(predictions, references)
    return round(raw_score.item(), 3)

def calculate_rouge(pred, target):
    """Score ``pred`` against ``target`` with ``ROUGEScore``.

    Only the ``'rouge1_fmeasure'`` entry of the returned scores is used; it is
    converted to a Python number and rounded to three decimals.
    """
    scorer = ROUGEScore()
    all_scores = scorer(pred, target)
    rouge1_f = all_scores['rouge1_fmeasure'].item()
    return round(rouge1_f, 3)

def calculate_metrics(pred, target):
    """Print the BLEU and ROUGE scores of ``pred`` against ``target``.

    The scores come from ``calculate_rouge`` and ``calculate_bleu`` (called in
    that order). Nothing is returned; the result is only written to stdout.
    """
    rouge_value = calculate_rouge(pred, target)
    bleu_value = calculate_bleu(pred, target)
    report = f"BLEU: {bleu_value}   ROUGE: {rouge_value}"
    print(report)

In [5]:
# Load the GoalStep training annotations into the notebook-level `train_data`
# (the lookup helpers defined above read this name), then show how many
# videos the file contains.
train_data = read_data(join(FOLDER_PATH, "goalstep_train.json"))
len(train_data["videos"])

583

In [6]:
# Load the GoalStep validation annotations and show how many videos they contain.
val_data = read_data(join(FOLDER_PATH, "goalstep_val.json"))
len(val_data["videos"])

134

In [7]:
# Load the "goalstep_test_unannotated.json" file and show how many videos it contains.
# NOTE(review): per the file name this split presumably lacks step annotations -- confirm
# before passing it to the lookup helpers.
test_data = read_data(join(FOLDER_PATH,  "goalstep_test_unannotated.json"))
len(test_data["videos"])

134

## BLEU and ROUGE

In [29]:
# Candidate answers for three videos, keyed by video_uid. Each entry holds a
# short "general_answer" and a longer "detailed_answer" string; these are the
# predictions scored against the annotations in the cells below.
# NOTE(review): the name suggests the answers were generated without annotation
# context -- record the model/prompt that produced them.
answers_no_context = {
    "1938c632-f575-49dd-8ae0-e48dbb467920":{
        "general_answer": "The video shows a person washing dishes in a kitchen sink. They put away the clean dishes and put the dirty ones in a dish rack. The person then puts the clean dishes away in the cabinets.",
        "detailed_answer": "The video shows a person washing dishes in a kitchen sink. They start by putting away the clean dishes in the cabinets and then put the dirty dishes in a dish rack. After that, the person takes out the clean dishes from the cabinets and puts them in a dish rack. The video is a simple demonstration of how to wash dishes in a kitchen. The person is shown performing the task of washing dishes in a kitchen sink, which is a common household chore. The video provides a clear visual representation of the process of washing dishes, from putting away the clean dishes to putting them in a dish rack. The video is shot in a kitchen setting, with a sink and cabinets visible in the background. Overall, the video is a helpful guide for anyone looking to learn how to wash dishes in a kitchen."
    },
    "51224e32-3d6c-4148-9eea-7b73da751f25":{
        "general_answer": "The video shows a woman cooking in a kitchen. She is seen cutting up food on a pan, frying it, and then mixing it with other ingredients. She also pours water into a pot and stirs it.",
        "detailed_answer": "The video depicts a woman cooking in a kitchen. She is seen cutting up food on a pan and frying it. Afterwards, she pours water into a pot and stirs it. The woman then mixes the food with other ingredients and continues to cook. The video showcases the woman's culinary skills as she prepares a delicious meal. The kitchen is well-equipped with all the necessary tools and ingredients, and the woman appears to be experienced in cooking. The lighting in the kitchen is bright, and the colors of the ingredients are vibrant, making the video visually appealing. Overall, the video is an excellent representation of a woman's culinary skills and the process of cooking a meal."
    },
    "0c192ca8-1ede-4ef0-a05e-2f4151b6bdfc":{
        "general_answer": "The video shows a man kneading dough in a kitchen and then shaping it into balls. He then rolls out the dough and puts it into the oven.",
        "detailed_answer": "The video shows a man in a kitchen preparing dough. He starts by kneading the dough on a table and then rolls it out. He then puts the dough into the oven and takes it out when it is fully baked. The man appears to be focused on his task and takes his time to ensure that the dough is properly prepared. The kitchen is well-lit, and the man is wearing an apron. The sound of the dough being kneaded and the oven being preheated can be heard in the background. Overall, the video provides a detailed look at the process of making dough from scratch, and the man's attention to detail is evident in the final product."
    }
}

In [30]:
# Pick the video to evaluate and build its reference texts from the annotations:
# the goal description, the numbered summary, and the numbered step list.
# Later cells rely on `target_video`, `summary` and `steps` set here.
target_video = "51224e32-3d6c-4148-9eea-7b73da751f25"
goal = get_goal(target_video)
summary = get_summary(target_video)
steps = agregate_steps(target_video)

In [31]:
# Score the no-context general answer against the annotated summary.
pred = answers_no_context[target_video]["general_answer"]
calculate_metrics(pred, summary)

BLEU: 0.081   ROUGE: 0.138


In [32]:
# Score the no-context detailed answer against the annotated step list.
pred = answers_no_context[target_video]["detailed_answer"]
calculate_metrics(pred, steps)

BLEU: 0.052   ROUGE: 0.077


In [35]:
# Candidate answers for the same three videos, keyed by video_uid, with the
# same "general_answer" / "detailed_answer" layout as `answers_no_context`.
# NOTE(review): the name suggests these were generated with annotation context
# supplied to the model -- record the model/prompt that produced them.
answers_with_context = {
    "1938c632-f575-49dd-8ae0-e48dbb467920":{
        "general_answer": "The video shows a person following a recipe to make a rice dish. They start by turning on the stove, getting a plate, and adding oil to a frying pan. They then add ham and fry it before returning it to the refrigerator. The person then gathers ingredients, adds them to the pan, and microwaves the rice. They arrange the utensils on the counter and drink soda before washing the dishes. The person then adds the fried ingredients to the rice and adds toppings before disposing of the plastic bag in the trash. They garnish the rice with the fried ingredient and add cream before washing the dishes again. Finally, they add meat to the rice and serve the meal.",
        "detailed_answer": "The video starts with a person turning on the stove and getting a plate. They then add oil to a frying pan and add ham into it. After that, they get a frying pan lid and regulate the stove heat. The person gathers ingredients from the refrigerator and adds them to the pan. They then add eggs to the pan and wash their hands. The person adds water to the pan and microwaves the rice. They arrange the utensils on the counter and drink soda. The person washes the dishes and adds the fried ingredients to the rice. They add toppings to the rice and dispose of the plastic bag in the trash. The person garnishes the rice with the fried ingredient and adds cream to it. They serve the meal on a plate. Overall, the video shows a person cooking a rice dish by following a recipe. They start by adding ingredients to the pan and cooking them on the stove. They then add eggs and rice to the pan and microwave it. They add toppings and cream to the rice and serve it on a plate. The video provides a step-by-step guide on how to cook a rice dish and highlights the importance of following a recipe to achieve the desired outcome."
    },
    "51224e32-3d6c-4148-9eea-7b73da751f25":{
        "general_answer": "The video shows a person preparing a soup recipe by slicing onions and tomatoes, cooking them in a pot, and adding salt to the mixture. The person also rinses their hands under the tap before and after preparing the soup.",
        "detailed_answer": "The video shows a person preparing a soup recipe in a kitchen. The person starts by slicing onions and tomatoes and pouring them into a pot. They then add more ingredients to the pot and stir them around. After that, the person rinses their hands under the tap. Next, they add salt to the pot and rinse their hands again. The person then ignites the cooking gas and continues to cook the soup. The video provides a step-by-step guide to making the soup, showing the person carefully adding ingredients and stirring them around. The viewer can see the person's hands moving around the pot, and the sound of the cooking gas can be heard in the background. Overall, the video provides a detailed and informative demonstration of how to make a delicious soup recipe."
    },
    "0c192ca8-1ede-4ef0-a05e-2f4151b6bdfc":{
        "general_answer": "The video shows a man making bread in a kitchen. He follows a recipe that involves gathering baking pan covers, cooling the oven with water, putting doughs in the oven, adjusting the baking tray, cooling the oven with water again, putting doughs in the oven, adjusting the baking pan with a rod, taking the doughs out of the oven, removing the dough cover, putting doughs in the oven again, adjusting the baking pan with a rod, kneading the dough in his hands, preparing the dough in a mixer, preparing the dough in a cloth, and preparing wrapped dough in a dough cutter.",
        "detailed_answer": "The video shows a man in a kitchen following a recipe to make bread. He starts by gathering baking pan covers and cooling the oven with water. He then puts the doughs in the oven and adjusts the baking tray. After cooling the oven with water again, he puts the doughs back in the oven and adjusts the baking pan with a rod. He takes the doughs out of the oven, removes the dough cover, and puts the doughs back in the oven. He adjusts the baking pan with a rod again and kneads the dough in his hands. He then prepares the dough in a mixer and in a cloth. Finally, he prepares wrapped dough in a dough cutter. Throughout the video, the man is focused and determined, carefully following each step of the recipe to create delicious bread."
    }
}

In [36]:
# Score the with-context general answer against the annotated summary.
pred = answers_with_context[target_video]["general_answer"]
calculate_metrics(pred, summary)

BLEU: 0.05   ROUGE: 0.098


In [37]:
# Score the with-context detailed answer against the annotated step list.
pred = answers_with_context[target_video]["detailed_answer"]
calculate_metrics(pred, steps)

BLEU: 0.104   ROUGE: 0.204


## Examples

### General Questions

In [41]:
# Display the no-context general answer for the selected video.
answers_no_context[target_video]["general_answer"]

'The video shows a woman cooking in a kitchen. She is seen cutting up food on a pan, frying it, and then mixing it with other ingredients. She also pours water into a pot and stirs it.'

In [42]:
# Display the with-context general answer for the selected video.
answers_with_context[target_video]["general_answer"]

'The video shows a person preparing a soup recipe by slicing onions and tomatoes, cooking them in a pot, and adding salt to the mixture. The person also rinses their hands under the tap before and after preparing the soup.'

### Detailed Questions

In [43]:
# Display the no-context detailed answer for the selected video.
answers_no_context[target_video]["detailed_answer"]

"The video depicts a woman cooking in a kitchen. She is seen cutting up food on a pan and frying it. Afterwards, she pours water into a pot and stirs it. The woman then mixes the food with other ingredients and continues to cook. The video showcases the woman's culinary skills as she prepares a delicious meal. The kitchen is well-equipped with all the necessary tools and ingredients, and the woman appears to be experienced in cooking. The lighting in the kitchen is bright, and the colors of the ingredients are vibrant, making the video visually appealing. Overall, the video is an excellent representation of a woman's culinary skills and the process of cooking a meal."

In [44]:
# Display the with-context detailed answer for the selected video.
answers_with_context[target_video]["detailed_answer"]

"The video shows a person preparing a soup recipe in a kitchen. The person starts by slicing onions and tomatoes and pouring them into a pot. They then add more ingredients to the pot and stir them around. After that, the person rinses their hands under the tap. Next, they add salt to the pot and rinse their hands again. The person then ignites the cooking gas and continues to cook the soup. The video provides a step-by-step guide to making the soup, showing the person carefully adding ingredients and stirring them around. The viewer can see the person's hands moving around the pot, and the sound of the cooking gas can be heard in the background. Overall, the video provides a detailed and informative demonstration of how to make a delicious soup recipe."

In [40]:
#target_video = '51224e32-3d6c-4148-9eea-7b73da751f25'#'0c192ca8-1ede-4ef0-a05e-2f4151b6bdfc'#'ac582760-09b1-4a6e-be08-f19f9bf5dfcb'

# Print the raw annotation record(s) in the training split whose uid equals
# `target_video` (set in an earlier cell). The whole dict is printed, so the
# output can be very long.
for video in train_data["videos"]:
    if video['video_uid'] == target_video:
        print(video)

{'video_uid': '51224e32-3d6c-4148-9eea-7b73da751f25', 'start_time': 0.0210286458333333, 'end_time': 531.2876953125, 'goal_category': 'COOKING:COOKING_GENERAL', 'goal_description': 'preparing soup', 'goal_wikihow_url': '', 'summary': ['cutting vegetable', 'adding ingredients', 'putting vegetable in cooking utensils', 'putting meat in cooking utensils', 'pouring oil'], 'is_procedural': True, 'segments': [{'start_time': 0.17508, 'end_time': 8.17507, 'step_category': 'Peel and cut ingredients: Cut onion', 'step_description': 'slice onion with knife', 'is_continued': False, 'is_procedural': False, 'is_relevant': 'essential', 'summary': [], 'segments': []}, {'start_time': 8.37507, 'end_time': 71.333, 'step_category': 'Peel and cut ingredients: Cut tomato', 'step_description': 'slice tomato with knife', 'is_continued': False, 'is_procedural': False, 'is_relevant': 'essential', 'summary': [], 'segments': []}, {'start_time': 71.33335, 'end_time': 91.289, 'step_category': 'Add ingredients to the

In [45]:
# Collect the uid of every video in the training split, in file order.
video_uids = [x["video_uid"] for x in train_data["videos"]]

In [48]:
# Show all training uids as one space-separated string.
# NOTE(review): presumably meant for pasting into the `--video_uids` option of
# the commented-out download command at the top of the notebook -- confirm.
" ".join(video_uids)

'39d087b0-afc2-47d8-ba91-b70dd8fab90e 2f6da5f6-e26c-4ac3-8f71-12386f7588e2 09bccca1-368b-4776-9433-3c8835837110 0c192ca8-1ede-4ef0-a05e-2f4151b6bdfc 6639f53c-701d-4fe7-adcd-55d040ce8afe grp-090c6bc0-49da-4d3b-b209-a1a60aeb0317 d9691bde-a0b2-4521-8374-a74f594aaaac 4c642620-db0e-4096-9ece-2b2c6fdb47b0 grp-ab070b36-def2-4ad7-a760-2a9ce29ce505 bbe7b4ee-0e62-4fb5-877e-d4c76a0c8247 952b1fa3-05cc-4c2b-8897-47d02cd598b8 grp-d250521e-5197-44aa-8baa-2f42b24444d2 5cdf77b8-7bf8-421b-99b6-19fa6429aeb4 546a1aed-676d-44ed-a63c-8db89fa4d935 94d5eff8-0fac-4719-adf2-5c0208ab89f7 91e3e6ce-bc01-4720-a490-e319dd380509 1938c632-f575-49dd-8ae0-e48dbb467920 b0a55292-f9a0-4af2-8f2a-bd6d7eff2b2e 51224e32-3d6c-4148-9eea-7b73da751f25 b83285c5-0b88-4ced-a52e-5c34ea371507 grp-2be0151d-8ee7-4e7a-adf5-6b2b3d5afdb0 68205f0f-9b30-4d81-986d-f8816e70bddd 0ebb682e-6aec-482c-bbb7-774ec5eca906 78b06017-cd7b-441c-ba48-33c04e37a82a 1bece8d5-2d0f-47bb-a1f0-4fc3f94970f5 4dcf1151-b0dd-49e8-8e76-3d0075d4c3f7 4371dce1-6911-4366-b3