In [1]:
# Load gemini_pred.csv

import pandas as pd
import numpy as np

# Read the prediction table; a comma is already read_csv's default separator.
df = pd.read_csv("gemini_pred.csv")
    

In [2]:
# Preview the first rows to sanity-check the loaded columns.
df.head()

Unnamed: 0,video_id,question,true answer,text answer,prediction,correct
0,0074f737-11cb-497d-8d07-77c3a8127391,{'question': 'Taking into account all the acti...,3,The primary objective and focus within the vi...,3,True
1,00b9a0de-c59e-49cb-a127-6081e2fb8c8e,{'question': 'What was the primary purpose of ...,4,The primary purpose of the cup of water in th...,0,False
2,00f93e1e-cf4e-4835-88b4-4ad68216e86f,{'question': 'What is the overarching theme of...,1,The video shows two people playing a board ga...,0,False
3,00faf954-74f7-4aa3-8b29-4a5dff4f9518,{'question': 'What is the primary sequence of ...,4,"The answer is ""Option 1"". \n\nThe video shows...",1,False
4,011b8b73-0ce4-4843-95ef-33b79610d212,"{'question': ""What can be deduced about c's le...",3,C made several adjustments throughout the vid...,0,False


In [3]:
# Correcting the pivoting process
# We will create a new column to differentiate between normal, 4x, and 10x speeds based on the video_id
def categorize_video_speed(video_id):
    """Map a video identifier to its playback-speed label.

    Args:
        video_id: Identifier string; a '4x' or '10x' substring marks a
            sped-up variant.

    Returns:
        '4x_speed' if the id contains '4x', otherwise '10x_speed' if it
        contains '10x', otherwise 'normal'.
    """
    # Order matters: '4x' is tested before '10x', so it wins when both appear.
    speed_markers = (
        ('4x', '4x_speed'),
        ('10x', '10x_speed'),
    )
    for marker, label in speed_markers:
        if marker in video_id:
            return label
    return 'normal'

# Label every row with the playback speed encoded in its video_id
df['speed_category'] = df['video_id'].apply(categorize_video_speed)

# Remove the speed-up prefixes so all variants of a clip share one identifier
df['aligned_video_id'] = (
    df['video_id']
    .str.replace('sped_up_4x_', '')
    .str.replace('sped_up_10x_', '')
)

# Reshape to one row per clip and one column per speed category;
# each cell holds the prediction made for that clip at that speed
pivoted_data = df.pivot(
    index='aligned_video_id',
    columns='speed_category',
    values='prediction',
)

pivoted_data.head()

speed_category,10x_speed,4x_speed,normal
aligned_video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0074f737-11cb-497d-8d07-77c3a8127391,3.0,3.0,3.0
00b9a0de-c59e-49cb-a127-6081e2fb8c8e,0.0,0.0,0.0
00f93e1e-cf4e-4835-88b4-4ad68216e86f,0.0,0.0,0.0
00faf954-74f7-4aa3-8b29-4a5dff4f9518,1.0,1.0,1.0
011b8b73-0ce4-4843-95ef-33b79610d212,0.0,0.0,0.0


In [4]:
# Drop every row of pivoted_data that contains a NaN value,
# i.e. keep only videos with a prediction in every speed column
pivoted_data = pivoted_data.dropna(axis='index', how='any')

In [5]:
# Count the videos whose normal-speed prediction equals the 4x prediction,
# and likewise for the 10x prediction
same_prediction_normal_4x = (
    pivoted_data['normal'] == pivoted_data['4x_speed']
).sum()
same_prediction_normal_10x = (
    pivoted_data['normal'] == pivoted_data['10x_speed']
).sum()

print(f"Number of videos with the same prediction in normal and 4x speed up: {same_prediction_normal_4x}")
print(f"Number of videos with the same prediction in normal and 10x speed up: {same_prediction_normal_10x}")

Number of videos with the same prediction in normal and 4x speed up: 73
Number of videos with the same prediction in normal and 10x speed up: 78


In [6]:
# Ground-truth answer per clip: the first 'true answer' found in each
# aligned_video_id group.
# NOTE(review): assumes every speed variant of a clip carries the same
# 'true answer' - confirm against the CSV.
ground_truth = df.groupby('aligned_video_id')['true answer'].first()

# Drop a 'true answer' column left behind by an earlier run of this cell.
# Without this, re-running the cell raises ValueError because join() refuses
# overlapping column names; on a first run the drop is a no-op.
pivoted_data = (
    pivoted_data
    .drop(columns='true answer', errors='ignore')
    .join(ground_truth)
)
pivoted_data.head()

Unnamed: 0_level_0,10x_speed,4x_speed,normal,true answer
aligned_video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0074f737-11cb-497d-8d07-77c3a8127391,3.0,3.0,3.0,3
00b9a0de-c59e-49cb-a127-6081e2fb8c8e,0.0,0.0,0.0,4
00f93e1e-cf4e-4835-88b4-4ad68216e86f,0.0,0.0,0.0,1
00faf954-74f7-4aa3-8b29-4a5dff4f9518,1.0,1.0,1.0,4
011b8b73-0ce4-4843-95ef-33b79610d212,0.0,0.0,0.0,3


In [8]:
from scipy import stats

# Re-defining the function to calculate the 95% confidence interval for proportion
def calculate_proportion_confidence_interval(correct, total, confidence=0.95):
    """Return the margin of error of a confidence interval for a proportion.

    The value returned is the half-width of the interval, so the interval
    itself is ``correct / total +/- result``.

    Args:
        correct: Number of successes (scalar or array-like).
        total: Number of trials (scalar or array-like).
        confidence: Two-sided confidence level strictly between 0 and 1.
            Defaults to 0.95, the level that was previously hard-coded.

    Returns:
        Standard error of the proportion multiplied by the Student-t critical
        value with ``total - 1`` degrees of freedom. The result is NaN when it
        is undefined (``total <= 1``) rather than raising ZeroDivisionError.

    Raises:
        ValueError: If ``confidence`` is not strictly between 0 and 1.
    """
    if not 0 < confidence < 1:
        raise ValueError("confidence must be strictly between 0 and 1")

    # Work in float arrays so Python ints and NumPy ints behave the same way
    # (a plain-int 0/0 used to raise, while NumPy ints produced NaN).
    correct = np.asarray(correct, dtype=float)
    total = np.asarray(total, dtype=float)

    # Degenerate totals yield NaN; silence the floating-point warnings they cause.
    with np.errstate(divide='ignore', invalid='ignore'):
        proportion = correct / total
        se = np.sqrt(proportion * (1 - proportion) / total)  # Standard error

    # NOTE(review): a t critical value is kept from the original code; the
    # textbook Wald interval for a proportion uses the normal quantile instead,
    # which is slightly narrower. Switching would change reported numbers.
    t_critical = stats.t.ppf((1 + confidence) / 2., total - 1)
    return se * t_critical

In [12]:
# For each playback speed: count the predictions that equal the ground truth,
# and the number of videos evaluated (.count() skips NaN entries).

# 10x speed-up
correct_10x_predictions = (pivoted_data['10x_speed'] == pivoted_data['true answer']).sum()
total_10x_videos = pivoted_data['10x_speed'].count()

# 4x speed-up
correct_4x_predictions = (pivoted_data['4x_speed'] == pivoted_data['true answer']).sum()
total_4x_videos = pivoted_data['4x_speed'].count()

# Normal speed
correct_normal_predictions = (pivoted_data['normal'] == pivoted_data['true answer']).sum()
total_normal_videos = pivoted_data['normal'].count()

print("10x speed up - Correct predictions: {}/{}".format(correct_10x_predictions, total_10x_videos))
print("4x speed up - Correct predictions: {}/{}".format(correct_4x_predictions, total_4x_videos))
print("Normal speed - Correct predictions: {}/{}".format(correct_normal_predictions, total_normal_videos))

# Margin of error (interval half-width) for each accuracy
ci_10x_speed = calculate_proportion_confidence_interval(correct_10x_predictions, total_10x_videos)
ci_4x_speed = calculate_proportion_confidence_interval(correct_4x_predictions, total_4x_videos)
ci_normal_speed = calculate_proportion_confidence_interval(correct_normal_predictions, total_normal_videos)

# Report the mean of the three margins, in percentage points.
# "\\%" prints the same literal backslash-percent as before; the former "\%"
# was an invalid escape sequence (SyntaxWarning on Python 3.12+).
# NOTE(review): presumably the backslash is wanted for pasting into LaTeX - confirm.
print(f"95\\% Confidence: +- {((ci_10x_speed + ci_4x_speed + ci_normal_speed)/3 * 100):.2f}")

(ci_10x_speed, ci_4x_speed, ci_normal_speed)

10x speed up - Correct predictions: 52/100
4x speed up - Correct predictions: 48/100
Normal speed - Correct predictions: 59/100
95\% Confidence: +- 9.86


(0.09913144712447916, 0.09913144712447916, 0.09759039812846866)