## Imports and main settings

In [1]:
import os
from utils_result_analysis import *

In [2]:
# --- Main settings of the analysis -------------------------------------------
# Passed as the third argument to get_team_values_df when the team logs are
# processed.
max_records = 10000

# Every path is derived from the parent of the current working directory, so
# the notebook has to be started from a sub-folder of the project.
base_dir = os.path.abspath("../")
output_dir = f"{base_dir}/output"   # generated tables are written here
data_dir = f"{base_dir}/data"       # tasks.csv and submissions.csv are read from here
events_dir = f"{data_dir}/events"   # the per-team event logs are read from here

# Teams used in the analysis.
teams_list = [
    'cvhunter1', 'cvhunter2', 'cvhunter3', 'cvhunter4',
    'visione1', 'visione2', 'visione3', 'visione4',
    'vibro1', 'vibro2', 'vibro3', 'vibro4',
]

# Create the output folder when it is missing. exist_ok=True replaces the
# separate isdir() check, so the call is safe to repeat and cannot fail if the
# folder appears between the check and the creation.
os.makedirs(output_dir, exist_ok=True)

# Loading and processing data

In [3]:
# Load the tasks; the distinct task names are kept in task_list.
tasks_df = pd.read_csv(f"{data_dir}/tasks.csv")
task_list = tasks_df['name'].unique()

# Load the submissions.
submissions_df = pd.read_csv(f"{data_dir}/submissions.csv")

# Load all team results: there is one CSV file for each team in events_dir.
# Each file contributes its raw rows to df_events and, after processing with
# get_team_values_df, its rows to df_results.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep the row order of the concatenated frames reproducible. Entries that are
# not CSV files (e.g. .ipynb_checkpoints, .DS_Store) are skipped because they
# would make pd.read_csv fail.
event_files = sorted(
    name for name in os.listdir(events_dir)
    if name.lower().endswith(".csv")
)
if not event_files:
    # Fail with an explicit message instead of the generic
    # "No objects to concatenate" error raised by pd.concat on an empty list.
    raise FileNotFoundError(f"No CSV event logs found in {events_dir}")

dfs = []
dfs_events = []
for f in event_files:
    team_events_df = pd.read_csv(os.path.abspath(f"{events_dir}/{f}"))
    dfs_events.append(team_events_df)
    team_df = get_team_values_df(team_events_df, tasks_df, max_records)
    dfs.append(team_df)

df_events = pd.concat(dfs_events, axis=0)  # dataframe with one row for each query and best rank info
df_results = pd.concat(dfs, axis=0)  # dataframe with one row for each task-team-user

# Keep only the teams selected for the analysis.
df_events = df_events[df_events['team'].isin(teams_list)]
df_results = df_results[df_results['team'].isin(teams_list)]
teams_family_list = df_events['teamFamily'].unique()

print(f" ** Teams considered in the analysis: ** {teams_list}")
print(f" ** Team Families considered in the analysis: ** {teams_family_list}")
print(f"df_events: {df_events.shape}" )
print(f"df_results: {df_results.shape}" )



 ** Teams considered in the analysis: ** ['cvhunter1', 'cvhunter2', 'cvhunter3', 'cvhunter4', 'visione1', 'visione2', 'visione3', 'visione4', 'vibro1', 'vibro2', 'vibro3', 'vibro4']
 ** Team Families considered in the analysis: ** ['cvhunter' 'vibro' 'visione']
df_events: (4361, 14)
df_results: (682, 18)


In [4]:
# Preview the first five rows of df_results (one row for each task-team-user).
df_results.head(n=5)

Unnamed: 0,teamFamily,team,user,task,task_start,time_correct_submission,time_best_video,time_best_shot,time_best_shot_margin5,rank_video,rank_shot_margin_0,rank_shot_margin_5,time_first_appearance,rank_shot_first_appearance,time_last_appearance,rank_shot_last_appearance,time_first_appearance_video,rank_video_first_appearance
0,cvhunter,cvhunter1,1,vbse001,1668157000000.0,24.0,14.0,14.0,14.0,2.0,2.0,2.0,14.0,2.0,15.0,2.0,14.0,2.0
1,cvhunter,cvhunter1,1,vbse002,1668158000000.0,-1.0,178.0,178.0,178.0,96.0,96.0,96.0,8.0,136.0,263.0,135.0,8.0,136.0
2,cvhunter,cvhunter1,1,vbse003,1668158000000.0,87.0,35.0,35.0,35.0,33.0,33.0,33.0,9.0,48.0,35.0,33.0,9.0,46.0
3,cvhunter,cvhunter1,1,vbse004,1668158000000.0,18.0,11.0,11.0,11.0,3.0,3.0,3.0,11.0,3.0,11.0,3.0,11.0,3.0
4,cvhunter,cvhunter1,1,vbse005,1668158000000.0,13.0,9.0,9.0,9.0,2.0,2.0,2.0,9.0,2.0,10.0,2.0,9.0,2.0


In [5]:
# Preview the first five rows of df_events (one row for each query).
df_events.head(n=5)

Unnamed: 0,task,team,user,timestamp,elapsed_since_task_start_ms,correct_submission_time_ms,rank_video,rank_shot_margin_0,rank_shot_margin_5,category,type,value,additionals,teamFamily
0,vbse001,cvhunter1,1,1668157262045,13678.0,24219.0,2.0,2.0,2.0,TEXT,JointEmbedding,CLIP: A woman holding a steaming mug close to ...,{},cvhunter
1,vbse001,cvhunter1,1,1668157263040,14673.0,24219.0,2.0,2.0,2.0,TEXT,JointEmbedding,CLIP: A woman holding a steaming mug close to ...,{},cvhunter
2,vbse002,cvhunter1,1,1668157580162,8188.0,,136.0,136.0,136.0,TEXT,JointEmbedding,CLIP: Viewer moving fast backwards on railway ...,{},cvhunter
3,vbse002,cvhunter1,1,1668157580949,8975.0,,136.0,136.0,136.0,TEXT,JointEmbedding,CLIP: Viewer moving fast backwards on railway ...,{},cvhunter
4,vbse002,cvhunter1,1,1668157614097,42123.0,,1161.0,1161.0,1161.0,IMAGE,feedbackModel,Bayes CLIP 15821:2727_16612:962_12565:4812_522...,{},cvhunter


In [6]:
# Preview the first five rows of submissions_df, which contains the
# submissions of each task.
submissions_df.head(n=5)

Unnamed: 0,taskName,team,teamFamily,user,task_start,task_end,timestamp,sessionID,status
0,vbse001,cvhunter3,cvhunter,3,1668157000000.0,1668158000000.0,1668157260122,d4549039-edba-4f67-be8d-49631b50e3d1,CORRECT
1,vbse001,cvhunter4,cvhunter,4,1668157000000.0,1668158000000.0,1668157265367,af025988-70ba-4e86-8b9d-3004de40a677,CORRECT
2,vbse001,vibro4,vibro,4,1668157000000.0,1668158000000.0,1668157269373,eda40d9d-2a9e-44a7-81ab-fa9a826663ea,CORRECT
3,vbse001,cvhunter1,cvhunter,1,1668157000000.0,1668158000000.0,1668157272586,a2b6d319-7da0-4278-a13b-ae553c892168,CORRECT
4,vbse001,cvhunter2,cvhunter,2,1668157000000.0,1668158000000.0,1668157278121,c08385eb-b1ec-4ed5-ba19-9441104833d0,CORRECT


## COMPUTING DRES SCORES
Scores are computed both for the original teams and for the teams aggregated by `teamFamily`.

In [7]:
# Scores of the individual teams; the result is merged below on
# ['team', 'task'] and provides the 'score' column.
print(f"Computing scores for the following teams: {teams_list}")
team_scores_df = compute_team_scores(task_list, teams_list, submissions_df)

# Scores with the teams aggregated by family. The result columns are renamed
# ('team' -> 'teamFamily', 'score' -> 'score_family') so that they can be
# merged next to the per-team score without a name clash.
print(f"Computing scores for the following teams: {teams_family_list}")
team_aggregated_by_family_scores_df = compute_team_scores(task_list, teams_family_list, submissions_df)
team_aggregated_by_family_scores_df = team_aggregated_by_family_scores_df.rename(
    columns={'team': 'teamFamily', 'score': 'score_family'}
)

# Add the scores to df_results.
# Score columns left over from a previous run of this cell are dropped first:
# without this, re-running the cell would merge onto a frame that already has
# 'score' / 'score_family' and end up with suffixed or clashing columns.
# errors='ignore' makes the drop a no-op on the first run.
df_results = df_results.drop(columns=['score', 'score_family'], errors='ignore')
# NOTE: pd.merge defaults to an inner join, so rows of df_results without a
# matching score row are dropped.
df_results = pd.merge(df_results, team_scores_df, on=['team', 'task'])
df_results = pd.merge(df_results, team_aggregated_by_family_scores_df, on=['teamFamily', 'task'])

df_results

Computing scores for the following teams: ['cvhunter1', 'cvhunter2', 'cvhunter3', 'cvhunter4', 'visione1', 'visione2', 'visione3', 'visione4', 'vibro1', 'vibro2', 'vibro3', 'vibro4']
Computing scores for the following teams: ['cvhunter' 'vibro' 'visione']


Unnamed: 0,teamFamily,team,user,task,task_start,time_correct_submission,time_best_video,time_best_shot,time_best_shot_margin5,rank_video,rank_shot_margin_0,rank_shot_margin_5,time_first_appearance,rank_shot_first_appearance,time_last_appearance,rank_shot_last_appearance,time_first_appearance_video,rank_video_first_appearance,score,score_family
0,cvhunter,cvhunter1,1,vbse001,1.668157e+12,24.0,14.0,14.0,14.0,2.0,2.0,2.0,14.0,2.0,15.0,2.0,14.0,2.0,95.963594,98.040879
1,cvhunter,cvhunter2,2,vbse001,1.668157e+12,30.0,14.0,14.0,14.0,2.0,2.0,2.0,14.0,2.0,14.0,2.0,14.0,2.0,95.041116,98.040879
2,cvhunter,cvhunter3,3,vbse001,1.668157e+12,12.0,8.0,8.0,8.0,2.0,2.0,2.0,8.0,2.0,9.0,2.0,8.0,2.0,98.040879,98.040879
3,cvhunter,cvhunter4,4,vbse001,1.668157e+12,17.0,14.0,14.0,14.0,1.0,1.0,1.0,14.0,1.0,14.0,1.0,14.0,1.0,97.166733,98.040879
4,cvhunter,cvhunter1,1,vbse002,1.668158e+12,-1.0,178.0,178.0,178.0,96.0,96.0,96.0,8.0,136.0,263.0,135.0,8.0,136.0,0.000000,94.330686
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
677,visione,visione4,4,vbse059,1.668179e+12,239.0,205.0,205.0,205.0,3.0,4.0,4.0,56.0,1815.0,227.0,4.0,6.0,1768.0,60.085699,86.896841
678,visione,visione1,1,vbse060,1.668180e+12,49.0,8.0,8.0,8.0,1.0,8.0,8.0,8.0,8.0,39.0,8.0,8.0,1.0,91.765720,82.605364
679,visione,visione2,2,vbse060,1.668180e+12,44.0,7.0,13.0,13.0,1.0,8.0,8.0,7.0,10.0,13.0,8.0,7.0,1.0,82.605364,82.605364
680,visione,visione3,3,vbse060,1.668180e+12,73.0,6.0,6.0,6.0,1.0,8.0,8.0,6.0,8.0,6.0,8.0,6.0,1.0,87.857824,82.605364


In [8]:
# Show the column labels of df_results.
df_results.columns

Index(['teamFamily', 'team', 'user', 'task', 'task_start',
       'time_correct_submission', 'time_best_video', 'time_best_shot',
       'time_best_shot_margin5', 'rank_video', 'rank_shot_margin_0',
       'rank_shot_margin_5', 'time_first_appearance',
       'rank_shot_first_appearance', 'time_last_appearance',
       'rank_shot_last_appearance', 'time_first_appearance_video',
       'rank_video_first_appearance', 'score', 'score_family'],
      dtype='object')

## Render and save the table with all the team results


In [9]:
# Considering all distinct teams: build the table with TimeRecallTable,
# write it to a CSV file in output_dir and display it.
team_table_csv = f"{output_dir}/team_time_recall_table_vbse2022.csv"
team_time_recall_table_df = TimeRecallTable(df_results, teams_list)
team_time_recall_table_df.to_csv(team_table_csv)
print(f"Saving: {team_table_csv}")
team_time_recall_table_df

Saving: D:\Work\Dev\workspacePYTHON\PycharmProjects\VBSE-2022/output/team_time_recall_table_vbse2022.csv


Unnamed: 0,Unnamed: 1,task,vbse001,vbse002,vbse003,vbse004,vbse005,vbse006,vbse007,vbse008,vbse009,vbse010,...,vbse051,vbse052,vbse053,vbse054,vbse055,vbse056,vbse057,vbse058,vbse059,vbse060
cvhunter1,correct frame,rank,2,96,33,3,2,105,6,4069,12,173,...,253,2,3,4949,-,-,-,26,135,1
cvhunter1,correct frame,time,14s,178s,35s,11s,9s,9s,9s,8s,8s,66s,...,233s,119s,57s,245s,-,-,-,5s,94s,88s
cvhunter1,correct video,rank,2,96,33,3,2,42,6,17,12,81,...,246,2,3,656,5,21,19,16,11,1
cvhunter1,correct video,time,14s,178s,35s,11s,9s,9s,9s,112s,8s,276s,...,116s,119s,57s,101s,7s,292s,198s,5s,94s,88s
cvhunter1,correct submission,time,24s,-,87s,18s,13s,49s,13s,196s,24s,-,...,-,138s,64s,-,74s,-,205s,38s,140s,98s
cvhunter2,correct frame,rank,2,136,48,3,2,5,6,134,12,24,...,23,26,2404,1922,-,-,-,1,362,89
cvhunter2,correct frame,time,14s,9s,9s,8s,12s,161s,9s,43s,8s,103s,...,64s,7s,8s,219s,-,-,-,59s,92s,48s
cvhunter2,correct video,rank,2,136,46,3,2,1,6,6,12,24,...,13,20,226,138,5,2,610,1,125,44
cvhunter2,correct video,time,14s,9s,9s,8s,12s,190s,9s,129s,8s,103s,...,64s,7s,8s,219s,7s,123s,169s,59s,7s,48s
cvhunter2,correct submission,time,30s,34s,58s,14s,17s,210s,14s,-,14s,121s,...,91s,104s,34s,283s,32s,143s,-,168s,-,67s


In [11]:
# Considering the best "user" for each team family: for every
# (teamFamily, task) group keep the row with the highest 'score'
# (idxmax returns the first such row when several rows share the maximum).
best_row_labels = df_results.groupby(['teamFamily', 'task'])['score'].idxmax()

# The per-user 'team' column is dropped and 'teamFamily' takes its place, so
# the table below is built with the family names as teams.
df_results_best_users = (
    df_results.loc[best_row_labels]
    .drop(columns=['team'])
    .rename(columns={'teamFamily': 'team'})
)

best_user_table_csv = f"{output_dir}/bestUser_time_recall_table_vbse2022.csv"
bestUser_time_recall_table_df = TimeRecallTable(df_results_best_users, teams_family_list)
bestUser_time_recall_table_df.to_csv(best_user_table_csv)
print(f"Saving: {best_user_table_csv}")
bestUser_time_recall_table_df

Saving: D:\Work\Dev\workspacePYTHON\PycharmProjects\VBSE-2022/output/bestUser_time_recall_table_vbse2022.csv


Unnamed: 0,Unnamed: 1,task,vbse001,vbse002,vbse003,vbse004,vbse005,vbse006,vbse007,vbse008,vbse009,vbse010,...,vbse051,vbse052,vbse053,vbse054,vbse055,vbse056,vbse057,vbse058,vbse059,vbse060
cvhunter,correct frame,rank,2,136,48,3,2,105,6,21,12,24,...,23,26,2404,6575,-,-,-,26,135,90
cvhunter,correct frame,time,8s,9s,8s,8s,8s,9s,7s,85s,8s,103s,...,64s,6s,8s,113s,-,-,-,5s,94s,55s
cvhunter,correct video,rank,2,136,46,3,2,42,6,3,12,24,...,13,20,226,2000,5,117,19,16,11,8
cvhunter,correct video,time,8s,9s,8s,8s,8s,9s,7s,85s,8s,103s,...,64s,6s,8s,113s,7s,22s,198s,5s,94s,55s
cvhunter,correct submission,time,12s,34s,33s,14s,11s,49s,12s,102s,14s,121s,...,91s,65s,34s,205s,32s,59s,205s,38s,140s,63s
vibro,correct frame,rank,47,1,-,6,-,10,27,1,-,159,...,51,-,-,-,-,19,9399,1,863,54
vibro,correct frame,time,6s,132s,-,3s,-,12s,6s,121s,-,79s,...,7s,-,-,-,-,89s,52s,49s,8s,54s
vibro,correct video,rank,47,1,1,6,1,10,27,1,99,159,...,19,3150,45,1,1,8,14,1,79,9
vibro,correct video,time,6s,132s,8s,3s,2s,12s,6s,121s,11s,79s,...,7s,17s,6s,29s,6s,82s,41s,49s,8s,8s
vibro,correct submission,time,21s,146s,68s,14s,16s,23s,15s,127s,35s,101s,...,17s,29s,19s,80s,48s,146s,87s,110s,105s,61s
