## Import and main settings

In [41]:
import pandas as pd
import numpy as np
import os
import itertools

In [42]:
max_records = 10000
base_dir=os.path.abspath("../")
output_dir=f"{base_dir}/output"
data_dir = f"{base_dir}/data"
events_dir=f"{data_dir}/events"
teams=  ['cvhunter1', 'cvhunter2','cvhunter3', 'cvhunter4',
        'visione1','visione2','visione3','visione4',
        'vibro1','vibro2','vibro3','vibro4']

if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

## Functions for data processing

In [43]:
# give the dataframe containing all the tasks
def get_task_from_taskname(tasks_df, taskname):
        return tasks_df[tasks_df['name'] == taskname].iloc[0].to_dict()

In [44]:
def get_submissions_count(sub_df):
        wrong_sub=sub_df[sub_df['status']=='WRONG']
        wrong_sub=wrong_sub.groupby(["taskName","team","teamFamily","user"])[['status']].count()
        wrong_sub.columns = ["nWrongSubmissions"]
        correct_sub=sub_df[sub_df['status']=='CORRECT']
        correct_sub=correct_sub.groupby(["taskName","team","teamFamily","user"])[['status']].count()
        correct_sub.columns = ["nCorrectSubmissions"]

        correct_sub=correct_sub.reset_index()
        wrong_sub=wrong_sub.reset_index()
        submissions=correct_sub.merge(wrong_sub, on=["taskName","team","teamFamily","user"], how="outer").fillna(0)
        submissions=submissions.rename(columns={"taskName": "task"})
        return  submissions.reset_index().drop(columns=['index'])

In [45]:
 #create the  dataframe containing results for all teams and tasks
def get_team_values_df(df,sub_df, task_df, max_rank=10000):
        #remove ranks bigger than max_rank
        replace_large_ranks = lambda x: np.inf if x > max_rank  else x
        df['rank_video'] = df['rank_video'].apply(replace_large_ranks)
        df['rank_shot_margin_0'] = df['rank_shot_margin_0'].apply(replace_large_ranks)
        df['rank_shot_margin_5'] = df['rank_shot_margin_5'].apply(replace_large_ranks)

        # for each (team, user, task), find the minimum ranks and the timestamps
        df=df.sort_values('timestamp')
        best_video_df = df.loc[df.groupby(['teamFamily','team', 'user', 'task'])['rank_video'].idxmin()]
        best_shot_df = df.loc[df.groupby(['teamFamily','team', 'user', 'task'])['rank_shot_margin_0'].idxmin()]
        best_shot_df_5secs = df.loc[df.groupby(['teamFamily','team', 'user','task'])['rank_shot_margin_5'].idxmin()]
        # find also the time of first and last appearance of a result in the ranked list
        df_valid_rankshot = df[~df['rank_shot_margin_0'].isin([np.inf, -np.inf])]
        df_valid_rankvideo = df[~df['rank_video'].isin([np.inf, -np.inf])]
        first_appearance_time = df_valid_rankshot.loc[df_valid_rankshot.groupby(['teamFamily','team', 'user', 'task'])['timestamp'].idxmin()]
        first_appearance_time_video = df_valid_rankvideo.loc[df_valid_rankvideo.groupby(['teamFamily','team', 'user', 'task'])['timestamp'].idxmin()]
        last_appearance_time = df_valid_rankshot.loc[df_valid_rankshot.groupby(['teamFamily','team', 'user', 'task'])['timestamp'].idxmax()]

        best_video_df = best_video_df.filter(['teamFamily','team', 'user', 'task', 'rank_video', 'timestamp', 'correct_submission_time_ms']).rename(
                columns={'timestamp': 'timestamp_best_video'})
        best_shot_df = best_shot_df.filter(['teamFamily','team', 'user','task', 'rank_shot_margin_0', 'timestamp']).rename(
                columns={'timestamp': 'timestamp_best_shot'})
        best_shot_df_5secs = best_shot_df_5secs.filter(
                ['teamFamily','team', 'user', 'task', 'rank_shot_margin_5', 'timestamp']).rename(
                columns={'timestamp': 'timestamp_best_shot_5secs'})
        first_appearance_time = first_appearance_time.filter(['teamFamily','team', 'user', 'task', 'timestamp', 'rank_shot_margin_0']).rename(
                columns={'timestamp': 'timestamp_first_appearance', 'rank_shot_margin_0': 'rank_shot_first_appearance'})
        first_appearance_time_video = first_appearance_time_video.filter(['teamFamily','team', 'user', 'task', 'timestamp', 'rank_video']).rename(
                columns={'timestamp': 'timestamp_first_appearance_video', 'rank_video': 'rank_video_first_appearance'})
        last_appearance_time = last_appearance_time.filter(['teamFamily','team', 'user', 'task', 'timestamp', 'rank_shot_margin_0']).rename(
                columns={'timestamp': 'timestamp_last_appearance', 'rank_shot_margin_0': 'rank_shot_last_appearance'})

        #setting best timestamp to np.inf if there is not a best video/shot
        best_video_df.loc[df['rank_video'].isin([np.inf, -np.inf]), 'timestamp_best_video'] = -1
        best_shot_df.loc[df['rank_shot_margin_0'].isin([np.inf, -np.inf]), 'timestamp_best_shot']=-1
        best_shot_df_5secs.loc[df['rank_shot_margin_5'].isin([np.inf, -np.inf]), 'timestamp_best_shot_5secs'] = -1

        df = best_video_df.merge(best_shot_df, on=['teamFamily','team', 'user', 'task'])
        df = df.merge(best_shot_df_5secs, on=['teamFamily','team', 'user','task'])
        df = df.merge(first_appearance_time, on=['teamFamily','team', 'user','task'], how="outer")
        df = df.merge(last_appearance_time, on=['teamFamily','team', 'user','task'], how="outer")
        df = df.merge(first_appearance_time_video, on=['teamFamily','team', 'user','task'], how="outer")

        # convert timestamps in actual seconds from the start of the task
        df['task_start'] = df['task'].apply(lambda x: get_task_from_taskname(task_df,x)['started'])
        df['time_best_video'] = (df['timestamp_best_video'] - df['task_start'])
        df['time_best_shot'] = (df['timestamp_best_shot'] - df['task_start'])
        df['time_first_appearance'] = (df['timestamp_first_appearance'] - df['task_start'])
        df['time_first_appearance_video'] = (df['timestamp_first_appearance_video'] - df['task_start'])
        df['time_last_appearance'] = (df['timestamp_last_appearance'] - df['task_start'])
        df['time_best_shot_margin5'] = (df['timestamp_best_shot_5secs'] - df['task_start'])
        #df['time_correct_submission'] = df.apply(lambda x: runreader.get_csts()[x['teamFamily','team']][x['task']] -
        #                                                   runreader.tasks.get_task_from_taskname(x['task'])[
        #                                                           'started'], axis=1)
        fix_time_fun=lambda x: x / 1000 if x > 0 else np.inf
        df['time_best_video'] = df['time_best_video'].astype(float).apply(fix_time_fun)
        df['time_best_shot'] = df['time_best_shot'].astype(float).apply(fix_time_fun)
        df['time_best_shot_margin5'] = df['time_best_shot_margin5'].astype(float).apply(fix_time_fun)
        df['time_correct_submission'] = df['correct_submission_time_ms'].astype(float).apply(fix_time_fun)
        df['time_first_appearance'] = df['time_first_appearance'].astype(float).apply(fix_time_fun)
        df['time_first_appearance_video'] = df['time_first_appearance_video'].astype(float).apply(fix_time_fun)
        df['time_last_appearance'] = df['time_last_appearance'].astype(float).apply(fix_time_fun)

        df = df.round(decimals=0)

        df = df.filter(['teamFamily','team', 'user', 'task', 'task_start', 'time_correct_submission', 'time_best_video', 'time_best_shot',
                        'time_best_shot_margin5', 'rank_video', 'rank_shot_margin_0', 'rank_shot_margin_5',
                        'rank_shot_margin_10', 'time_first_appearance', 'rank_shot_first_appearance', 'time_last_appearance', 'rank_shot_last_appearance',
                        'time_first_appearance_video', 'rank_video_first_appearance'])

        df.replace([np.inf, -np.inf, np.nan], -1, inplace=True)

        return df



In [46]:
#def aggregate_teamfamily(df):


In [47]:
#penality function used to assess the best user of a team family
#the best user is the one that did
# def compute_user_penalty(df, max_records):
#         rank_video_mod = df['rank_video'].replace(-1, max_records + 1)
#         rank_shot_mod = df['rank_shot_margin_0'].replace(-1, max_records + 1)
#         time_video_mod = df['time_best_video'].replace(-1, 1000)
#         time_shot_mod = df['time_best_shot'].replace(-1, 1000)
#
#         task_duration = 420
#         user_penalty = time_video_mod + \
#                 rank_video_mod * task_duration + \
#                 time_shot_mod * task_duration * max_records + \
#                 rank_shot_mod * (task_duration ** 2) * max_records
#
#         return user_penalty

In [48]:
#for each task we should keep only the best user of the "teamFamily"
#df_results
# the best penalty contribution is given by the ranks. If they are equal, then the user that submitted earlier wins
#user_penalty = compute_user_penalty(total_df, max_records)

# Loading and processing data

In [49]:
#loading tasks
tasks_df = pd.read_csv(f"{data_dir}/tasks.csv")
#loading submissions
submissions_df = pd.read_csv(f"{data_dir}/submissions.csv")
sub_count_df = get_submissions_count(submissions_df)

#loading all teams results
dfs = []
dfs_events = []
for f in os.listdir(events_dir):
    team_df = pd.read_csv(os.path.abspath(f"{events_dir}/{f}"))
    dfs_events.append(team_df)
    team_df = get_team_values_df(team_df, submissions_df, tasks_df, max_records)
    dfs.append(team_df)

df_events=pd.concat(dfs_events,axis=0)
df_results = pd.concat(dfs, axis=0)

#df_results_aggregatedTeamFamily=aggregate_teamfamily(df_results)
print(f"df_events: {df_events.shape}" )
print(f"df_results: {df_results.shape}" )

df_events: (4361, 14)
df_results: (682, 18)


In [50]:
df_results.head()

Unnamed: 0,teamFamily,team,user,task,task_start,time_correct_submission,time_best_video,time_best_shot,time_best_shot_margin5,rank_video,rank_shot_margin_0,rank_shot_margin_5,time_first_appearance,rank_shot_first_appearance,time_last_appearance,rank_shot_last_appearance,time_first_appearance_video,rank_video_first_appearance
0,cvhunter,cvhunter1,1,vbse001,1668157000000.0,24.0,14.0,14.0,14.0,2.0,2.0,2.0,14.0,2.0,15.0,2.0,14.0,2.0
1,cvhunter,cvhunter1,1,vbse002,1668158000000.0,-1.0,178.0,178.0,178.0,96.0,96.0,96.0,8.0,136.0,263.0,135.0,8.0,136.0
2,cvhunter,cvhunter1,1,vbse003,1668158000000.0,87.0,35.0,35.0,35.0,33.0,33.0,33.0,9.0,48.0,35.0,33.0,9.0,46.0
3,cvhunter,cvhunter1,1,vbse004,1668158000000.0,18.0,11.0,11.0,11.0,3.0,3.0,3.0,11.0,3.0,11.0,3.0,11.0,3.0
4,cvhunter,cvhunter1,1,vbse005,1668158000000.0,13.0,9.0,9.0,9.0,2.0,2.0,2.0,9.0,2.0,10.0,2.0,9.0,2.0


In [51]:
df_events.head()

Unnamed: 0,task,team,user,timestamp,elapsed_since_task_start_ms,correct_submission_time_ms,rank_video,rank_shot_margin_0,rank_shot_margin_5,category,type,value,additionals,teamFamily
0,vbse001,cvhunter1,1,1668157262045,13678.0,24219.0,2.0,2.0,2.0,TEXT,JointEmbedding,CLIP: A woman holding a steaming mug close to ...,{},cvhunter
1,vbse001,cvhunter1,1,1668157263040,14673.0,24219.0,2.0,2.0,2.0,TEXT,JointEmbedding,CLIP: A woman holding a steaming mug close to ...,{},cvhunter
2,vbse002,cvhunter1,1,1668157580162,8188.0,,136.0,136.0,136.0,TEXT,JointEmbedding,CLIP: Viewer moving fast backwards on railway ...,{},cvhunter
3,vbse002,cvhunter1,1,1668157580949,8975.0,,136.0,136.0,136.0,TEXT,JointEmbedding,CLIP: Viewer moving fast backwards on railway ...,{},cvhunter
4,vbse002,cvhunter1,1,1668157614097,42123.0,,1161.0,1161.0,1161.0,IMAGE,feedbackModel,Bayes CLIP 15821:2727_16612:962_12565:4812_522...,{},cvhunter


In [52]:
submissions_df.head() #contains submission of each task

Unnamed: 0,taskName,team,teamFamily,user,task_start,task_end,timestamp,sessionID,status
0,vbse001,cvhunter3,cvhunter,3,1668157000000.0,1668158000000.0,1668157260122,d4549039-edba-4f67-be8d-49631b50e3d1,CORRECT
1,vbse001,cvhunter4,cvhunter,4,1668157000000.0,1668158000000.0,1668157265367,af025988-70ba-4e86-8b9d-3004de40a677,CORRECT
2,vbse001,vibro4,vibro,4,1668157000000.0,1668158000000.0,1668157269373,eda40d9d-2a9e-44a7-81ab-fa9a826663ea,CORRECT
3,vbse001,cvhunter1,cvhunter,1,1668157000000.0,1668158000000.0,1668157272586,a2b6d319-7da0-4278-a13b-ae553c892168,CORRECT
4,vbse001,cvhunter2,cvhunter,2,1668157000000.0,1668158000000.0,1668157278121,c08385eb-b1ec-4ed5-ba19-9441104833d0,CORRECT


 ## Render table with all the team results


In [53]:
def TimeRecallTable(df,teams, output_dir=output_dir):

        # drop unuseful columns from df that has the structure of df_results
        df = df.drop(
            ['time_first_appearance', 'rank_shot_first_appearance', 'time_last_appearance', 'rank_shot_last_appearance',
             'time_first_appearance_video', 'rank_video_first_appearance'], axis=1)

        df.drop(columns='task_start', inplace=True)

        df = df.fillna(-1)
        col = [c for c in df.columns.values.tolist() if c != 'team' and c != 'task' and c != 'user' and c != 'teamFamily' ]
        df[col] = df[col].astype('int32')
        df[col] = df[col].applymap(lambda x: -1 if x < 0 else x)
        df = df.astype('str')
        df.replace(['-1'], '-', inplace=True)


        # aggregate
        agg_dic = {c: (lambda x: ' / '.join(x)) for c in col}
        agg_dic['time_correct_submission'] = "min"
        df = df.groupby(['team', 'task'])[col].agg(agg_dic).reset_index()
        df.replace('- / -', '-', regex=True, inplace=True)
        add_second = lambda x: x if x == '-' else x + 's'
        df['time_correct_submission'] = df['time_correct_submission'].apply(add_second)
        df['time_best_shot'] = df['time_best_shot'].apply(add_second)
        df['time_best_video'] = df['time_best_video'].apply(add_second)
        df = df.melt(var_name="metric", id_vars=["team", "task"], value_name="value")
        df['unit'] = df['metric'].apply(lambda x: 'rank' if x.startswith('rank_') else 'time')
        replace_dic = {
            'rank_shot_margin_0': 'correct frame',
            'time_best_shot': 'correct frame',
            'rank_shot_margin_5': 'frame in GT+2x5s',
            'time_best_shot_margin5': 'frame in GT+2x5s',
            'rank_video': 'correct video',
            'time_best_video': 'correct video',
            'time_correct_submission': 'correct submission'
        }
        df['metric'] = df['metric'].map(replace_dic)
        df = df.pivot(index=['team', 'metric', 'unit'], columns="task", values="value")
        df = df.fillna('!')

        # sorting index desired order
        level_0 = teams  # order in the conf file
        level_1 = ['correct frame', 'frame in GT+2x5s', 'correct video','correct submission']
        level_2 = ['rank', 'time']
        df = df.reindex(pd.MultiIndex.from_product([level_0, level_1, level_2]))
        df.dropna(axis=0, inplace=True)  # 'correct submission'/rank shluld not be in the index
        print(df)
        print(f"Saving: {output_dir}/time_recall_table_withMargin5_vbse2022.csv")
        df.to_csv(f"{output_dir}/time_recall_table_withMargin5_vbse2022.csv")
        # sorting index desired order
        level_0 = teams  # order in the conf file
        level_1 = ['correct frame', 'correct video','correct submission']
        level_2 = ['rank', 'time']
        df = df.reindex(pd.MultiIndex.from_product([level_0, level_1, level_2]))
        df.dropna(axis=0, inplace=True)  # 'correct submission'/rank shluld not be in the index
        df.to_csv(f"{output_dir}/time_recall_table_vbse2022.csv")
        print(f"Saving: {output_dir}/time_recall_table_vbse2022.csv")
        return df



In [54]:
time_recall_table_df=TimeRecallTable(df_results, teams)

task                              vbse001 vbse002 vbse003 vbse004 vbse005  \
cvhunter1 correct frame      rank       2      96      33       3       2   
                             time     14s    178s     35s     11s      9s   
          frame in GT+2x5s   rank       2      96      33       3       2   
                             time      14     178      35      11       9   
          correct video      rank       2      96      33       3       2   
...                                   ...     ...     ...     ...     ...   
vibro4    frame in GT+2x5s   rank      47     404       1       6       1   
                             time       6     190     259       6       6   
          correct video      rank      47       4       1       6       1   
                             time      6s    190s     31s      6s      6s   
          correct submission time     21s    216s    263s     16s     22s   

task                              vbse006 vbse007 vbse008 vbse009 vbse010  

## COMPUTING DRES SCORES
both using original teams and teams aggregated by 'teamFamily'

In [55]:
#adding ncorrect and wrong submissions
#df_res_sub=df_results.merge(sub_count_df, on=["task","team","teamFamily","user"], how="outer").fillna(0)
#df_res_sub

In [56]:
def dres_KIS_score(index_firstCorrect,time_correct_submission, tDur):
        #todo: to be checked I used sa formula I found at https://github.com/dres-dev/DRES/blob/37bfa448852a090c564b7519b8c08292f71ede36/backend/src/main/kotlin/dev/dres/run/score/scorer/KisTaskScorer.kt
        #index of first correct submission is -1 if there are no correct submission ortherwise is equal to  number of not correct submissions
        #tDur=  actual duration of task, in case it was extended during competition.
        maxPointsPerTask = 100.0
        maxPointsAtTaskEnd = 50.0
        penaltyPerWrongSubmission = 10.0

        score=0.0
        if index_firstCorrect>-1:
                timeFraction =1.0-time_correct_submission/tDur
                score=  max(0.0,maxPointsAtTaskEnd + ((maxPointsPerTask - maxPointsAtTaskEnd) * timeFraction) - (index_firstCorrect * penaltyPerWrongSubmission) )
        return score

def compute_team_scores(df,sub_df, aggregate_by_teamFamily=False): ##NOT WORKING!!!!!!TO BE FIXED
        scores=[]
        task_lists=df['task'].unique()
        if(aggregate_by_teamFamily):
                df['team']=df['teamFamily']

        teams_lists=df['team'].unique()
        print(teams_lists)

        for task,team in itertools.product(task_lists, teams_lists):
                sub_team_task=sub_df.loc[(sub_df['team'].str.startswith(team)) & (sub_df['taskName']==task) ]
                sub_team_task=sub_team_task.sort_values(by=['timestamp']).reset_index()
                score_team_task=0.0

                if len(sub_team_task.index)>0:
                        correct_sub=sub_team_task [sub_team_task['status']=='CORRECT']
                        if(len(correct_sub.index)>0):
                                firstCorrect=correct_sub.iloc[0]
                                timestamp=firstCorrect['timestamp']
                                time_correct_submission=timestamp-firstCorrect['task_start'] #milliseconds
                                tDur=firstCorrect['task_end']-firstCorrect['task_start']
                                index_firstCorrect=firstCorrect['index']
                                score_team_task=dres_KIS_score(index_firstCorrect,time_correct_submission, tDur)

                scores.append({'team': team, 'task':task, 'score': score_team_task })
        return pd.DataFrame(scores)

In [57]:
##NOT WORKING!!!!!!TO BE FIXED

team_scores_df=compute_team_scores(df_results,submissions_df)
teamFamily_scores_df=compute_team_scores(df_results,submissions_df, aggregate_by_teamFamily=True)

['cvhunter1' 'cvhunter2' 'cvhunter3' 'cvhunter4' 'vibro1' 'vibro2'
 'vibro3' 'vibro4' 'visione1' 'visione2' 'visione3' 'visione4']
['cvhunter' 'vibro' 'visione']


In [58]:
team_scores_df

Unnamed: 0,team,task,score
0,cvhunter1,vbse001,65.963594
1,cvhunter2,vbse001,55.041116
2,cvhunter3,vbse001,98.040879
3,cvhunter4,vbse001,87.166733
4,vibro1,vbse001,0.000000
...,...,...,...
679,vibro4,vbse060,0.000000
680,visione1,vbse060,0.000000
681,visione2,vbse060,0.000000
682,visione3,vbse060,0.000000


In [59]:
teamFamily_scores_df

Unnamed: 0,team,task,score
0,cvhunter,vbse001,98.040879
1,vibro,vbse001,76.499082
2,visione,vbse001,16.003827
3,cvhunter,vbse002,0.000000
4,vibro,vbse002,0.000000
...,...,...,...
166,vibro,vbse059,0.000000
167,visione,vbse059,0.000000
168,cvhunter,vbse060,0.000000
169,vibro,vbse060,0.000000
