# Annotation Preprocessing for ActionFormerObj

## 1. Imports

In [1]:
import pandas as pd
import numpy as np
import ast
import math
import numpy as np
import json
import os

## 2. Util functions

In [None]:
def name2int(val, transfo_dict):
    """Translate a label name into its integer code.

    Args:
        val: The key to look up (a label name in this notebook).
        transfo_dict: Mapping from label names to integer codes.

    Returns:
        The value stored under ``val``, or ``None`` when ``val`` is not a
        key of ``transfo_dict``.
    """
    if val in transfo_dict:
        return transfo_dict[val]
    return None

## 3. Parameters

In [2]:
# Length, in minutes, of the fixed windows ("scenes") each movie is cut into.
MIN_PER_SCENE = 5

# Label-name -> integer mappings, keyed by the suffix used in output file names.
# A label name missing from a mapping is looked up as None by name2int.
label2use = {
    "ENHN_S": {
        "Easy Negative": 0,
        "Hard Negative": 0,
        "Sure": 1,
    },
    "EN_HNS": {
        "Easy Negative": 0,
        "Hard Negative": 1,
        "Sure": 1,
    },
    "EN_S": {
        "Easy Negative": 0,
        "Sure": 1,
    },
}

# Input folder holding the raw CSV annotations, and output folder for the JSON.
root_path = "/media/LaCie/Annotation_20240521/raw"
saving_dir = "/media/LaCie/Annotation_20240521/json_style"
A2_p = os.path.join(root_path, "annotations_A2_20_removed_nonvisual.csv")
A1_p = os.path.join(root_path, "annotations_A1_20_removed_nonvisual.csv")

# Semicolon-separated files; the first column is used as the index.
df_A2 = pd.read_csv(A2_p, sep=";", index_col=0)
df_A1 = pd.read_csv(A1_p, sep=";", index_col=0)

# Both annotators stacked into a single table with a fresh integer index.
df = pd.concat([df_A2, df_A1], ignore_index=True)

# Per-annotator tables, addressed by annotator name.
dico_df = dict(A1=df_A1, A2=df_A2)

## 4. Json for ActionFormer with the Regression Head

In [None]:
# Export one JSON file per (label mapping, annotator) pair into
# <saving_dir>/withReg. Each movie is cut into consecutive windows ("scenes")
# of MIN_PER_SCENE minutes; only rows whose mapped label equals 1 become
# annotations, and only scenes with at least one annotation are written.

# Rows whose [start_frame, end_frame] interval touches the window [@ss, @es].
SCENE_OVERLAP_QUERY = (
    "(start_frame <= @ss and end_frame>= @ss and end_frame <= @es) or "
    "(start_frame >= @ss and end_frame <= @es) or "
    "(start_frame >= @ss and end_frame>= @es and start_frame <= @es) or "
    "(start_frame <= @ss and end_frame>= @es )"
)
# Segments whose (rounded) duration in seconds is not above this are skipped.
MIN_SEGMENT_SEC = 0.1

for FILE_EXT, label2int in label2use.items():
    for ANNOTATOR in ['A1', 'A2']:
        print("-----------", FILE_EXT, "    ", ANNOTATOR, "------------")
        df = dico_df[ANNOTATOR].copy()

        # Labels missing from the current mapping come back as None and are dropped.
        df["label_obj"] = df["label"].apply(lambda x: name2int(x, label2int))
        df = df.dropna(subset=["label_obj"])

        movies = df.imdb_key.unique()

        total_json = {"version": "test_final", "database": {}}

        for movie in movies:
            nb_void = 0  # scenes of this movie that end up with no annotation
            sub_df_movie = df.query("imdb_key == @movie")
            fps = sub_df_movie.iloc[0]['framerate']

            # Window boundaries, in frames.
            # NOTE(review): the trailing partial window (after the last full
            # MIN_PER_SCENE-minute window) is not covered, so rows lying only
            # there are never exported -- confirm this is intended.
            scene_len = MIN_PER_SCENE * fps * 60
            last_frame = sub_df_movie.end_frame.max()
            start_scenes = np.arange(0, last_frame - scene_len, scene_len)
            end_scenes = np.arange(scene_len, last_frame, scene_len)

            for num_scene, (ss, es) in enumerate(zip(start_scenes, end_scenes)):
                ss = int(ss)
                es = int(es)
                dico_video = {
                    "duration": int(MIN_PER_SCENE * 60),  # seconds; overwritten below
                    "fps": fps,
                    "imdb_id": movie,
                    "start_scene": ss,
                    "end_scene": es,
                }
                video_name = movie + "_" + str(num_scene).zfill(3)
                annotations = []

                scene_df = sub_df_movie.query(SCENE_OVERLAP_QUERY)

                for _, row in scene_df.iterrows():
                    start_frame = int(row["start_frame"])
                    end_frame = int(row["end_frame"])
                    row_fps = row["framerate"]

                    dico_video["fps"] = row_fps
                    # NOTE(review): this replaces the duration in seconds with
                    # a frame count (seconds * fps). Kept as in the original
                    # export; confirm which unit the consumer expects.
                    dico_video["duration"] = int(MIN_PER_SCENE * 60 * row_fps)

                    if row["label_obj"] != 1:
                        continue

                    # Clip the row to the window and express it relative to the
                    # window start. Rows selected by SCENE_OVERLAP_QUERY always
                    # match one of its four cases, and in each of them this
                    # max/min form gives the same bounds as a per-case branch.
                    s = [max(start_frame, ss) - ss, min(end_frame, es) - ss]
                    s_sec = [round(float(s[0] / row_fps), 2), round(float(s[1] / row_fps), 2)]
                    segment_duration = s_sec[1] - s_sec[0]

                    if segment_duration > MIN_SEGMENT_SEC:
                        annotations.append({
                            "label": "objectification",
                            "segment": s_sec,
                            "segment(frames)": s,
                            "label_id": int(row["label_obj"]),
                        })
                    else:
                        print("SEGMENT NOT LONGER THAN", MIN_SEGMENT_SEC, "SECOND", row["clip_index"])

                if annotations:
                    dico_video["annotations"] = annotations
                    total_json["database"][video_name] = dico_video
                else:
                    nb_void += 1

            print(movie, " NB scenes without objectification : ", nb_void, "/", len(start_scenes))

        # Create the target folder if needed so the export does not fail at
        # the very end because of a missing directory.
        out_dir = os.path.join(saving_dir, 'withReg')
        os.makedirs(out_dir, exist_ok=True)
        saving_file = os.path.join(out_dir, f"main_{ANNOTATOR}_{FILE_EXT}.json")
        with open(saving_file, "w") as json_file:
            json.dump(total_json, json_file)

        print("--------------------")


## 5. Json for ActionFormer without the Regression Head

In [None]:
# Export one JSON file per (label mapping, annotator) pair into
# <saving_dir>/withoutReg. Each movie is cut into consecutive windows
# ("scenes") of MIN_PER_SCENE minutes; every row overlapping a window becomes
# an annotation ("objectification" when its mapped label is 1, otherwise
# "no_objectification"), and only scenes with at least one annotation are written.

# Rows whose [start_frame, end_frame] interval touches the window [@ss, @es].
SCENE_OVERLAP_QUERY = (
    "(start_frame <= @ss and end_frame>= @ss and end_frame <= @es) or "
    "(start_frame >= @ss and end_frame <= @es) or "
    "(start_frame >= @ss and end_frame>= @es and start_frame <= @es) or "
    "(start_frame <= @ss and end_frame>= @es )"
)
# Segments whose (rounded) duration in seconds is not above this are skipped.
MIN_SEGMENT_SEC = 0.1

for FILE_EXT, label2int in label2use.items():
    for ANNOTATOR in ['A1', 'A2']:
        print("-----------", FILE_EXT, "    ", ANNOTATOR, "------------")
        df = dico_df[ANNOTATOR].copy()

        # Labels missing from the current mapping come back as None and are dropped.
        df["label_obj"] = df["label"].apply(lambda x: name2int(x, label2int))
        df = df.dropna(subset=["label_obj"])

        movies = df.imdb_key.unique()

        total_json = {"version": "test_final", "database": {}}
        # Counted across all movies of this (mapping, annotator) pair: scenes
        # for which no annotation at all was kept.
        nb_void = 0
        for movie in movies:
            print("IMDB : ", movie)
            sub_df_movie = df.query("imdb_key == @movie")
            fps = sub_df_movie.iloc[0]['framerate']

            # Window boundaries, in frames.
            # NOTE(review): the trailing partial window (after the last full
            # MIN_PER_SCENE-minute window) is not covered, so rows lying only
            # there are never exported -- confirm this is intended.
            scene_len = MIN_PER_SCENE * fps * 60
            last_frame = sub_df_movie.end_frame.max()
            start_scenes = np.arange(0, last_frame - scene_len, scene_len)
            end_scenes = np.arange(scene_len, last_frame, scene_len)

            for num_scene, (ss, es) in enumerate(zip(start_scenes, end_scenes)):
                ss = int(ss)
                es = int(es)
                dico_video = {
                    "duration": int(MIN_PER_SCENE * 60),  # seconds; overwritten below
                    "fps": fps,
                    "imdb_id": movie,
                    "start_scene": ss,
                    "end_scene": es,
                }
                video_name = movie + "_" + str(num_scene).zfill(3)
                annotations = []

                scene_df = sub_df_movie.query(SCENE_OVERLAP_QUERY)

                for _, row in scene_df.iterrows():
                    start_frame = int(row["start_frame"])
                    end_frame = int(row["end_frame"])
                    row_fps = row["framerate"]

                    dico_video["fps"] = row_fps
                    # NOTE(review): this replaces the duration in seconds with
                    # a frame count (seconds * fps). Kept as in the original
                    # export; confirm which unit the consumer expects.
                    dico_video["duration"] = int(MIN_PER_SCENE * 60 * row_fps)
                    label_int = row["label_obj"]

                    # Clip the row to the window and express it relative to the
                    # window start. Rows selected by SCENE_OVERLAP_QUERY always
                    # match one of its four cases, and in each of them this
                    # max/min form gives the same bounds as a per-case branch.
                    s = [max(start_frame, ss) - ss, min(end_frame, es) - ss]
                    s_sec = [round(float(s[0] / row_fps), 2), round(float(s[1] / row_fps), 2)]
                    segment_duration = s_sec[1] - s_sec[0]

                    if segment_duration > MIN_SEGMENT_SEC:
                        label_name = "objectification" if label_int == 1 else "no_objectification"
                        annotations.append({
                            "label": label_name,
                            "segment": s_sec,
                            "segment(frames)": s,
                            "label_id": int(row["label_obj"]),
                        })
                    else:
                        print("SEGMENT NOT LONGER THAN", MIN_SEGMENT_SEC, "SECOND", row)

                if annotations:
                    dico_video["annotations"] = annotations
                    total_json["database"][video_name] = dico_video
                else:
                    nb_void += 1

        print("NB scenes without objectification : ", nb_void)
        print(ANNOTATOR, MIN_PER_SCENE)

        # Create the target folder if needed so the export does not fail at
        # the very end because of a missing directory.
        out_dir = os.path.join(saving_dir, 'withoutReg')
        os.makedirs(out_dir, exist_ok=True)
        saving_file = os.path.join(out_dir, f"main_{ANNOTATOR}_{FILE_EXT}.json")
        with open(saving_file, "w") as json_file:
            json.dump(total_json, json_file)