In [226]:
import pandas as pd
import json
import numpy as np
import math

In [227]:
# Load the per-scene detection results for this movie.
# The encoding is pinned to UTF-8 so the read does not depend on the
# platform's locale default.
with open('tt0822832.json', 'r', encoding='utf-8') as f:
    json_file = json.load(f)

In [228]:
# Load the ObyGaze12 annotation table and keep only the columns used below.
# NOTE(review): .iloc[1:] skips the first data row, exactly as before —
# confirm that row is not a real annotation.
oby_gaze_df = pd.read_csv('../ObyGaze12.csv', delimiter=';').iloc[1:][['label', 'movie', 'id']]

# Restrict to this movie. .copy() yields an independent frame, so adding the
# 'scene' column below does not write into a slice of the full table.
oby_gaze_df = oby_gaze_df[oby_gaze_df['movie'] == 'tt0822832'].copy()

# Build the scene key '<movie>_scene_<n>' from ids shaped like 'tt0822832-001':
# take the second '-'-separated field and strip its zero padding via int.
# Vectorised string ops replace the previous row-wise apply, whose f-string
# reused the same quote character inside its replacement fields (a SyntaxError
# on Python versions before 3.12).
oby_gaze_df['scene'] = (
    oby_gaze_df['movie']
    + '_scene_'
    + oby_gaze_df['id'].str.split('-').str[1].astype(int).astype(str)
)

In [229]:
# Preview the per-clip labels for this movie together with the derived scene key.
oby_gaze_df

Unnamed: 0,label,movie,id,scene
752,Easy Neg,tt0822832,tt0822832-001,tt0822832_scene_1
753,Easy Neg,tt0822832,tt0822832-002,tt0822832_scene_2
754,Not Sure,tt0822832,tt0822832-003,tt0822832_scene_3
755,Easy Neg,tt0822832,tt0822832-004,tt0822832_scene_4
756,Hard Neg,tt0822832,tt0822832-005,tt0822832_scene_5
...,...,...,...,...
867,Easy Neg,tt0822832,tt0822832-116,tt0822832_scene_116
868,Easy Neg,tt0822832,tt0822832-117,tt0822832_scene_117
869,Hard Neg,tt0822832,tt0822832-118,tt0822832_scene_118
870,Hard Neg,tt0822832,tt0822832-119,tt0822832_scene_119


In [230]:
# Flatten the nested detection JSON into one flat record per detected box.
# Every top-level key except 'film' is a scene entry with a 'scene' id and a
# 'detections' list; each element of 'detections' is itself a list of
# per-person dicts.
scenes_list = []
for entry_key, scene_entry in json_file.items():
    if entry_key == 'film':
        continue
    scene_id = scene_entry['scene']
    scenes_list.extend(
        {
            'scene': scene_id,
            'frame': person['Img_ID'],
            'ID': person['ID'],
            'Confidence': person['Confidence'],
            'Dimensions': person['Dimensions'],
            'img_width': person['Img_width'],
            'img_height': person['Img_height'],
        }
        for frame_detections in scene_entry['detections']
        for person in frame_detections
    )

In [231]:
# One row per detection record; columns follow the keys of the dicts in scenes_list.
df = pd.DataFrame(scenes_list)

In [232]:
# Keep only records that actually carry a bounding box (non-empty 'Dimensions'),
# i.e. where there is at least one person.
# .copy() detaches the result from the unfiltered frame, so the column
# assignments in later cells operate on an independent frame instead of a
# slice (avoids SettingWithCopyWarning and view/copy ambiguity).
df = df[df['Dimensions'].apply(len) > 0].copy()

In [233]:
# Per-frame key '<scene>_<frame>', built with vectorised string concatenation
# rather than a row-wise DataFrame.apply; astype(str) mirrors the str()
# formatting the f-string applied to each value.
df['scene-frame'] = df['scene'].astype(str) + '_' + df['frame'].astype(str)

# 'Dimensions' is indexed as [x0, y0, x1, y1]: elements 0/1 are stored as the
# box origin, and width/height are the differences to elements 2/3.
df['x'] = df['Dimensions'].apply(lambda box: box[0])
df['y'] = df['Dimensions'].apply(lambda box: box[1])
df['width'] = df['Dimensions'].apply(lambda box: box[2] - box[0])
df['height'] = df['Dimensions'].apply(lambda box: box[3] - box[1])

# Box area in pixels, and as a fraction of the whole image area.
df['area'] = df['width'] * df['height']
df['area_norm'] = df['area'] / (df['img_width'] * df['img_height'])



In [234]:
# Image centre in pixels.
df['centre_img_x'] = df['img_width'] / 2
df['centre_img_y'] = df['img_height'] / 2

# Signed displacement of the box centre from the image centre, expressed as a
# fraction of the image width / height.
df['offset_x'] = (df['centre_img_x'] - (df['x'] + df['width'] / 2)) / df['img_width']
df['offset_y'] = (df['centre_img_y'] - (df['y'] + df['height'] / 2)) / df['img_height']

# Euclidean length of that displacement, rescaled by 2 / sqrt(2): a box centre
# sitting on an image corner (|offset_x| = |offset_y| = 0.5) maps to 1.
squared_distance = df['offset_x'] ** 2 + df['offset_y'] ** 2
df['offset'] = (np.sqrt(squared_distance) / math.sqrt(2)) * 2

In [235]:
# Drop the raw box list and the intermediate columns that were only needed to
# derive 'area_norm' and 'offset'. Each label is listed once ('area' was
# previously repeated in the list).
df = df.drop(
    columns=[
        'Dimensions',
        'area',
        'offset_x',
        'offset_y',
        'centre_img_x',
        'centre_img_y',
        'img_width',
        'img_height',
    ]
)

In [236]:
# Preview the per-detection table after feature engineering (box geometry, area_norm, offset).
df

Unnamed: 0,scene,frame,ID,Confidence,scene-frame,x,y,width,height,area_norm,offset
0,tt0822832_scene_10,0,00,0.927024,tt0822832_scene_10_0,265,0,567,358,0.661796,0.203373
1,tt0822832_scene_10,0,01,0.302094,tt0822832_scene_10_0,771,50,80,80,0.020866,0.730334
2,tt0822832_scene_10,1,10,0.925233,tt0822832_scene_10_1,262,0,573,360,0.672535,0.203335
3,tt0822832_scene_10,2,20,0.920848,tt0822832_scene_10_2,260,0,573,360,0.672535,0.200015
4,tt0822832_scene_10,2,21,0.541253,tt0822832_scene_10_2,763,9,88,122,0.035003,0.765946
...,...,...,...,...,...,...,...,...,...,...,...
331465,tt0822832_scene_98,999,9993,0.355406,tt0822832_scene_98_999,682,135,111,149,0.053922,0.529879
331466,tt0822832_scene_98,1000,10000,0.902542,tt0822832_scene_98_1000,541,104,228,252,0.187324,0.427861
331467,tt0822832_scene_98,1000,10001,0.615886,tt0822832_scene_98_1000,429,39,106,313,0.108170,0.111121
331468,tt0822832_scene_98,1000,10002,0.480744,tt0822832_scene_98_1000,676,135,94,174,0.053326,0.519860


In [237]:
# Per-frame summary: number of detections, mean centre offset and mean
# normalised box area. Output columns keep the source column names.
grouped_df = df.groupby('scene-frame').agg(
    ID=('ID', 'count'),
    offset=('offset', 'mean'),
    area_norm=('area_norm', 'mean'),
)

In [238]:
# Attach the per-frame summary to every detection of that frame, then derive:
#   people_size_diff = 1 - mean normalised box area of the frame
#   num_people_diff  = 1 - 1 / (detections in the frame + 1)
frame_stats = grouped_df[['ID', 'offset', 'area_norm']].rename(
    columns={
        'ID': 'num_people',
        'offset': 'offset_diff',
        'area_norm': 'people_size_diff',
    }
)
df = (
    df.merge(frame_stats, on='scene-frame', how='inner')
    .assign(
        people_size_diff=lambda merged: 1 - merged['people_size_diff'],
        num_people_diff=lambda merged: 1 - 1 / (merged['num_people'] + 1),
    )
)


In [239]:
# Attach the annotation label of each scene. The join is inner, so detections
# whose scene has no row in oby_gaze_df are dropped from df.
scene_labels = oby_gaze_df[['scene', 'label']]
df = df.merge(scene_labels, how='inner', on='scene')

In [244]:
# Binary target: True only for the exact label 'Sure'; every other label
# (e.g. 'Not Sure', 'Easy Neg', 'Hard Neg') maps to False.
df['Objectification'] = df['label'].eq('Sure')

In [245]:
# Preview the final per-detection table with frame-level features, label and the Objectification flag.
df

Unnamed: 0,scene,frame,ID,Confidence,scene-frame,x,y,width,height,area_norm,offset,num_people,offset_diff,people_size_diff,num_people_diff,label,Objectification
0,tt0822832_scene_10,0,00,0.927024,tt0822832_scene_10_0,265,0,567,358,0.661796,0.203373,2,0.466853,0.658669,0.666667,Sure,True
1,tt0822832_scene_10,0,01,0.302094,tt0822832_scene_10_0,771,50,80,80,0.020866,0.730334,2,0.466853,0.658669,0.666667,Sure,True
2,tt0822832_scene_10,1,10,0.925233,tt0822832_scene_10_1,262,0,573,360,0.672535,0.203335,1,0.203335,0.327465,0.500000,Sure,True
3,tt0822832_scene_10,2,20,0.920848,tt0822832_scene_10_2,260,0,573,360,0.672535,0.200015,2,0.482981,0.646231,0.666667,Sure,True
4,tt0822832_scene_10,2,21,0.541253,tt0822832_scene_10_2,763,9,88,122,0.035003,0.765946,2,0.482981,0.646231,0.666667,Sure,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324158,tt0822832_scene_98,999,9993,0.355406,tt0822832_scene_98_999,682,135,111,149,0.053922,0.529879,4,0.303726,0.870063,0.800000,Hard Neg,False
324159,tt0822832_scene_98,1000,10000,0.902542,tt0822832_scene_98_1000,541,104,228,252,0.187324,0.427861,4,0.308287,0.882707,0.800000,Hard Neg,False
324160,tt0822832_scene_98,1000,10001,0.615886,tt0822832_scene_98_1000,429,39,106,313,0.108170,0.111121,4,0.308287,0.882707,0.800000,Hard Neg,False
324161,tt0822832_scene_98,1000,10002,0.480744,tt0822832_scene_98_1000,676,135,94,174,0.053326,0.519860,4,0.308287,0.882707,0.800000,Hard Neg,False
