In [19]:
import os
import argparse
import numpy as np
import pandas as pd
import time
import pickle
import json
from tqdm.notebook import tqdm
from numpy.lib.format import open_memmap
from scipy.stats import multivariate_normal, entropy
from scripts.data import *
from scripts.preprocessing import *
from scripts.utils import *
from scripts.main import *
from scripts.saliency_metrics import *

# auxiliary functions
def create_saliency_matrix(video_path, saliency_path ='../videos_sal/vinet'):
    """Load every saliency frame of one video into a single array.

    Parameters
    ----------
    video_path : str
        Name of the video's sub-folder inside ``saliency_path``.
    saliency_path : str
        Root folder holding one sub-folder of saliency images per video.

    Returns
    -------
    numpy.ndarray
        The images, read as grayscale in sorted file-name order and stacked
        along a new third axis (``axis=2``).
    """
    frames_dir = os.path.join(saliency_path, video_path)
    # Entries whose name starts with a dot (hidden files) are not frames.
    frame_names = [name for name in sorted(os.listdir(frames_dir)) if name[0] != '.']
    frames = [
        cv2.imread(os.path.join(frames_dir, name), cv2.IMREAD_GRAYSCALE)
        for name in frame_names
    ]
    return np.stack(frames, axis=2)

# add frame idx
def add_frame_idx(df,
                  vid_name,
                  data_path,
                  trials_data,
                  videos_data):
    """Attach to every scored fixation the first frame it was seen on.

    For each subject in ``df`` the raw eye-tracking file is read again and run
    through ``preprocess_fixations_new_dataset``; the first element of each
    fixation's ``frames_seen`` entry is then merged onto the subject's rows by
    fixation index.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows with at least the columns ``ID``, ``FIX_idx`` and ``ET_FILE``.
    vid_name : str
        Key of the video in ``videos_data`` and in each subject's trial table.
    data_path : str
        Folder that the ``ET_FILE`` paths are joined onto.
    trials_data : pandas.DataFrame
        Trial table indexed by subject ID, with ``VideoName`` and ``Start``
        columns.
    videos_data : pandas.DataFrame
        Video table indexed by video name, with a ``FramesTimestamps`` column.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-subject merges, with the merged column named
        ``FRAME_idx``. An empty ``df`` yields an empty frame that already has
        the ``FRAME_idx`` column.
    """
    subject_ids = df.ID.unique()
    if len(subject_ids) == 0:
        # pd.concat raises on an empty list; hand back an empty result with
        # the output schema instead of crashing.
        return df.assign(FRAME_idx=[])

    # The frame timestamps depend only on the video, so look them up once.
    frame_timest = videos_data.loc[vid_name, 'FramesTimestamps']

    dfs_list = []
    for idx in subject_ids:
        df_aux  = df[df.ID == idx]
        et_file = df_aux.ET_FILE.iloc[0]
        # Strip a leading './' only when it is really there; slicing blindly
        # would eat the first two characters of any other path.
        rel_et_file = et_file[2:] if et_file.startswith('./') else et_file
        df_et       = pd.read_csv(os.path.join(data_path, rel_et_file))
        trial_init  = trials_data.loc[idx].set_index('VideoName').loc[vid_name].Start
        # NOTE(review): calculate_metric_subject calls this helper with
        # sdt_correction=False, here the helper's default is used. Confirm both
        # calls yield the same fixation index, otherwise the merge misaligns.
        df_fix, _   = preprocess_fixations_new_dataset(df_et, frame_timest, trial_init)
        frame_id    = df_fix.frames_seen.apply(lambda x: x[0])
        # how='right' keeps every fixation returned by the preprocessing, even
        # those without a matching FIX_idx in df (their df columns become NaN).
        dfs_list.append(df_aux.merge(frame_id, how='right', left_on='FIX_idx', right_index=True))

    return pd.concat(dfs_list).rename(columns={'frames_seen': 'FRAME_idx'})

def calculate_metric_subject(df_et, 
                             fold_subj, 
                             trials_data,
                             videos_data,
                             vid_name,
                             saliency,
                             sal_mean,
                             sal_std,
                             metric,
                             trial_init=None,
                             frame_dur=None,
                             frame_timest=None):
    """Score one subject's fixations on one video against its saliency maps.

    Parameters
    ----------
    df_et : pandas.DataFrame
        Raw eye-tracking samples, passed to ``preprocess_fixations_new_dataset``.
    fold_subj : str
        Subject ID; only used to look up ``trial_init`` when it is not given.
    trials_data, videos_data : pandas.DataFrame
        Lookup tables used to fill in the three optional arguments.
    vid_name : str
        Key of the video in both tables.
    saliency : numpy.ndarray
        Saliency maps; the size of the last axis is used as the frame count.
    sal_mean, sal_std :
        Per-frame statistics, forwarded to ``calculate_NSS``.
    metric : {'CC', 'NSS'}
        Metric to compute.
    trial_init, frame_dur, frame_timest : optional
        Trial start, frame duration and frame timestamps. ``frame_dur``
        defaults to the second entry of the video's ``FramesTimestamps``
        (presumably the duration of one frame when the first timestamp is 0;
        TODO confirm).

    Returns
    -------
    tuple or None
        ``(df_fix, score, mean_score, flag)``, or ``None`` when the
        preprocessing flags the recording with ``-1`` or ``metric`` is not
        supported.
    """
    if frame_dur is None:
        frame_dur = (videos_data.loc[vid_name].FramesTimestamps[1])
    if trial_init is None:
        trial_init   = trials_data.loc[fold_subj].set_index('VideoName').loc[vid_name].Start
    if frame_timest is None:
        frame_timest = videos_data.loc[vid_name,'FramesTimestamps']
    
    df_fix, flag = preprocess_fixations_new_dataset(df_et, frame_timest, trial_init, sdt_correction = False)

    # Check the flag before touching df_fix: a rejected recording must return
    # None whatever the helper hands back as its first value.
    if flag == -1:
        return None

    # drop fixations after video ended
    df_fix = df_fix[df_fix['start_time']//frame_dur < saliency.shape[-1]]

    if metric == 'CC':
        score = calculate_CC_apply(df_fix, saliency)
    elif metric == 'NSS':
        score = calculate_NSS(df_fix, saliency, sal_mean, sal_std, frame_dur)
    else:
        print('Metric not implemented')
        return None
    
    # NOTE: if no fixation survives the filter above, the mean of an empty
    # score list is NaN (numpy emits a RuntimeWarning).
    mean_score = np.mean(np.array(score))

    return df_fix, score, mean_score, flag

def calculate_metric_dataset_chunks(data_path,
                                    trials_data,
                                    videos_data,
                                    n_chunks:int,
                                    chunk_i:int,
                                    vid_name,
                                    saliency = None,
                                    metric = 'NSS',
                                    saliency_path = '',
                                    metadata_df = None):
    """Score every subject folder of one chunk of ``data_path`` for one video.

    Parameters
    ----------
    data_path : str
        Folder containing one sub-folder per subject.
    trials_data, videos_data : pandas.DataFrame
        Lookup tables (trial start per subject and video; frame timestamps and
        file name per video).
    n_chunks : int
        Number of chunks the sorted subject list is split into; ``0`` means
        "process everything".
    chunk_i : int
        Zero-based index of the chunk to process. The last chunk also takes
        the remainder of the integer division.
    vid_name : str
        One of ``'Diary'``, ``'Fractals'``, ``'Present'``.
    saliency : numpy.ndarray, optional
        Saliency maps; loaded with ``create_saliency_matrix`` when omitted.
    metric : str
        Forwarded to ``calculate_metric_subject``.
    saliency_path : str
        Root folder of the saliency images, used only when ``saliency`` is None.
    metadata_df : pandas.DataFrame, optional
        Table with an ``ID`` column listing the valid subjects. Defaults to
        the notebook-level ``metadata`` variable, as before.

    Returns
    -------
    tuple
        ``(results, diagnostics)``. ``results`` holds one tuple per scored
        subject: ``(subject, df_fix.index, score, mean_score, len(df_fix),
        flag, vid_name, et_file)``. ``diagnostics`` is a dict with the lists
        ``missing_folders``, ``missing_metadata``, ``video_not_seen`` and
        ``errors``.
    """
    frame_dur    = (videos_data.loc[vid_name].FramesTimestamps[1])
    # Loop-invariant: the timestamps depend only on the video.
    frame_timest = videos_data.loc[vid_name,'FramesTimestamps']
    vid = {'Diary': 'WK', 'Fractals': 'FF', 'Present': 'TP'}

    if metadata_df is None:
        # Backward-compatible fallback to the notebook global.
        metadata_df = metadata
    # Build the membership set once instead of a fresh list per subject.
    known_ids = set(metadata_df['ID'].unique())
    
    if saliency is None:
        saliency = create_saliency_matrix(videos_data.loc[vid_name].Video[:-4], saliency_path=saliency_path)
        
    missing_folders  = []   # never filled here; kept so the output keys stay stable
    video_not_seen   = []
    missing_metadata = []
    results          = []
    error_list       = []

    # precalculate saliency metrics (std frame by frame, as in the original loop)
    sal_mean = saliency.mean(axis=(0,1))
    sal_std  = np.array([saliency[:,:,i].std() for i in range(saliency.shape[-1])],
                        dtype=np.float64)
    
    # calculate chunk
    aux = sorted(os.listdir(data_path))
    
    if n_chunks == 0:
        iter_list = aux
    else:
        corte = len(aux)//n_chunks
        start = chunk_i * corte
        # The last chunk runs to the end so the division remainder is not lost.
        stop  = None if chunk_i == n_chunks-1 else start + corte
        iter_list = aux[start:stop]
    
    for fold_subj in tqdm(iter_list):
        # check
        if fold_subj not in known_ids:
            missing_metadata.append(fold_subj)
            continue
        
        # os.path.join works whether or not data_path ends with a separator.
        subj_dir     = os.path.join(data_path, fold_subj)
        # Sorted so the chosen file does not depend on os.listdir order.
        csv_vid_file = sorted(f for f in os.listdir(subj_dir) if vid[vid_name] in f)
        if len(csv_vid_file) == 0:
            video_not_seen.append(fold_subj)
            continue

        et_file = os.path.join(subj_dir, csv_vid_file[0])
        df_et   = pd.read_csv(et_file)
        
        # `except Exception` keeps the best-effort behaviour but lets
        # KeyboardInterrupt / SystemExit stop a long run.
        try:
            trial_init = trials_data.loc[fold_subj].set_index('VideoName').loc[vid_name].Start
        except Exception:
            missing_metadata.append([fold_subj, vid_name])
            continue
          
        try:  
            outcome = calculate_metric_subject(df_et,
                                               fold_subj,
                                               trials_data, 
                                               videos_data, 
                                               vid_name, 
                                               saliency, 
                                               sal_mean, 
                                               sal_std,
                                               metric, 
                                               trial_init,
                                               frame_dur, 
                                               frame_timest)
            if outcome is None:
                # Rejected recording or unsupported metric: recorded as an
                # error, as before (it used to surface as an unpacking TypeError).
                error_list.append(fold_subj)
                continue

            df_fix, score, mean_score, flag = outcome
            results.append((fold_subj,  df_fix.index, score, mean_score, len(df_fix), flag, vid_name, et_file))
        except Exception:
            error_list.append(fold_subj)
            continue
        
    return results, {'missing_folders': missing_folders, 
                     'missing_metadata': missing_metadata, 
                     'video_not_seen': video_not_seen,
                     'errors': error_list}

In [20]:
# --- Run configuration -------------------------------------------------------
VIDEO_NAME = 'Present'   # video to score
SALIENCY   = 'vinet'     # saliency model; names the sub-folder of ./../videos_sal/
# Video name -> short code; used below to pick the per-video results folder.
vid_codes  = {'Diary': 'WK', 'Fractals': 'FF', 'Present': 'TP'}

# --- Paths ---------------------------------------------------------------------
data_path       = './../data/ETFinalCutSampleEC07/ETFinalCutSample/'
results_path    = './../results/'
saliency_path   = os.path.join('./../videos_sal/', SALIENCY)
percentils_path = './../cache/'

# --- Lookup tables from the project helpers ------------------------------------
trials_data = load_trials_data()
videos_data = load_video_data()
# NOTE: calculate_metric_dataset_chunks reads `metadata` as a global.
metadata    = load_metadata()

In [21]:
# Score every subject folder in one pass (n_chunks=0 disables chunking).
# results_nss is a pair: (per-subject result tuples, diagnostics dict).
results_nss = calculate_metric_dataset_chunks(
    data_path,
    trials_data,
    videos_data,
    n_chunks=0,
    chunk_i=0,
    vid_name=VIDEO_NAME,
    saliency_path=saliency_path,
)

  0%|          | 0/1758 [00:00<?, ?it/s]

In [22]:
# Preview the first scored subjects; the columns are still positional here and
# get their names in a later cell.
pd.DataFrame(results_nss[0]).head()

Unnamed: 0,0,1,2,3,4,5,6,7
0,NDARAB055BPR,"Int64Index([ 2, 4, 6, 8, 10, 12...","[0.09939172405665668, 1.1182395603927342, -0.3...",4.190695,887,0,Present,./../data/ETFinalCutSampleEC07/ETFinalCutSampl...
1,NDARAB348EWR,"Int64Index([ 2, 4, 7, 9, 11, 13...","[0.2786963369642455, 0.032996470335883744, 0.1...",0.03702,686,0,Present,./../data/ETFinalCutSampleEC07/ETFinalCutSampl...
2,NDARAB793GL3,"Int64Index([ 1, 3, 5, 7, 9, 11...","[1.8064684692046558, -0.43267163910805395, -0....",3.58264,950,0,Present,./../data/ETFinalCutSampleEC07/ETFinalCutSampl...
3,NDARAC349YUC,"Int64Index([ 2, 4, 6, 8, 11, 13...","[1.8056367545816474, 2.2400478944046407, -0.38...",4.066498,627,0,Present,./../data/ETFinalCutSampleEC07/ETFinalCutSampl...
4,NDARAC857HDB,"Int64Index([ 1, 3, 7, 9, 11, 13...","[4.244337386946553, -0.39142702441996985, -0.3...",4.080274,636,0,Present,./../data/ETFinalCutSampleEC07/ETFinalCutSampl...


In [23]:
# Number of subjects recorded in the 'errors' list of the diagnostics dict.
len(results_nss[1]['errors'])

0

In [24]:
# One row per scored subject; name the positional fields of the result tuples
# returned by calculate_metric_dataset_chunks.
df_nss_aux = pd.DataFrame(results_nss[0])
df_nss_aux.columns = [
    'ID', 'FIX_idx', 'NSS', 'NSS_MEAN',
    'FIX_IN_VID', 'FLAG', 'VIDEO_NAME', 'ET_FILE',
]

# Unpack the per-subject FIX_idx / NSS sequences (presumably into one row per
# fixation; see scripts.utils.explode).
df_nss_exploded = explode(df_nss_aux, ['FIX_idx','NSS'])

# ET_FILE already holds a path relative to the working directory, so the
# files are resolved against './' rather than against data_path.
df_final = add_frame_idx(
    df_nss_exploded.reset_index(),
    vid_name=VIDEO_NAME,
    data_path='./',
    trials_data=trials_data,
    videos_data=videos_data,
)

In [25]:
# Dataset-level score: mean of the per-subject NSS_MEAN, keeping one row per ID
# because NSS_MEAN is a per-subject value.
df_final.drop_duplicates('ID').NSS_MEAN.mean()

3.3104630722465465

In [26]:
# (rows, columns) of the final table.
df_final.shape

(682521, 10)

In [27]:
# Final outputs go to a per-video sub-folder of ./../results/final/.
results_path = './../results/final/'
output_dir = os.path.join(results_path, vid_codes[VIDEO_NAME])
# Neither to_csv nor open() creates missing folders; make sure the target exists.
os.makedirs(output_dir, exist_ok=True)

# 'index' (added by reset_index) and 'FLAG' are not exported.
df_final.drop(columns=['index', 'FLAG']).to_csv(os.path.join(output_dir, f'results_nss_{SALIENCY}.csv'),
                                                index=False)

# Diagnostics: subjects skipped or failed during scoring.
with open(os.path.join(output_dir, f'dump_nss_{SALIENCY}.json'), 'w') as jf:
    json.dump(results_nss[1], jf)