In [2]:
import glob
import json
import os
from datetime import datetime
from subprocess import call

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [5]:
# Full list of metric column names. Not referenced by any other code visible
# in this notebook.
all_cols = [
    'luma_avg',
    'luma_change_ratio',
    'kld_hist_2frames',
    'vertical_black_values',
    'horizontal_black_values',
    'crU_change_ratio',
    'crV_change_ratio',
    'crU_avg',
    'crV_avg',
    'AvgB',
    'AvgG',
    'AvgR',
    'KLD_histB',
    'KLD_histG',
    'KLD_histR',
    'Time',
]

# Metric columns actually used. clean() applies these, in this order, as the
# column names of each per-video CSV, and compute_metrics() pastes the
# matching "<name>.dat" files together in the same order.
cols = [
    'luma_avg',
    'kld_hist_2frames',
    'horizontal_black_values',
    'crU_change_ratio',
    'avgG',
    'kld_hist_pattern',
    'time',
]

# Folders holding the source .mp4 assets (candidate arguments for
# compute_metrics()). NOTE(review): machine-specific absolute paths.
aztv = '/home/abcd/Documents/AdSparx/data/aztv/aztv_new_src/'
kdoc = '/home/abcd/Documents/AdSparx/data/kdoc/'

# Folder (relative to the working directory) for the per-video metric CSVs.
save_metrics = 'metrics/'

# Folder (relative to the working directory) for the per-video "<name>.cfg"
# annotation files read by add_labels().
configs = 'configs/'

# Placeholder; a later cell rebinds this to an asset-name -> annotation mapping.
annotations = {}

In [10]:
def compute_metrics(path):
    """Extract per-frame metrics for every ``.mp4`` under ``path`` and label them.

    For each video the function:

    1. runs ffmpeg with the ``videometrics`` filter (configured by
       ``utils/videometrics.cfg``); the filter is presumably what writes the
       ``<metric>.dat`` files into the working directory -- TODO confirm;
    2. pastes the ``.dat`` files named in ``cols`` side by side into
       ``save_metrics + <video name> + '.csv'``;
    3. calls ``clean()`` on that CSV, removes every ``*.dat`` file from the
       working directory, then calls ``add_labels()``.

    A video whose ffmpeg or paste step exits with a non-zero status is
    reported and skipped instead of being handed to ``clean()``.

    Parameters
    ----------
    path : str
        Folder prefix that is concatenated directly with ``'*.mp4'``, so it
        must end with a path separator.
    """
    all_vids = glob.glob(path+'*.mp4')
    print('Available assets in folder:', all_vids)

    # Derive the .dat inputs from `cols` so the pasted column order always
    # matches the header names that clean() assigns.
    dat_files = [col + '.dat' for col in cols]

    for vidpath in all_vids:
        # splitext keeps dots that are part of the asset name ("a.b.mp4" -> "a.b").
        vidname = os.path.splitext(os.path.basename(vidpath))[0]
        csv_path = save_metrics + vidname + '.csv'

        # Argument lists (no shell) keep paths containing spaces or shell
        # metacharacters intact.
        ffmpeg_cmd = ['ffmpeg', '-hide_banner', '-threads', '4', '-i', vidpath,
                      '-vf', 'videometrics=config_file=utils/videometrics.cfg',
                      '-f', 'null', '-']
        paste_cmd = ['paste', '-d,'] + dat_files

        t1 = datetime.now()
        status = call(ffmpeg_cmd)
        t2 = datetime.now()

        try:
            if status != 0:
                print('ffmpeg failed for %s (exit code %s); skipping' % (vidname, status))
                continue

            with open(csv_path, 'w') as csv_file:
                status = call(paste_cmd, stdout=csv_file)
            if status != 0:
                print('paste failed for %s (exit code %s); skipping' % (vidname, status))
                continue

            clean(vidname)
        finally:
            # Always drop the intermediate .dat files so a failed video cannot
            # leak stale metrics into the next one.
            for dat_path in glob.glob('*.dat'):
                if os.path.isfile(dat_path):
                    os.remove(dat_path)

        add_labels(vidname)

        print('Preprocessing done for %s' %vidname)
        print('Time taken for metrics computation = %s' %str(t2-t1))

In [26]:
def clean(csvname):
    """Drop incomplete rows from a raw metrics CSV and rewrite it with a header.

    The file ``save_metrics + csvname + '.csv'`` is read as header-less data
    whose columns are named after ``cols``; every row containing a missing
    value is removed and the result is written back to the same path with a
    header row and without the index.

    Parameters
    ----------
    csvname : str
        File name of the CSV, without folder or ``.csv`` extension.

    Returns
    -------
    pandas.DataFrame
        The frame that was written back to disk.
    """
    csv_path = save_metrics + csvname + '.csv'
    cleaned = pd.read_csv(csv_path, names=cols).dropna()
    cleaned.to_csv(csv_path, header=True, index=False)
    print('Cleaning done for %s' % csvname)
    return cleaned

In [24]:
def add_labels(csvname):
    """Add a binary ``Label`` column to a metrics CSV from its annotation file.

    Reads ``save_metrics + csvname + '.csv'`` and the JSON file
    ``configs + csvname + '.cfg'``. The ``"ads"`` entry of the JSON is a flat
    list ``[start0, stop0, start1, stop1, ...]``; a row gets label 1 when its
    0-based position in the CSV lies inside any inclusive ``[start, stop]``
    pair and 0 otherwise. The CSV is overwritten in place with the new column.

    NOTE(review): rows are matched by position. If clean() dropped rows from
    this CSV, positions may no longer equal the original frame numbers --
    confirm that the annotations are expressed in post-cleaning row indices.

    Parameters
    ----------
    csvname : str
        Shared base name of the CSV and ``.cfg`` files (no folder, no extension).

    Raises
    ------
    ValueError
        If ``"ads"`` holds an odd number of entries, i.e. a start without a
        matching stop.
    """
    csv_path = save_metrics + csvname + '.csv'
    cfgname = configs + csvname + '.cfg'

    df = pd.read_csv(csv_path)

    with open(cfgname) as f:
        data = json.load(f)
    ads = data["ads"]

    # Fail early and clearly on an unpaired boundary instead of hitting an
    # IndexError part-way through (or silently ignoring the dangling start).
    if len(ads) % 2 != 0:
        raise ValueError(
            '"ads" in %s has %d entries; expected start/stop pairs' % (cfgname, len(ads))
        )

    ad_starts, ad_stops = ads[0::2], ads[1::2]

    # One vectorised mask per interval instead of a Python loop over every frame.
    frames = np.arange(len(df.index))
    labels = np.zeros(len(df.index), dtype=int)
    for start, stop in zip(ad_starts, ad_stops):
        labels[(frames >= start) & (frames <= stop)] = 1

    df['Label'] = labels
    df.to_csv(csv_path, index=False, header=True)
    print('Label added for %s' %csvname)

In [3]:
# Validate the annotation JSON strings from the annotation sheet (reporting any that fail to parse) and collect them by asset name

import json
import pandas as pd

# Load only the two columns needed from the exported annotation sheet.
df = pd.read_csv('Annotation sheet - Sheet1.csv', usecols=['Asset Name', 'Annotation JSON'])
df = df.dropna() # Rows missing either the asset name or the annotation are dropped

names = df['Asset Name']
# Deliberately NOT called `configs`: that name holds the config folder path
# that add_labels() concatenates with the file name, and rebinding it to a
# Series here breaks add_labels() on a top-to-bottom run.
annotation_jsons = df['Annotation JSON']
names, annotation_jsons = names[1:], annotation_jsons[1:] # First value removed

# Keep only the part of the asset name before the first '/'.
names = [i.split('/')[0] for i in names]

# Logic to check for incorrect jsons
new_configs = []
for index, value in enumerate(annotation_jsons):
    try:
        new_configs.append(json.loads(value))
    except (ValueError, TypeError) as e:
        # ValueError covers malformed JSON (json.JSONDecodeError is a subclass);
        # TypeError covers non-string cells.
        print(f'Error is {e}')
        # NOTE(review): index+2 presumably maps back to the spreadsheet row number -- confirm.
        print(f'Problematic:\n {index+2}: {names[index]} -> {value}\n')

# NOTE(review): this maps each asset name to its RAW annotation string, including
# strings that failed to parse above; the parsed objects in `new_configs` are not
# used here -- confirm this is intended.
annotations = dict(zip(names, annotation_jsons))
# print(annotations)

### Problems in annotations:

Problematic:
 19: 34_1252_1576400400_1576404000 -> {"trans":[31520,56042,56045,108401,108404,111092,111094,],
"ads":[3152136494,56046,58996,82219,87014,108405,111090,]}

Problematic:
 24: 34_134388_1576654200_1576656000 -> {"trans":[25751,25752,50999,51000,],
"ads":[25753,31299,51001,55799,]}

Problematic:
 32: 34_149619_1576425600_1576429200 -> {"trans":[2104,2105,27351,27354,52045,52047,52946,52948,79247,79248,81773,,81775,105274,105275,110062,110065],
"ads":[0,2405,27355,30808,52048,56543,79249,84649,105276,110375]}

Problematic:
 34: 34_159691_1576404000_1576411200 -> {"trans":[3093,3098,72096,72101,74345,74350,96229,96231,120461,121362,125245,125250,145941,145943,148633,148638,163703,163709,165953,165958,186387,186393,188637,188643],
"ads":[847,3092,25403,27916,72102,74343,96232,98930,121363,125244,145944,148632,163710,165952,186394,188636]

Problematic:
 39: 34_163831_1576440000_1576441800 -> {"trans":[685,686,29970,29977,37171,37178],
"ads":[0,684,29978,37170,]}

Problematic:
 40: 34_163831_1576441800_1576443600 -> {"trans":[ 10698,10700,29545,42340,42343],
"ads":[10700,14142,29545,33437, 42344, 44885,]}

Problematic:
 42: 34_174689_1576429200_1576432800 -> {"trans":[2075,2077,43606,43610,63715,63718,68052,80717,80719,104997,104999,],
"ads":[0,2074,43611,46750,63719,68207,80720,85662,105000, 110659]}

Problematic:
 43: 34_174689_1576515600_1576519200 -> {"trans":[66775,66777,88259,88261,99557,99560,107938,107940,111840. 111858],
"ads":[ 66778, 70375,88262,93060,99561,103121,107941,111839]}

Problematic:
 48: 34_174998_1576544400_1576546200 -> {"trans":[3672,3689,8478, 8485,26876, 26878,56210,56212,],
"ads":[0,3671,8026,8477,26879,32245, 56213,59635]}

Problematic:
 50: 34_1842_1576584000_1576586700 -> {"trans":[32402,32409,64761,64763,],
"ads":[32410,36003,61614,64760]}

Problematic:
 51: 34_1842_1576609200_1576612800 -> {"trans":[2907,2909,6056,26729, 26732,43110,43112,67567,67570, 84760,84763],
"ads"[0,2906,6057,7706, 26733,32126,43113,50162,67570,72970,84764,90613]}

Problematic:
 63: 34_206185_1576546200_1576548000 -> {"trans":[2207,2209,26721,26723,52462,52465,54796,54798,54831,54964,57864,57882],
"ads"[2210,8806,26724,33461,52466,57865]}

Problematic:
 68: 34_225819_1576375200_1576377000 -> nan

Problematic:
 72: 34_229339_1576373400_1576375200 -> nan

Problematic:
 83: 34_256694_1576450800_1576454400 -> {"trans":[3010,3028,25370,25371,51099,51102,106519,106521,113419,113615],
"ads":[25372,32088,51103,58148,106522,]}

### Iteration 2:

Error is Expecting ':' delimiter: line 2 column 6 (char 82)
Problematic:
 51: 34_1842_1576609200_1576612800 -> {"trans":[2907,2909,6056,26729, 26732,43110,43112,67567,67570, 84760,84763],
"ads"[0,2906,6057,7706, 26733,32126,43113,50162,67570,72970,84764,90613]}

Error is Expecting ':' delimiter: line 2 column 6 (char 87)
Problematic:
 63: 34_206185_1576546200_1576548000 -> {"trans":[2207,2209,26721,26723,52462,52465,54796,54798,54831,54964,57864,57882],
"ads"[2210,8806,26724,33461,52466,57865]}

Error is the JSON object must be str, bytes or bytearray, not 'float'
Problematic:
 68: 34_225819_1576375200_1576377000 -> nan

Error is the JSON object must be str, bytes or bytearray, not 'float'
Problematic:
 72: 34_229339_1576373400_1576375200 -> nan

In [25]:
if __name__ == '__main__':
    # Full metric extraction for each source folder is currently disabled;
    # uncomment the calls below to re-run it.
    # compute_metrics(aztv)
    # compute_metrics(kdoc)
    # Label the existing metrics CSV for asset '62156' using its .cfg annotation file.
    add_labels('62156')

Label added for 62156
