In [446]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta

In [447]:
obs_df = pd.concat([
        pd.read_csv("video/tmoc1.csv",names=["name","minute","second"]),
        pd.read_csv("video/tmoc2.csv",names=["name","minute","second"])
    ]).reset_index(drop=True)

In [448]:
estimated_df = pd.read_csv("face_seen_fn512_l2.csv",names=["name","minute","second"]).drop_duplicates().reset_index(drop=True)

In [449]:
obs_df["time"] = obs_df[["minute","second"]].apply(lambda x: pd.Timedelta(minutes=x[0],seconds=x[1]) if x[1]<60 else pd.Timedelta(minutes=x[0]+1),axis=1)

In [450]:
obs_df.head()

Unnamed: 0,name,minute,second,time
0,bhide,0,5,0 days 00:00:05
1,hati,0,5,0 days 00:00:05
2,abdul,0,5,0 days 00:00:05
3,unkown,0,5,0 days 00:00:05
4,bhide,0,5,0 days 00:00:05


In [451]:
estimated_df["time"] =  estimated_df[["minute","second"]].apply(lambda x: pd.Timedelta(minutes=x[0],seconds=x[1]) if x[1]<60 else pd.Timedelta(minutes=x[0]+1),axis=1)

In [452]:
estimated_df.head()

Unnamed: 0,name,minute,second,time
0,haati,0,5,0 days 00:00:05
1,bhide,0,5,0 days 00:00:05
2,abdul,0,5,0 days 00:00:05
3,haati,0,6,0 days 00:00:06
4,bhide,0,6,0 days 00:00:06


In [453]:
obs_df.set_index(["time"],inplace=True)
estimated_df.set_index(["time"],inplace=True)
obs_df.drop(["minute","second"],axis=1,inplace=True)
estimated_df.drop(["minute","second"],errors="ignore",axis=1,inplace=True)

In [454]:
obs_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:19:57,sodi wife
0 days 00:19:58,hathi wife
0 days 00:19:58,sodi wife
0 days 00:19:59,hathi wife
0 days 00:19:59,sodi wife


In [455]:
estimated_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:20:40,tarak
0 days 00:20:41,tarak
0 days 00:20:41,unknown
0 days 00:20:41,bhide
0 days 00:20:42,tarak


In [456]:
estimated_df = estimated_df.loc[estimated_df.index < pd.Timedelta(minutes=20)]

In [457]:
estimated_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:19:58,sodi wife
0 days 00:19:58,haati wife
0 days 00:19:59,sodi wife
0 days 00:19:59,haati wife
0 days 00:19:59,unknown


In [458]:
obs_df = obs_df.loc[(obs_df["name"]!="unkown")&(obs_df["name"]!="unkwon")&(obs_df["name"]!="unknown")]
estimated_df = estimated_df.loc[(estimated_df["name"]!="unkown")&(estimated_df["name"]!="unkwon")&(estimated_df["name"]!="unknown")]

In [459]:
obs_df.value_counts()

name       
jethalal       416
champaklal     311
tarak          227
babita         188
ayar           121
abdul          116
tarak wife      95
sodi            77
bhide wife      63
bhide           58
tappu           29
hati            27
popatlal        20
sodi wife       12
hathi wife      12
tarak            3
golu             3
gogi             3
 tarak wife      3
sonu             3
chintu           3
tark             2
 bhide wife      1
 jethalal        1
tarak wife       1
tarakwife        1
 bhide           1
dtype: int64

In [460]:
estimated_df.value_counts()

name           
jethalal           390
champak lal        254
tarak              239
tappu              164
bhide              161
babita             146
ayar               123
tarak wife         111
bhide wife         101
abdul               71
popatlal            60
haati               53
sodi                49
rickshaw driver     30
babita friend       29
haati wife          20
sodi wife           18
dtype: int64

In [461]:
def preprocess_name(x):
    x = x.strip()
    if x == "tark":
        return "tarak"
    
    if x == "tarakwife":
        return "tarak wife"
    if x == "hathi":
        return "hati"
    if x == "hathi wife":
        return "haati wife"
    return x

In [462]:
obs_df["name"] = obs_df["name"].apply(lambda x: preprocess_name(x))

In [463]:
obs_df.value_counts()

name      
jethalal      417
champaklal    311
tarak         232
babita        188
ayar          121
abdul         116
tarak wife    100
sodi           77
bhide wife     64
bhide          59
tappu          29
hati           27
popatlal       20
sodi wife      12
haati wife     12
gogi            3
golu            3
chintu          3
sonu            3
dtype: int64

In [464]:
obs_df = obs_df[~obs_df[["name"]].isin(["golu","gogi","chintu","sonu"])]
est_df = estimated_df[~estimated_df[["name"]].isin(["rickshaw driver","babita friend"])]

In [465]:
obs_df.value_counts()

name      
jethalal      417
champaklal    311
tarak         232
babita        188
ayar          121
abdul         116
tarak wife    100
sodi           77
bhide wife     64
bhide          59
tappu          29
hati           27
popatlal       20
haati wife     12
sodi wife      12
dtype: int64

In [466]:
est_df.value_counts()

name       
jethalal       390
champak lal    254
tarak          239
tappu          164
bhide          161
babita         146
ayar           123
tarak wife     111
bhide wife     101
abdul           71
popatlal        60
haati           53
sodi            49
haati wife      20
sodi wife       18
dtype: int64

In [630]:
obs_agg = obs_df.groupby(pd.Grouper(freq="1s")).apply(lambda x:x["name"].unique())
est_agg = est_df.groupby(pd.Grouper(freq="1s")).apply(lambda x:x["name"].unique())

In [631]:
obs_agg

time
0 days 00:00:05                           [bhide, hati, abdul]
0 days 00:00:06                           [bhide, hati, abdul]
0 days 00:00:07    [bhide, hati, abdul, jethalal, tarak, sodi]
0 days 00:00:08                        [jethalal, tarak, sodi]
0 days 00:00:09    [jethalal, tarak, sodi, bhide, hati, abdul]
                                      ...                     
0 days 00:19:55                        [haati wife, sodi wife]
0 days 00:19:56                        [haati wife, sodi wife]
0 days 00:19:57                        [haati wife, sodi wife]
0 days 00:19:58                        [haati wife, sodi wife]
0 days 00:19:59                        [haati wife, sodi wife]
Freq: S, Length: 1195, dtype: object

In [632]:
est_agg

time
0 days 00:00:05                           [haati, bhide, abdul]
0 days 00:00:06                           [haati, bhide, abdul]
0 days 00:00:07    [haati, abdul, bhide, jethalal, tarak, sodi]
0 days 00:00:08                  [tarak, jethalal, sodi, tappu]
0 days 00:00:09          [jethalal, tappu, haati, abdul, bhide]
                                       ...                     
0 days 00:19:55                         [haati wife, sodi wife]
0 days 00:19:56                         [sodi wife, haati wife]
0 days 00:19:57                         [sodi wife, haati wife]
0 days 00:19:58                         [sodi wife, haati wife]
0 days 00:19:59                         [sodi wife, haati wife]
Freq: S, Length: 1195, dtype: object

In [633]:
obs_agg.name = "observed"
est_agg.name = "estimated"

In [634]:
merged = pd.merge(obs_agg,est_agg,how="inner",left_index=True,right_index=True)
merged.head()

Unnamed: 0_level_0,observed,estimated
time,Unnamed: 1_level_1,Unnamed: 2_level_1
0 days 00:00:05,"[bhide, hati, abdul]","[haati, bhide, abdul]"
0 days 00:00:06,"[bhide, hati, abdul]","[haati, bhide, abdul]"
0 days 00:00:07,"[bhide, hati, abdul, jethalal, tarak, sodi]","[haati, abdul, bhide, jethalal, tarak, sodi]"
0 days 00:00:08,"[jethalal, tarak, sodi]","[tarak, jethalal, sodi, tappu]"
0 days 00:00:09,"[jethalal, tarak, sodi, bhide, hati, abdul]","[jethalal, tappu, haati, abdul, bhide]"


In [635]:
merged.apply(lambda x:set(x["observed"]).intersection(x["estimated"]),axis=1)

time
0 days 00:00:05                           {abdul, bhide}
0 days 00:00:06                           {abdul, bhide}
0 days 00:00:07    {jethalal, abdul, tarak, bhide, sodi}
0 days 00:00:08                  {tarak, jethalal, sodi}
0 days 00:00:09                 {bhide, abdul, jethalal}
                                   ...                  
0 days 00:19:55                  {haati wife, sodi wife}
0 days 00:19:56                  {haati wife, sodi wife}
0 days 00:19:57                  {haati wife, sodi wife}
0 days 00:19:58                  {haati wife, sodi wife}
0 days 00:19:59                  {haati wife, sodi wife}
Freq: S, Length: 1195, dtype: object

In [636]:
def calculate_precision(df,pred,obs):
    inter_len = df.apply(lambda x: len(set(x[obs]).intersection(x[pred])),axis=1)
    pred_len = df[pred].apply(lambda x: len(x))
    total = inter_len.div(pred_len)
    result = total.sum()/total.shape[0]
    return result
    

In [637]:
precision =calculate_precision(merged,"estimated","observed")
precision

0.4739386996081556

In [638]:
def calculate_recall(df,pred,obs):
    inter_len = df.apply(lambda x: len(set(x[obs]).intersection(x[pred])),axis=1)
    obs_len = df[obs].apply(lambda x: len(x))
    total = inter_len.div(obs_len)
    result = total.sum()/total.shape[0]
    return result

In [639]:
recall =calculate_recall(merged,"estimated","observed")
recall

0.5529218967921897

In [640]:
def calculate_f1(precision,recall):
    return 2*precision*recall/(precision+recall)

In [641]:
calculate_f1(precision,recall)

0.5103927167313344

In [663]:
def rolling_window(df):
    res = []
    for x in df.sort_index().rolling(window="3s"):
        ls = set()
        for y in x.values:
            ls.update(y)
        res.append(ls)
        
    return pd.Series(res)
    

In [664]:
obs_roll = rolling_window(obs_agg)
obs_roll.name = "observed"
obs_roll

0                              {abdul, hati, bhide}
1                              {abdul, hati, bhide}
2       {jethalal, abdul, hati, tarak, bhide, sodi}
3       {jethalal, abdul, hati, tarak, bhide, sodi}
4       {jethalal, abdul, hati, tarak, bhide, sodi}
                           ...                     
1190                        {haati wife, sodi wife}
1191                        {haati wife, sodi wife}
1192                        {haati wife, sodi wife}
1193                        {haati wife, sodi wife}
1194                        {haati wife, sodi wife}
Name: observed, Length: 1195, dtype: object

In [665]:
est_roll = rolling_window(est_agg)
est_roll.name = "estimated"
est_roll

0                                   {haati, abdul, bhide}
1                                   {haati, abdul, bhide}
2            {jethalal, abdul, haati, tarak, bhide, sodi}
3       {jethalal, tappu, abdul, haati, tarak, bhide, ...
4       {jethalal, tappu, abdul, haati, tarak, bhide, ...
                              ...                        
1190                              {haati wife, sodi wife}
1191                              {haati wife, sodi wife}
1192                              {haati wife, sodi wife}
1193                              {haati wife, sodi wife}
1194                              {haati wife, sodi wife}
Name: estimated, Length: 1195, dtype: object

In [666]:
roll_merged = pd.concat([est_roll,obs_roll],axis=1)
roll_merged.head()

Unnamed: 0,estimated,observed
0,"{haati, abdul, bhide}","{abdul, hati, bhide}"
1,"{haati, abdul, bhide}","{abdul, hati, bhide}"
2,"{jethalal, abdul, haati, tarak, bhide, sodi}","{jethalal, abdul, hati, tarak, bhide, sodi}"
3,"{jethalal, tappu, abdul, haati, tarak, bhide, ...","{jethalal, abdul, hati, tarak, bhide, sodi}"
4,"{jethalal, tappu, abdul, haati, tarak, bhide, ...","{jethalal, abdul, hati, tarak, bhide, sodi}"


In [667]:
roll_p = calculate_precision(roll_merged,"estimated","observed")
roll_p

0.5074791294247362

In [668]:
roll_r = calculate_recall(roll_merged,"estimated","observed")
roll_r

0.6202500498107192

In [669]:
roll_f1 =  calculate_f1(roll_p,roll_r)
roll_f1

0.5582261434735376