In [277]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import timedelta
from sklearn.metrics import matthews_corrcoef

In [278]:
# The observations are split across two CSV files. Read both with the same
# explicit column names and stack them under a fresh 0..n-1 index.
obs_columns = ["name", "minute", "second"]
obs_parts = [
    pd.read_csv(csv_path, names=obs_columns)
    for csv_path in ("video/tmoc1.csv", "video/tmoc2.csv")
]
obs_df = pd.concat(obs_parts, ignore_index=True)

In [279]:
# Load face_seen.csv with explicit column names, drop exact duplicate rows,
# and renumber the remaining rows from zero.
estimated_raw = pd.read_csv("face_seen.csv", names=["name", "minute", "second"])
estimated_df = estimated_raw.drop_duplicates().reset_index(drop=True)

In [280]:
# Build an elapsed-time Timedelta from the minute/second columns.
# Vectorized, label-based construction avoids positional indexing (x[0], x[1])
# of a label-indexed row Series, which newer pandas deprecates.
# Seconds are capped at 60 so that any value >= 60 rolls over to exactly the
# start of the next minute (minute + 1), and values below 60 are added as-is.
obs_df["time"] = (
    pd.to_timedelta(obs_df["minute"], unit="min")
    + pd.to_timedelta(obs_df["second"].clip(upper=60), unit="s")
)

In [281]:
# Preview the first rows to check the newly added "time" column.
obs_df.head()

Unnamed: 0,name,minute,second,time
0,bhide,0,5,0 days 00:00:05
1,hati,0,5,0 days 00:00:05
2,abdul,0,5,0 days 00:00:05
3,unkown,0,5,0 days 00:00:05
4,bhide,0,5,0 days 00:00:05


In [282]:
# Build an elapsed-time Timedelta from the minute/second columns.
# Vectorized, label-based construction avoids positional indexing (x[0], x[1])
# of a label-indexed row Series, which newer pandas deprecates.
# Seconds are capped at 60 so that any value >= 60 rolls over to exactly the
# start of the next minute (minute + 1), and values below 60 are added as-is.
estimated_df["time"] = (
    pd.to_timedelta(estimated_df["minute"], unit="min")
    + pd.to_timedelta(estimated_df["second"].clip(upper=60), unit="s")
)

In [283]:
# Preview the first rows to check the newly added "time" column.
estimated_df.head()

Unnamed: 0,name,minute,second,time
0,haati,0,5,0 days 00:00:05
1,bhide,0,5,0 days 00:00:05
2,abdul,0,5,0 days 00:00:05
3,haati,0,6,0 days 00:00:06
4,bhide,0,6,0 days 00:00:06


In [284]:
# Index both frames by elapsed time and discard the raw minute/second columns,
# leaving "name" as the only column. Reassigning (rather than inplace=True)
# keeps each step a plain expression that returns a new frame.
obs_df = obs_df.set_index("time").drop(columns=["minute", "second"])
estimated_df = estimated_df.set_index("time").drop(columns=["minute", "second"])

In [285]:
# Inspect the last rows to see where the observed timeline ends.
obs_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:19:57,sodi wife
0 days 00:19:58,hathi wife
0 days 00:19:58,sodi wife
0 days 00:19:59,hathi wife
0 days 00:19:59,sodi wife


In [286]:
# Inspect the last rows to see where the estimated timeline ends.
estimated_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:20:40,babita
0 days 00:20:41,tarak
0 days 00:20:41,ayar
0 days 00:20:41,babita
0 days 00:20:42,babita


In [287]:
# Keep only estimated rows strictly before the 20-minute mark.
# NOTE(review): presumably chosen so both frames cover the same span — confirm.
cutoff = pd.Timedelta(minutes=20)
within_window = estimated_df.index < cutoff
estimated_df = estimated_df.loc[within_window]

In [288]:
# Re-check the last rows now that estimated_df is limited to under 20 minutes.
estimated_df.tail()

Unnamed: 0_level_0,name
time,Unnamed: 1_level_1
0 days 00:19:58,sodi wife
0 days 00:19:59,sodi wife
0 days 00:19:59,bhide wife
0 days 00:19:59,babita
0 days 00:19:59,abdul


In [326]:
# Remove rows whose name is one of the "unknown" spellings (typos included).
# Matching is exact, so a label with surrounding whitespace is not removed here.
unknown_labels = ["unkown", "unkwon", "unknown"]
obs_df = obs_df.loc[~obs_df["name"].isin(unknown_labels)]
estimated_df = estimated_df.loc[~estimated_df["name"].isin(unknown_labels)]

In [327]:
# Count rows per observed name to spot misspellings and whitespace variants.
obs_df.value_counts()

name       
jethalal       416
champaklal     311
tarak          227
babita         188
ayar           121
abdul          116
tarak wife      95
sodi            77
bhide wife      63
bhide           58
tappu           29
hati            27
popatlal        20
sodi wife       12
hathi wife      12
tarak            3
golu             3
gogi             3
 tarak wife      3
sonu             3
chintu           3
tark             2
 bhide wife      1
 jethalal        1
tarak wife       1
tarakwife        1
 bhide           1
dtype: int64

In [328]:
# Count rows per estimated name for comparison with the observed labels.
estimated_df.value_counts()

name           
babita             649
ayar               593
jethalal           302
abdul              120
champak lal        119
tarak              105
bhide wife          51
haati               51
tarak wife          41
bhide               30
sodi                30
tappu               30
popatlal            29
babita friend       25
sodi wife           17
rickshaw driver     11
haati wife           9
dtype: int64

In [329]:
def preprocess_name(x):
    """Strip surrounding whitespace from a name and fix known misspellings.

    Args:
        x: Raw name string.

    Returns:
        "tarak" for "tark", "tarak wife" for "tarakwife"; any other name is
        returned stripped but otherwise unchanged.
    """
    corrections = {"tark": "tarak", "tarakwife": "tarak wife"}
    cleaned = x.strip()
    return corrections.get(cleaned, cleaned)

In [331]:
# Normalize the observed names. assign() builds a new frame instead of writing
# a column into obs_df, which at this point is the result of a boolean filter
# (column assignment on such a frame can raise SettingWithCopyWarning).
# NOTE(review): only obs_df is normalized. The two frames spell some names
# differently (e.g. "hati" vs "haati", "champaklal" vs "champak lal") — confirm
# these are reconciled before the frames are compared.
obs_df = obs_df.assign(name=obs_df["name"].map(preprocess_name))

In [333]:
# Re-count after normalization; the whitespace and misspelling variants should be merged.
obs_df.value_counts()

name      
jethalal      417
champaklal    311
tarak         232
babita        188
ayar          121
abdul         116
tarak wife    100
sodi           77
bhide wife     64
bhide          59
tappu          29
hati           27
popatlal       20
sodi wife      12
hathi wife     12
gogi            3
golu            3
chintu          3
sonu            3
dtype: int64

In [394]:
# Drop rows for names that appear in only one of the two frames' value counts.
# The mask must be a boolean Series. Indexing with a boolean DataFrame
# (df[~df[["name"]].isin(...)]) keeps every row and merely replaces the matching
# cells with NaN, so those rows would still reach the later time-window
# aggregation as NaN entries.
obs_only_names = ["golu", "gogi", "chintu", "sonu"]
est_only_names = ["rickshaw driver", "babita friend"]
obs_df = obs_df[~obs_df["name"].isin(obs_only_names)]
est_df = estimated_df[~estimated_df["name"].isin(est_only_names)]

In [396]:
# Confirm the excluded names no longer appear in the counts.
# value_counts() skips rows containing NaN by default, so also compare
# len(obs_df) if the removal of the rows themselves needs to be verified.
obs_df.value_counts()

name      
jethalal      417
champaklal    311
tarak         232
babita        188
ayar          121
abdul         116
tarak wife    100
sodi           77
bhide wife     64
bhide          59
tappu          29
hati           27
popatlal       20
hathi wife     12
sodi wife      12
dtype: int64

In [397]:
# Confirm the excluded names no longer appear in the counts.
# value_counts() skips rows containing NaN by default, so also compare
# len(est_df) if the removal of the rows themselves needs to be verified.
est_df.value_counts()

name       
babita         649
ayar           593
jethalal       302
abdul          120
champak lal    119
tarak          105
bhide wife      51
haati           51
tarak wife      41
bhide           30
sodi            30
tappu           30
popatlal        29
sodi wife       17
haati wife       9
dtype: int64

In [400]:
def _names_in_window(window_rows):
    """Return the distinct names present in one time window."""
    return window_rows["name"].unique()


# Bucket each frame into 2-second windows on its time index and collect the
# distinct names seen in every window.
# NOTE(review): a window-by-window comparison assumes both frames produce the
# same window boundaries — confirm they start at the same timestamp.
obs_agg = obs_df.groupby(pd.Grouper(freq="2s")).apply(_names_in_window)
est_agg = est_df.groupby(pd.Grouper(freq="2s")).apply(_names_in_window)

In [401]:
# Display the per-window arrays of distinct observed names.
obs_agg

time
0 days 00:00:05                           [bhide, hati, abdul]
0 days 00:00:07    [bhide, hati, abdul, jethalal, tarak, sodi]
0 days 00:00:09    [jethalal, tarak, sodi, bhide, hati, abdul]
0 days 00:00:11    [bhide, hati, abdul, jethalal, tarak, sodi]
0 days 00:00:13    [jethalal, tarak, sodi, bhide, hati, abdul]
                                      ...                     
0 days 00:19:51                            [abdul, champaklal]
0 days 00:19:53                        [hathi wife, sodi wife]
0 days 00:19:55                        [hathi wife, sodi wife]
0 days 00:19:57                        [hathi wife, sodi wife]
0 days 00:19:59                        [hathi wife, sodi wife]
Freq: 2S, Length: 598, dtype: object

In [402]:
# Display the per-window arrays of distinct estimated names.
est_agg

time
0 days 00:00:05                  [haati, bhide, abdul, ayar, babita]
0 days 00:00:07            [haati, abdul, ayar, babita, bhide, sodi]
0 days 00:00:09                  [abdul, ayar, haati, babita, bhide]
0 days 00:00:11         [babita, haati, abdul, ayar, jethalal, sodi]
0 days 00:00:13                   [babita, haati, ayar, abdul, sodi]
                                         ...                        
0 days 00:19:51                     [abdul, ayar, babita, sodi wife]
0 days 00:19:53    [sodi wife, haati wife, abdul, babita, bhide w...
0 days 00:19:55          [bhide wife, babita, sodi wife, haati wife]
0 days 00:19:57                      [sodi wife, bhide wife, babita]
0 days 00:19:59               [sodi wife, bhide wife, babita, abdul]
Freq: 2S, Length: 598, dtype: object

In [409]:
# Number of 2-second windows in the observed series; presumably this must
# equal the estimated count for a window-by-window comparison.
obs_agg.values.shape

(598,)

In [410]:
# Number of 2-second windows in the estimated series; presumably this must
# equal the observed count for a window-by-window comparison.
est_agg.values.shape

(598,)