In [1]:
%matplotlib notebook
import glob
import os
from datetime import datetime
from datetime import timedelta

import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd


In [2]:
# Convert subtitle file into CSV
#
# Every '*.ass' file in occupancy_estimation_by_human/ is rewritten as a CSV next
# to it: the first 26 lines are skipped, and on each remaining line the
# 'Dialogue: ' / 'Format: ' prefixes and all spaces are removed.
subtitle_files = glob.glob('occupancy_estimation_by_human/*.ass')
subtitle_files.sort()
for file in subtitle_files:
    csv_path = file.replace('_ffmpeg_with_interest_area.ass', '.csv')
    if csv_path == file:
        # The expected suffix is missing, so replace() changed nothing. Opening
        # that path for writing would truncate the subtitle file we are about
        # to read; fall back to swapping only the extension.
        csv_path = os.path.splitext(file)[0] + '.csv'
    with open(file, 'r') as subtitle_file:
        # Skip the fixed-size preamble; the first line kept is the one whose
        # 'Format: ' prefix is stripped below.
        for _ in range(26):
            next(subtitle_file)
        with open(csv_path, 'w') as csv_file:
            for line in subtitle_file:
                csv_file.write(line.replace('Dialogue: ', '').replace('Format: ', '').replace(' ', ''))

In [3]:
### 1. Read data from human estimation ###
human_estimation_files = glob.glob('occupancy_estimation_by_human/*.csv')
human_estimation_files.sort()

# Loop-invariant inputs, hoisted out of the per-file loop.
openpose_files = glob.glob('occupancy_estimation/occupancy_estimation_camera1_combined/Sep_17/*.csv')
exp_start_time = datetime.strptime('2018-09-17 13:00:00.00', '%Y-%m-%d %H:%M:%S.%f')
first_10_min_ms = 10*60*1000


def _seconds_since_exp_start(subtitle_time):
    """Convert a subtitle timestamp such as '0:01:17.30' into seconds.

    The timestamp is placed on 2018-09-17, shifted by 13 hours and compared
    with exp_start_time (13:00 on the same day); the shift and the start time
    cancel, so the result is the timestamp's own offset in seconds.
    """
    shifted = datetime.strptime('2018-09-17 ' + subtitle_time, '%Y-%m-%d %H:%M:%S.%f') + timedelta(hours=13)
    return (shifted - exp_start_time).total_seconds()


for human_estimation_file in human_estimation_files:
    print("read file: {}".format(human_estimation_file))
    human_estimation = pd.read_csv(human_estimation_file) # only one file is available here ----> Sept 17
    human_estimation = human_estimation.drop(columns = ['Layer','Style','Name','MarginL','MarginR','MarginV','Effect'])
    human_estimation = human_estimation.rename(index=str, columns={'Text':'Occupancy'})

    ### 2. Convert datetime to seconds ###
    start_time_in_s = [_seconds_since_exp_start(t) for t in human_estimation['Start']]
    end_time_in_s = [_seconds_since_exp_start(t) for t in human_estimation['End']]
    human_estimation.insert(loc=1,column='Start_s', value=start_time_in_s)
    human_estimation.insert(loc=3,column='End_s', value=end_time_in_s)

    ### 3. Read data from OpenPose estimation ###
    # os.path.basename works with both '\\' and '/' separators on Windows and
    # with '/' on POSIX, unlike splitting on a literal backslash.
    human_file_name = os.path.basename(human_estimation_file)
    file_name_split = human_file_name.split('_')
    time_and_date = file_name_split[1] + '_' + file_name_split[2] + '_' + file_name_split[3] + '_' + file_name_split[4]
    matched_estimation = None
    for f in openpose_files:
        if ('whole' in f) and (time_and_date in f):
            matched_estimation = pd.read_csv(f)
            print('openpose file select:{}'.format(f))
    if matched_estimation is None:
        # Fail loudly instead of reusing the frame left over from an earlier
        # file (or hitting a NameError on the first iteration).
        raise FileNotFoundError(
            "no 'whole' OpenPose file matching '{}' for {}".format(time_and_date, human_estimation_file))
    openpose_estimation = matched_estimation

    ### 4. Fill out human observation data according to OpenPose data's frequency ###
    # Each OpenPose record whose Time (ms) falls in [Start_s, End_s) of an
    # annotation receives that annotation's occupancy. The result keeps the
    # OpenPose row labels, so subtracting the two 'Occupancy' columns compares
    # the same record even when the annotations start late or have gaps.
    human_occupancy = pd.Series(np.nan, index=openpose_estimation.index)
    for _,he_row in human_estimation.iterrows():
        in_range = (openpose_estimation["Time"]>=he_row["Start_s"]*1000) & (openpose_estimation["Time"]<he_row["End_s"]*1000)
        human_occupancy.loc[in_range] = he_row["Occupancy"]
    human_estimation_filled = pd.DataFrame({"Time": openpose_estimation["Time"], "Occupancy": human_occupancy})
    human_estimation_filled = human_estimation_filled.dropna(subset=["Occupancy"])

    #### 5. Get data for first 10 minutes and plot ###
    # .copy() so that columns added to openpose_estimation in later cells are
    # written to an independent frame rather than to a slice of the CSV frame.
    # NOTE(review): the OpenPose bound is inclusive while the human bound is
    # exclusive, as in the original analysis -- confirm this is intended.
    openpose_estimation = openpose_estimation.loc[openpose_estimation['Time']<=first_10_min_ms].copy()
    human_estimation_filled = human_estimation_filled.loc[human_estimation_filled['Time']<first_10_min_ms]

    plot_title = human_file_name.replace('.csv','').replace('1400','1310')

    fig, ax = plt.subplots()
    ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Occupancy'], label='by OpenPose')
    ax.plot(human_estimation_filled['Time']/1000, human_estimation_filled['Occupancy'], label='by human')
    ax.legend(loc='upper right')
    ax.set_xlabel('Time/s')
    ax.set_ylabel('Occupancy')
    ax.set_title(plot_title)

    fig, ax = plt.subplots()
    ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Occupancy'] - human_estimation_filled['Occupancy'], label = 'Openpose - Human')
    ax.legend(loc='upper right')
    ax.set_xlabel('Time/s')
    ax.set_ylabel('Occupancy')
    ax.set_title(plot_title)

read file: occupancy_estimation_by_human\Camera1_Sep_17_1300_1400_Parameterized_Learning_Agent.csv
openpose file select:occupancy_estimation/occupancy_estimation_camera1_combined/Sep_17\Camera1_Sep_17_1300_1400_Parameterized_Learning_Agent_processed_occupancy_whole_COCO_736_3.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [4]:
# Display the human annotation table left by the last iteration of the loop above.
human_estimation

Unnamed: 0,Start,Start_s,End,End_s,Occupancy
0,0:00:00.00,0.0,0:00:05.00,5.0,3
1,0:00:05.00,5.0,0:00:23.20,23.2,4
2,0:00:23.20,23.2,0:00:43.30,43.3,3
3,0:00:43.30,43.3,0:01:17.30,77.3,4
4,0:01:17.30,77.3,0:02:17.00,137.0,5
5,0:02:17.00,137.0,0:02:25.66,145.66,4
6,0:02:25.66,145.66,0:02:44.00,164.0,3
7,0:02:44.00,164.0,0:03:14.00,194.0,2
8,0:03:14.00,194.0,0:03:20.60,200.6,1
9,0:03:20.60,200.6,0:03:38.60,218.6,2


# Apply a low-pass filter to the OpenPose estimation

In [44]:
# Define a low pass filter

from scipy.signal import butter, lfilter

def butter_lowpass(data, highcut, fs, order=5):
    """Low-pass filter `data` with a Butterworth filter.

    Parameters
    ----------
    data : array-like
        Samples to filter.
    highcut : float
        Cut-off frequency, in the same unit as `fs`.
    fs : float
        Sampling frequency of `data`.
    order : int, optional
        Order of the Butterworth filter (default 5).

    Returns
    -------
    numpy.ndarray
        Output of `scipy.signal.lfilter` applied to `data`.
    """
    # butter() takes the cut-off as a fraction of the Nyquist frequency (fs / 2).
    normalized_cutoff = highcut / (0.5 * fs)
    numerator, denominator = butter(order, normalized_cutoff, btype='low')
    return lfilter(numerator, denominator, data)

# Estimate sampling frequency
# "Time" is divided by 1000 to get seconds (the plots label it 'Time/s').
# NOTE(review): dividing the sample count by the last timestamp assumes the
# series starts at Time 0 -- confirm.
total_sample_time = openpose_estimation.iloc[-1]["Time"]/1000
total_sample = len(openpose_estimation.index)
sample_freq = total_sample/total_sample_time

# Feed into filter
cutoff_freq = 0.2 # <====== Tunable parameter  (order doesn't have much effects on filter output)

openpose_estimation_f = butter_lowpass(data=openpose_estimation["Occupancy"], highcut=cutoff_freq, fs=sample_freq, order=5)
# Store the low-pass result computed just above. The previous code read
# `openpose_estimation_filtered`, which this cell never defines, so it either
# raised NameError or silently reused the output of another filter.
if "Filtered_Occupancy" in openpose_estimation.columns:
    openpose_estimation["Filtered_Occupancy"] = openpose_estimation_f
else:
    openpose_estimation.insert(loc=2,column="Filtered_Occupancy", value=openpose_estimation_f)


In [47]:

# Plot the filtered OpenPose estimate against the human annotation, then the
# raw and filtered differences. Uses openpose_estimation, human_estimation_filled
# and human_estimation_file as left by the cells above.
# os.path.basename replaces split('\\')[1], which only works with Windows separators.
plot_title = os.path.basename(human_estimation_file).replace('.csv','').replace('1400','1310')

fig, ax = plt.subplots()
ax.plot(openpose_estimation['Time']/1000, openpose_estimation["Filtered_Occupancy"], label='by Filtered OpenPose')
ax.plot(human_estimation_filled['Time']/1000, human_estimation_filled['Occupancy'], label='by human')
ax.legend(loc='upper right')
ax.set_xlabel('Time/s')
ax.set_ylabel('Occupancy')
ax.set_title(plot_title)

fig, ax = plt.subplots()
ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Occupancy'] - human_estimation_filled['Occupancy'], label = 'Openpose - Human')
ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Filtered_Occupancy'] - human_estimation_filled['Occupancy'], label = 'Filtered_Openpose - Human')
ax.legend(loc='upper right')
ax.set_xlabel('Time/s')
ax.set_ylabel('Occupancy')
ax.set_title(plot_title);


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0.5,1,'Camera1_Sep_17_1300_1310_Parameterized_Learning_Agent')

# Majority vote filter

In [45]:
def majority_vote(data, order=5):
    """Smooth a sequence by replacing each sample with the most frequent value
    in a centred window of `order` samples.

    Parameters
    ----------
    data : pandas.Series or sequence
        Samples to filter. Any index is ignored; positions are used throughout.
    order : int, optional
        Window length; must be odd (default 5).

    Returns
    -------
    list
        One value per input sample. The first and last ``(order - 1) / 2``
        samples have no full window and are returned unchanged.

    Raises
    ------
    AssertionError
        If `order` is even.
    """
    assert order%2==1, "order has to be odd number"
    half_order = int((order-1)/2)
    # Work on a 0..n-1 indexed Series so element access and window slicing are
    # both positional. Indexing the caller's Series with data[i] is label-based
    # and breaks for non-default indexes; plain lists have no value_counts().
    values = pd.Series(data).reset_index(drop=True)
    n_samples = len(values)
    output = []
    for i in range(n_samples):
        if i < half_order or i >= n_samples-half_order:
            output.append(values.iloc[i])
        else:
            votes = values.iloc[i-half_order : i+half_order+1].value_counts()
            # value_counts() sorts by frequency, most frequent first.
            output.append(votes.index[0])
    return output

# Run the majority-vote filter over the raw occupancy and store the result as
# the "Filtered_Occupancy" column: a new column is inserted at position 2, an
# existing one is overwritten in place.
openpose_estimation_filtered = majority_vote(data=openpose_estimation["Occupancy"], order=21)
filtered_column_missing = "Filtered_Occupancy" not in openpose_estimation.columns
if filtered_column_missing:
    openpose_estimation.insert(loc=2, column="Filtered_Occupancy", value=openpose_estimation_filtered)
else:
    openpose_estimation["Filtered_Occupancy"] = openpose_estimation_filtered


In [47]:
# Plot the filtered OpenPose estimate against the human annotation, then the
# raw and filtered differences. Uses openpose_estimation, human_estimation_filled
# and human_estimation_file as left by the cells above.
# os.path.basename replaces split('\\')[1], which only works with Windows separators.
plot_title = os.path.basename(human_estimation_file).replace('.csv','').replace('1400','1310')

fig, ax = plt.subplots()
ax.plot(openpose_estimation['Time']/1000, openpose_estimation["Filtered_Occupancy"], label='by Filtered OpenPose')
ax.plot(human_estimation_filled['Time']/1000, human_estimation_filled['Occupancy'], label='by human')
ax.legend(loc='upper right')
ax.set_xlabel('Time/s')
ax.set_ylabel('Occupancy')
ax.set_title(plot_title)

fig, ax = plt.subplots()
ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Occupancy'] - human_estimation_filled['Occupancy'], label = 'Openpose - Human')
ax.plot(openpose_estimation['Time']/1000, openpose_estimation['Filtered_Occupancy'] - human_estimation_filled['Occupancy'], label = 'Filtered_Openpose - Human')
ax.legend(loc='upper right')
ax.set_xlabel('Time/s')
ax.set_ylabel('Occupancy')
ax.set_title(plot_title);

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Text(0.5,1,'Camera1_Sep_17_1300_1310_Parameterized_Learning_Agent')

# Correlation (paired-difference statistics: OpenPose vs. human)

In [57]:
import math

# Paired-difference statistics between each OpenPose series and the human
# annotation: mean and standard deviation of d = OpenPose - human, and
# Td = |mean(d)| / (std(d) / sqrt(n)).
comparisons = (
    ("Filtered Occupancy", 'Filtered_Occupancy'),
    ("Raw Occupancy", 'Occupancy'),
)
for heading, column in comparisons:
    print(heading)
    d = openpose_estimation[column] - human_estimation_filled['Occupancy']
    # Rows without a human annotation become NaN after index alignment.
    # mean() and std() skip them, so they must not be counted in n either.
    d = pd.to_numeric(d).dropna()

    d_mean = d.mean()
    print('mean of d: '+ str(d_mean))
    d_std = d.std()
    print('std of d: '+ str(d_std))

    Td = abs(d_mean)/(d_std/math.sqrt(len(d)))
    print('Td of d: '+ str(Td))

Filtered Occupancy
mean of d: -0.3728301886792453
std of d: 0.9895936695013405
Td of d: 27.42787092599139
Raw Occupancy
mean of d: -0.34547169811320755
std of d: 1.0254766883446549
Td of d: 24.525881031585183
