In [1]:
import numpy as np
import pandas as pd
import csv
import datetime

import re
import os
import glob

In [2]:
# Root folder of the recording session to process. The video files
# (output/video/*.flv_camera_front.mp4) and timestamps.csv are looked up
# relative to this folder.
# Change here for every participant
DATA_FOLDER = 'T:/lab-study/20191206_HW-105-V3'

In [3]:
# VIDEO FILES: list of all 4 video files (one for each phase).
# glob returns matches in arbitrary order, but the position in this list is
# later used as the phase number, so sort the paths: all files live in the same
# folder and their names start with the recording timestamp, which makes the
# sorted order chronological.
video_paths = sorted(glob.glob(DATA_FOLDER + '/output/video/*.flv_camera_front.mp4'))

# TIMESTAMPS PHASES
timestamp_matches = glob.glob(DATA_FOLDER + '/timestamps.csv')
if not timestamp_matches:
    # fail with a readable message instead of a bare IndexError on [0]
    raise FileNotFoundError('timestamps.csv not found in ' + DATA_FOLDER)
timestamp_path = timestamp_matches[0] # take first element from list (a path string)

In [8]:
# get start times of videos from video file folder as df
def read_videostamps_df(file_paths, debug=False):
    """Build a DataFrame of video start timestamps parsed from video file names.

    The timestamp is the part of each file name in front of the first
    ".flv", e.g. ".../video/2019-12-06--09-06-24.flv_camera_front.mp4"
    yields "2019-12-06--09-06-24". Both "/" and "\\" are accepted as path
    separators, so the result does not depend on the operating system.

    The extracted timestamps are sorted before phases are assigned, so the
    order of `file_paths` does not matter (assumes the zero-padded
    "YYYY-MM-DD--HH-MM-SS" naming, for which string order is chronological).

    Parameters
    ----------
    file_paths : list of str
        Paths of the phase videos, one per phase.
    debug : bool, optional
        If true, print each path and the timestamps collected so far.

    Returns
    -------
    pandas.DataFrame
        One string column 'video_time'; index named 'phase' with the labels
        'P1', 'P2', ... in chronological order.
    """
    video_time = []

    for position, path in enumerate(file_paths):
        # convert paths to consistent slashing, then keep only the file name
        file_name = path.replace('\\', '/').rsplit('/', 1)[-1]

        # cut timestamp from video file name
        video_time.append(file_name.split('.flv', 1)[0])

        if debug:
            print(position)
            print(path)
            print(video_time)

    # earliest recording becomes P1, the next one P2, ...
    video_time.sort()

    # generate dataframe with named index and column
    phase_labels = ['P{}'.format(number) for number in range(1, len(video_time) + 1)]
    videostamps_df = pd.DataFrame({'video_time': video_time}, index=phase_labels)
    videostamps_df.index.name = 'phase'

    return videostamps_df

In [9]:
# read start and endtime timestamps as df
def read_organize_timestamps_df(file_path, skiprows):
    """Load the scenario start/end timestamps from a semicolon-separated file.

    The file is read without a header row and `skiprows` is passed through to
    pandas.read_csv. The remaining rows are labelled 'PxSy' in file order
    (phase x: 1 to 4, scenario y: 1 to 3) and the two columns are named
    'start_time' and 'end_time'.

    Parameters
    ----------
    file_path : str or file-like
        Source handed to pandas.read_csv.
    skiprows : int or list-like
        Rows to skip at the top of the file.

    Returns
    -------
    pandas.DataFrame
        12 rows, index named 'phase_scenario', columns 'start_time' and
        'end_time'.
    """
    loaded = pd.read_csv(
        file_path,
        sep=';',
        header=None,
        index_col=None,
        skiprows=skiprows,
        parse_dates=[0],
    )

    # P1S1, P1S2, P1S3, P2S1, ... P4S3
    loaded.index = [
        'P{}S{}'.format(phase, scenario)
        for phase in range(1, 5)
        for scenario in range(1, 4)
    ]
    loaded.index.name = 'phase_scenario'
    loaded.columns = ['start_time', 'end_time']

    return loaded

In [10]:
# Debugging: check that an element can be read back by index right after
# it has been appended to the list

video_files = []

for x in range(4):
    print(x)
    video_files += [x]
    print(video_files[x])

0
0
1
1
2
2
3
3


In [11]:
# parse the start time of every phase video from its file name
videostamps_df = read_videostamps_df(video_paths, debug=False)
print(videostamps_df)

# load the scenario start/end timestamps; the leading 3 rows are skipped
# (test drives, according to the original note)
timestamps_df = read_organize_timestamps_df(timestamp_path, skiprows=3)
print(timestamps_df)

                 video_time
phase                      
P1     2019-12-06--09-06-24
P2     2019-12-06--09-50-30
P3     2019-12-06--10-38-59
P4     2019-12-06--11-56-56
                              start_time                  end_time
phase_scenario                                                    
P1S1            2019-12-06--09-08-19-172  2019-12-06--09-13-53-370
P1S2            2019-12-06--09-15-10-608  2019-12-06--09-20-26-580
P1S3            2019-12-06--09-21-42-144  2019-12-06--09-26-53-050
P2S1            2019-12-06--09-59-53-322  2019-12-06--10-05-11-980
P2S2            2019-12-06--10-06-04-595  2019-12-06--10-11-50-100
P2S3            2019-12-06--10-12-25-413  2019-12-06--10-17-35-510
P3S1            2019-12-06--10-39-18-004  2019-12-06--10-44-36-740
P3S2            2019-12-06--10-48-47-945  2019-12-06--10-54-00-450
P3S3            2019-12-06--10-55-07-041  2019-12-06--11-01-18-430
P4S1            2019-12-06--12-01-04-478  2019-12-06--12-06-16-310
P4S2            2019-12-06--

In [12]:
# parse the timestamp strings into datetime values
videostamps_df['video_time'] = pd.to_datetime(
    videostamps_df['video_time'],
    format='%Y-%m-%d--%H-%M-%S',
)
print(videostamps_df)

# the scenario timestamps carry an additional trailing fractional-second field
for time_column in ('start_time', 'end_time'):
    timestamps_df[time_column] = pd.to_datetime(
        timestamps_df[time_column],
        format='%Y-%m-%d--%H-%M-%S-%f',
    )
print(timestamps_df)
print(type(timestamps_df))

               video_time
phase                    
P1    2019-12-06 09:06:24
P2    2019-12-06 09:50:30
P3    2019-12-06 10:38:59
P4    2019-12-06 11:56:56
                            start_time                end_time
phase_scenario                                                
P1S1           2019-12-06 09:08:19.172 2019-12-06 09:13:53.370
P1S2           2019-12-06 09:15:10.608 2019-12-06 09:20:26.580
P1S3           2019-12-06 09:21:42.144 2019-12-06 09:26:53.050
P2S1           2019-12-06 09:59:53.322 2019-12-06 10:05:11.980
P2S2           2019-12-06 10:06:04.595 2019-12-06 10:11:50.100
P2S3           2019-12-06 10:12:25.413 2019-12-06 10:17:35.510
P3S1           2019-12-06 10:39:18.004 2019-12-06 10:44:36.740
P3S2           2019-12-06 10:48:47.945 2019-12-06 10:54:00.450
P3S3           2019-12-06 10:55:07.041 2019-12-06 11:01:18.430
P4S1           2019-12-06 12:01:04.478 2019-12-06 12:06:16.310
P4S2           2019-12-06 12:08:23.010 2019-12-06 12:13:36.910
P4S3           2019-12-06

In [13]:
def round_milliseconds(dt, direction):
    """Drop the sub-second part of a timestamp, optionally stepping one second forward.

    Parameters
    ----------
    dt : datetime.datetime or pandas.Timestamp
        Only the year, month, day, hour, minute and second attributes are read.
    direction : str
        'up' returns the truncated time plus one second (also when `dt`
        already lies on a whole second); any other value returns the
        truncated time.

    Returns
    -------
    datetime.datetime
        Value without sub-second part and without timezone information.
    """
    truncated = datetime.datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
    if direction == 'up':
        # Step forward with a timedelta so that second 59 rolls over into the
        # next minute (hour, day, ...). Passing dt.second + 1 == 60 to the
        # datetime constructor raises ValueError.
        return truncated + datetime.timedelta(seconds=1)
    return truncated

In [14]:
# Debugging: show the rounded value and its type for both directions
for rounding_direction in ('up', 'down'):
    test = round_milliseconds(
        pd.to_datetime(timestamps_df.at['P1S1', 'start_time']),
        direction=rounding_direction,
    )
    print(test)
    print(type(test))

2019-12-06 09:08:20
<class 'datetime.datetime'>
2019-12-06 09:08:19
<class 'datetime.datetime'>


In [15]:
# round millisecond timestamps to seconds: up for starttime, down for endtime
def round_timestamps(timestamps_df, debug=False):
    """Return a copy of `timestamps_df` with its times cut to whole seconds.

    'start_time' values are truncated to the second and moved one second
    forward; 'end_time' values are truncated to the second. Together this
    keeps the rounded interval inside the original one. The computation is
    done on whole columns, so a start time in second 59 simply rolls over
    into the next minute.

    Parameters
    ----------
    timestamps_df : pandas.DataFrame
        Frame with 'start_time' and/or 'end_time' columns holding datetime
        values (or values pandas.to_datetime can parse). Other columns are
        copied unchanged.
    debug : bool, optional
        If true, print every row label followed by the column names.

    Returns
    -------
    pandas.DataFrame
        Deep copy of the input with rounded times; the input is not modified.
    """
    # make copy, changes not reflected in original
    timestamps_df_round = timestamps_df.copy(deep=True)

    if debug:
        for row in timestamps_df_round.index:
            print(row)
            for column in timestamps_df_round.columns:
                print(column)

    if 'start_time' in timestamps_df_round.columns:
        start_floor = pd.to_datetime(timestamps_df_round['start_time']).dt.floor('s')
        timestamps_df_round['start_time'] = start_floor + pd.Timedelta(seconds=1)

    if 'end_time' in timestamps_df_round.columns:
        timestamps_df_round['end_time'] = pd.to_datetime(timestamps_df_round['end_time']).dt.floor('s')

    return timestamps_df_round

In [16]:
# apply the rounding; the result is a separate frame
timestamps_df_round = round_timestamps(timestamps_df, debug=False)

# Debugging: original values followed by the rounded ones
print(timestamps_df, timestamps_df_round, sep='\n')

                            start_time                end_time
phase_scenario                                                
P1S1           2019-12-06 09:08:19.172 2019-12-06 09:13:53.370
P1S2           2019-12-06 09:15:10.608 2019-12-06 09:20:26.580
P1S3           2019-12-06 09:21:42.144 2019-12-06 09:26:53.050
P2S1           2019-12-06 09:59:53.322 2019-12-06 10:05:11.980
P2S2           2019-12-06 10:06:04.595 2019-12-06 10:11:50.100
P2S3           2019-12-06 10:12:25.413 2019-12-06 10:17:35.510
P3S1           2019-12-06 10:39:18.004 2019-12-06 10:44:36.740
P3S2           2019-12-06 10:48:47.945 2019-12-06 10:54:00.450
P3S3           2019-12-06 10:55:07.041 2019-12-06 11:01:18.430
P4S1           2019-12-06 12:01:04.478 2019-12-06 12:06:16.310
P4S2           2019-12-06 12:08:23.010 2019-12-06 12:13:36.910
P4S3           2019-12-06 12:15:12.222 2019-12-06 12:20:45.370
                        start_time            end_time
phase_scenario                                        
P1S1    

In [17]:
# Debugging: print the values as stored and after another pd.to_datetime pass
print(videostamps_df.iloc[0], pd.to_datetime(videostamps_df.iloc[0]), sep='\n')

print(
    timestamps_df_round['start_time'],
    pd.to_datetime(timestamps_df_round['start_time']),
    sep='\n',
)

video_time   2019-12-06 09:06:24
Name: P1, dtype: datetime64[ns]
video_time   2019-12-06 09:06:24
Name: P1, dtype: datetime64[ns]
phase_scenario
P1S1   2019-12-06 09:08:20
P1S2   2019-12-06 09:15:11
P1S3   2019-12-06 09:21:43
P2S1   2019-12-06 09:59:54
P2S2   2019-12-06 10:06:05
P2S3   2019-12-06 10:12:26
P3S1   2019-12-06 10:39:19
P3S2   2019-12-06 10:48:48
P3S3   2019-12-06 10:55:08
P4S1   2019-12-06 12:01:05
P4S2   2019-12-06 12:08:24
P4S3   2019-12-06 12:15:13
Name: start_time, dtype: datetime64[ns]
phase_scenario
P1S1   2019-12-06 09:08:20
P1S2   2019-12-06 09:15:11
P1S3   2019-12-06 09:21:43
P2S1   2019-12-06 09:59:54
P2S2   2019-12-06 10:06:05
P2S3   2019-12-06 10:12:26
P3S1   2019-12-06 10:39:19
P3S2   2019-12-06 10:48:48
P3S3   2019-12-06 10:55:08
P4S1   2019-12-06 12:01:05
P4S2   2019-12-06 12:08:24
P4S3   2019-12-06 12:15:13
Name: start_time, dtype: datetime64[ns]


In [18]:
# create both result columns up front, filled with 0 (the original note says
# the later assignments raise an error otherwise)
timestamps_df_round['start_second'] = timestamps_df_round['length_second'] = 0
print(timestamps_df_round)

                        start_time            end_time  start_second  \
phase_scenario                                                         
P1S1           2019-12-06 09:08:20 2019-12-06 09:13:53             0   
P1S2           2019-12-06 09:15:11 2019-12-06 09:20:26             0   
P1S3           2019-12-06 09:21:43 2019-12-06 09:26:53             0   
P2S1           2019-12-06 09:59:54 2019-12-06 10:05:11             0   
P2S2           2019-12-06 10:06:05 2019-12-06 10:11:50             0   
P2S3           2019-12-06 10:12:26 2019-12-06 10:17:35             0   
P3S1           2019-12-06 10:39:19 2019-12-06 10:44:36             0   
P3S2           2019-12-06 10:48:48 2019-12-06 10:54:00             0   
P3S3           2019-12-06 10:55:08 2019-12-06 11:01:18             0   
P4S1           2019-12-06 12:01:05 2019-12-06 12:06:16             0   
P4S2           2019-12-06 12:08:24 2019-12-06 12:13:36             0   
P4S3           2019-12-06 12:15:13 2019-12-06 12:20:45          

In [19]:
# calculate start second of each scenario in its corresponding phase video

# phase label of every scenario row: 'P1S1' -> 'P1'
phase_of_scenario = timestamps_df_round.index.str.split('S').str[0]

# start time of the video each scenario belongs to, in the row order of
# timestamps_df_round (looked up by phase label, not by position)
video_start_times = videostamps_df.loc[phase_of_scenario, 'video_time'].to_numpy()

# Subtract column-wise with plain datetime values. Subtracting a whole row such
# as videostamps_df.iloc[0] yields a one-element Series per cell, which only
# works as long as pandas coerces it into a scalar on assignment.
timestamps_df_round['start_second'] = (
    (timestamps_df_round['start_time'] - video_start_times) / np.timedelta64(1, 's')
).astype('int64')

print(timestamps_df_round['start_second'])

phase_scenario
P1S1     116
P1S2     527
P1S3     919
P2S1     564
P2S2     935
P2S3    1316
P3S1      20
P3S2     589
P3S3     969
P4S1     249
P4S2     688
P4S3    1097
Name: start_second, dtype: int64


In [20]:
# calculate length of each scenario in unit [s]
# (end minus start for all rows at once instead of one assignment per row)
scenario_duration = timestamps_df_round['end_time'] - timestamps_df_round['start_time']
timestamps_df_round['length_second'] = (
    scenario_duration / np.timedelta64(1, 's')
).astype('int64')

print(timestamps_df_round['length_second'])

phase_scenario
P1S1    333
P1S2    315
P1S3    310
P2S1    317
P2S2    345
P2S3    309
P3S1    317
P3S2    312
P3S3    370
P4S1    311
P4S2    312
P4S3    332
Name: length_second, dtype: int64


In [21]:
# show the complete table including the start_second and length_second columns
print(timestamps_df_round)

                        start_time            end_time  start_second  \
phase_scenario                                                         
P1S1           2019-12-06 09:08:20 2019-12-06 09:13:53           116   
P1S2           2019-12-06 09:15:11 2019-12-06 09:20:26           527   
P1S3           2019-12-06 09:21:43 2019-12-06 09:26:53           919   
P2S1           2019-12-06 09:59:54 2019-12-06 10:05:11           564   
P2S2           2019-12-06 10:06:05 2019-12-06 10:11:50           935   
P2S3           2019-12-06 10:12:26 2019-12-06 10:17:35          1316   
P3S1           2019-12-06 10:39:19 2019-12-06 10:44:36            20   
P3S2           2019-12-06 10:48:48 2019-12-06 10:54:00           589   
P3S3           2019-12-06 10:55:08 2019-12-06 11:01:18           969   
P4S1           2019-12-06 12:01:05 2019-12-06 12:06:16           249   
P4S2           2019-12-06 12:08:24 2019-12-06 12:13:36           688   
P4S3           2019-12-06 12:15:13 2019-12-06 12:20:45          

In [34]:
# write to csv file as needed for ffmpeg-split
# keep only the 2 needed columns; .copy() is deep by default, so csv_df is
# independent of timestamps_df_round
csv_df = timestamps_df_round.loc[:, ['start_second', 'length_second']].copy()

In [48]:
# drop all data except 1 video: remove the scenario rows of videos 2 to 4
csv_df_1 = csv_df.drop(
    ['P{}S{}'.format(video, scenario) for video in (2, 3, 4) for scenario in (1, 2, 3)]
)
print(csv_df_1) # Debug

# move index to column, then reset index
csv_df_1 = csv_df_1.assign(rename_to=csv_df_1.index).reset_index(drop=True)
print(csv_df_1) # Debug

# rename to ffmpeg-split convention
ffmpeg_split_names = {"start_second": "start_time", "length_second": "length"}
csv_df_1 = csv_df_1.rename(columns=ffmpeg_split_names)
print(csv_df_1) # Debug

# write csv file
csv_df_1.to_csv('1.csv', index=False)

                start_second  length_second
phase_scenario                             
P1S1                     116            333
P1S2                     527            315
P1S3                     919            310
   start_second  length_second rename_to
0           116            333      P1S1
1           527            315      P1S2
2           919            310      P1S3
   start_time  length rename_to
0         116     333      P1S1
1         527     315      P1S2
2         919     310      P1S3


In [19]:
# Basic Example: build a small DataFrame from a dict of column lists
d = dict(col1=[1, 2], col2=[3, 4])
df = pd.DataFrame(d)
df

Unnamed: 0,col1,col2
0,1,3
1,2,4
