# Data Manipulation

## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

%matplotlib inline

## Paths and directories

In [2]:
# Input and output parent directories
input_dir = '../inputs/'
output_dir = '../outputs/'

In [3]:
# Paths for activation data
activation_path  = input_dir + 'activation.dat'
output_activation_path = output_dir + 'activation.csv'

In [4]:
# Frames directories
frames_dir = input_dir + 'frames/'
output_frames_dir = output_dir + 'frames/'
if not os.path.exists(output_frames_dir):
    os.makedirs(output_frames_dir)

In [5]:
# Frames paths
frame_files = [f for f in os.listdir(frames_dir) if f.endswith('.dat')]
frame_paths = [frames_dir + f for f in frame_files]

# Change .dat to .csv for outputs
output_frame_files = [f[:-4] + '.csv' for f in frame_files]
output_frame_paths = [output_frames_dir + f for f in frame_files]

In [6]:
# Combined frames data
total_frames_path = output_dir + 'combined_frames.csv'

## Functions

In [7]:
def get_frame_number(frame_path):
    frame_suffix = frame_path.split('_')[-1]
    frame_number_split = frame_suffix.split('.')
    frame_number_str = '.'.join(frame_number_split[:-1])
    return int(float(frame_number_str))

In [8]:
def get_frame(frame_path):
    frame = list()
    with open(frame_path) as f:
        # Read comment line
        line = f.readline()
        while line:
            line = f.readline()
            line_split = line.strip().split(' ')
            if len(line_split) > 1:
                pedestrian = {'frame': get_frame_number(frame_path),
                              'pedestrianID': int(line_split[0]),
                              'x': float(line_split[1]),
                              'y': float(line_split[2])}
                frame.append(pedestrian)

    df = pd.DataFrame(frame)
    return df

## Read in data

In [9]:
activations = pd.read_csv(activation_path, sep=' ')

In [10]:
frames = [get_frame(frame_path) for frame_path in frame_paths]

## Modifications

### Activations - remove unneeded columns

In [11]:
activations.head()

Unnamed: 0,#,pedestrianID,time_activation,gate_in,gate_out,speed
0,0,2273.0,8,5,1.417886,
1,1,2401.0,9,3,1.130044,
2,2,2411.0,9,3,1.120553,
3,3,2564.0,9,1,0.554143,
4,4,2591.0,9,1,0.600241,


In [12]:
activations = activations.loc[:, ['pedestrianID', 'time_activation', 'gate_in', 'gate_out']]

In [13]:
activations.head()

Unnamed: 0,pedestrianID,time_activation,gate_in,gate_out
0,2273.0,8,5,1.417886
1,2401.0,9,3,1.130044
2,2411.0,9,3,1.120553
3,2564.0,9,1,0.554143
4,2591.0,9,1,0.600241


### Frames - join into single dataframe

In [14]:
df = pd.concat(frames)

In [15]:
df.shape

(226390, 4)

In [16]:
df.head()

Unnamed: 0,frame,pedestrianID,x,y
0,2708,1,517.738979,310.348028
1,2708,2,515.2471,302.5058
2,2708,3,357.187148,153.410673
3,2708,4,342.423434,110.213457
4,2708,5,341.559165,92.116009


## Write out data

In [17]:
activations.to_csv(output_activation_path, index=False)

In [18]:
for i, frame in enumerate(frames):
    frame.to_csv(output_frame_paths[i], index=False)

In [19]:
df.to_csv(total_frames_path, index=False)