# Save test and train data to CSV for faster reading

Note: for every pedestrian, each run of ten consecutive points is saved as a sliding window, e.g.: p1, ..., p10; p2, ..., p11; ...

In [1]:
import pandas as pd
import numpy as np

from time import localtime, strftime

In [2]:
# Width to which a pedestrian number is left-padded with zeros in its file name.
ZERO_PADDING = 6

def get_filename(index):
    """Return the path of the per-pedestrian file for ``index``.

    The number is converted to text and zero-filled to ``ZERO_PADDING``
    characters, e.g. ``7`` -> ``"src/Csv/000007.txt"``. Numbers that already
    have more digits than ``ZERO_PADDING`` are used as they are.
    """
    padded_number = str(index).zfill(ZERO_PADDING)
    return "src/Csv/{}.txt".format(padded_number)

# Valid pedestrian numbers are 0 .. NUMBER_OF_PEDESTRIANS - 1.
NUMBER_OF_PEDESTRIANS = 12273

def download_pedestrian(index):
    """Load the data of one pedestrian from its file.

    ``index`` must satisfy ``0 <= index < NUMBER_OF_PEDESTRIANS``; otherwise
    an ``AssertionError`` naming the allowed range and the given number is
    raised. The file located by ``get_filename(index)`` is read with
    ``pd.read_csv``, using its first column as the DataFrame index, and the
    resulting DataFrame is returned.
    """
    index_is_valid = 0 <= index < NUMBER_OF_PEDESTRIANS
    assert index_is_valid, (
        "Pedestrian number should be between 0 and {max}; given number: {id}"
        .format(max=NUMBER_OF_PEDESTRIANS - 1, id=index)
    )
    return pd.read_csv(get_filename(index), index_col=0)

In [3]:
def make_view(data, length=4, only_odd_rows=True):
    """Return overlapping windows of ``length`` values over flattened ``data``.

    ``data`` is copied into a 1-D array; row ``k`` of the full window set is
    ``flat[k:k + length]``, so there are ``len(flat) - length + 1`` rows.
    When ``only_odd_rows`` is true only every second row is kept, starting
    with the first one (rows 0, 2, 4, ...); otherwise every row is returned.

    The result is a strided view onto the flattened copy, so its rows share
    memory with each other.
    """
    flat = np.array(data).flatten()
    window_count = len(flat) - length + 1
    item_bytes = flat.dtype.itemsize
    # Both axes advance by one element, which makes row k start at flat[k].
    windows = np.lib.stride_tricks.as_strided(
        flat,
        shape=(window_count, length),
        strides=(item_bytes, item_bytes),
    )
    row_step = 2 if only_odd_rows else 1
    return windows[::row_step, :]

In [4]:
# Load the per-pedestrian label table; the first CSV column becomes the DataFrame index.
# in each row: {index; category; frames_number; label}
labels = pd.read_csv('src/pedestrian_labels_and_test_and_train_separation.csv', index_col=0)

In [7]:
# Build the train and test sets: for every pedestrian, collect each window of
# WINDOW_SIZE successive points that passes the two checks below and place its
# X/Y columns side by side in the set selected by the pedestrian's category.
WINDOW_SIZE = 10         # points per saved window
WINDOW_INDEX_SPAN = 180  # required index (time) difference between last and first point
MAX_STEP_LENGTH = 150    # a window is kept only if every step between points is shorter

# Accepted windows are gathered in plain lists and concatenated once at the
# end. Calling pd.concat inside the loops re-copies every column collected so
# far on each iteration, which makes the total cost grow quadratically.
train_frames = []
test_frames = []

for index in range(NUMBER_OF_PEDESTRIANS):
    # in each row: {index (time); X; Y}
    all_data = download_pedestrian(index)[['X', 'Y']]
    times = all_data.index.values  # invariant for the window loop below

    pedestrian_frames = []
    for i in range(len(all_data) - WINDOW_SIZE + 1):
        # we want only serial data
        if times[i + WINDOW_SIZE - 1] - times[i] != WINDOW_INDEX_SPAN:
            continue

        # Explicit positional slice, consistent with the positional lookup in `times`.
        window = all_data.iloc[i:i + WINDOW_SIZE]

        # With make_view's defaults each row is [x_k, y_k, x_k+1, y_k+1],
        # so the difference of the two halves is the step between neighbours.
        path_view = make_view(data=np.array(window))
        steps = path_view[:, :2] - path_view[:, 2:]

        if np.linalg.norm(steps, axis=1).max() < MAX_STEP_LENGTH:
            pedestrian_frames.append(window.reset_index()[['X', 'Y']])

    if labels['category'][index] == 'train':
        train_frames.extend(pedestrian_frames)
    else:
        test_frames.extend(pedestrian_frames)

# in each row: {X; Y; X; Y; ...}
# pd.concat raises on an empty list, so fall back to an empty frame.
train_data = pd.concat(train_frames, axis=1) if train_frames else pd.DataFrame()
test_data = pd.concat(test_frames, axis=1) if test_frames else pd.DataFrame()

In [8]:
# Notebook display of the assembled test set.
test_data

Unnamed: 0,X,Y,X.1,Y.1,X.2,Y.2,X.3,Y.3,X.4,Y.4,...,X.5,Y.5,X.6,Y.6,X.7,Y.7,X.8,Y.8,X.9,Y.9
0,591,116,591,127,590,139,586,150,583,155,...,1360,363,1377,388,1391,408,1408,423,1429,444
1,591,127,590,139,586,150,583,155,581,169,...,1377,388,1391,408,1408,423,1429,444,1461,463
2,590,139,586,150,583,155,581,169,574,178,...,1391,408,1408,423,1429,444,1461,463,1490,483
3,586,150,583,155,581,169,574,178,572,186,...,1408,423,1429,444,1461,463,1490,483,1517,499
4,583,155,581,169,574,178,572,186,567,198,...,1429,444,1461,463,1490,483,1517,499,1551,515
5,581,169,574,178,572,186,567,198,566,210,...,1461,463,1490,483,1517,499,1551,515,1592,533
6,574,178,572,186,567,198,566,210,563,219,...,1490,483,1517,499,1551,515,1592,533,1637,541
7,572,186,567,198,566,210,563,219,564,235,...,1517,499,1551,515,1592,533,1637,541,1688,549
8,567,198,566,210,563,219,564,235,560,240,...,1551,515,1592,533,1637,541,1688,549,1726,553
9,566,210,563,219,564,235,560,240,560,255,...,1592,533,1637,541,1688,549,1726,553,1765,558


In [11]:
# Notebook display of the assembled train set.
train_data

Unnamed: 0,X,Y,X.1,Y.1,X.2,Y.2,X.3,Y.3,X.4,Y.4,...,X.5,Y.5,X.6,Y.6,X.7,Y.7,X.8,Y.8,X.9,Y.9
0,525,122,541,141,546,156,555,169,561,183,...,1447,334,1466,355,1485,372,1506,388,1521,402
1,541,141,546,156,555,169,561,183,573,196,...,1466,355,1485,372,1506,388,1521,402,1537,427
2,546,156,555,169,561,183,573,196,595,208,...,1485,372,1506,388,1521,402,1537,427,1561,445
3,555,169,561,183,573,196,595,208,624,213,...,1506,388,1521,402,1537,427,1561,445,1583,468
4,561,183,573,196,595,208,624,213,662,227,...,1521,402,1537,427,1561,445,1583,468,1607,487
5,573,196,595,208,624,213,662,227,696,233,...,1537,427,1561,445,1583,468,1607,487,1638,507
6,595,208,624,213,662,227,696,233,731,242,...,1561,445,1583,468,1607,487,1638,507,1672,526
7,624,213,662,227,696,233,731,242,770,246,...,1583,468,1607,487,1638,507,1672,526,1699,542
8,662,227,696,233,731,242,770,246,807,262,...,1607,487,1638,507,1672,526,1699,542,1729,552
9,696,233,731,242,770,246,807,262,845,272,...,1638,507,1672,526,1699,542,1729,552,1765,561


In [12]:
# Write both sets to CSV files under src/ so later runs can load them directly.
test_data.to_csv('src/test_data_points.csv')
train_data.to_csv('src/train_data_points.csv')