Splines delimit the pedestrian ID and the number of control points:

```
x y frame_number gaze_direction   \
x y frame_number gaze_direction    \
....                                >>> N control points
x y frame_number gaze_direction    /
x y frame_number gaze_direction   /
```


We want to format the data [frame_number, pedestrian_id, x, y]

**Example of raw data format**

```
204 - the number of splines
11 - Num of control points
358.000000 -66.000000 6 87.455200 - (2D point, m_id)
...
11 - Num of control points
358.000000 -44.000000 12 89.041168 - (2D point, m_id)
311.000000 -45.000000 40 81.634125 - (2D point, m_id)

```

- The first line is the number of splines, i.e. the number of pedestrians throughout the video
- Each spline contains the number of observations of a specific pedestrian
- Observations are a sequence of control points (x, y, frame_number, gaze_direction)

In [15]:
import os
import math 
import cv2
import numpy as np
from numpy import savetxt
from scipy import interpolate
from operator import itemgetter

In [28]:
output_dir = 'processed_data'
data_dir = 'UCY_dataset'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

In [29]:
video_path = f'{data_dir}/crowds_zara02.avi'  # add video path here

cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    print("Error: Could not open video.")
else:
    _, frame = cap.read()
    height = int(frame.shape[0])
    width = int(frame.shape[1])


def coordinate_transformation(x_c, y_c, width, height):
    """
    Transforms center coordinates into cv coordinates
    """
    cx = width // 2
    cy = height // 2
    x_cv = x_c + cx
    y_cv = -y_c + cy
    return [x_cv, y_cv]

In [30]:
filename = f'{data_dir}/raw_zara02.txt' # add raw UCY data path here

file = open(filename, 'r')
# number - 
num_pedestrians = int(file.readline().split(' - ')[0])
num_observations = len(file.readlines()) - num_pedestrians

# [frame, pedestrian, x, y]
data = np.zeros((num_observations, 4))

file = open(filename, 'r')
file.readline()
row_count = 0
for i in range(num_pedestrians):

    num_control_points_i = int(file.readline().split(' - ')[0])
    
    for j in range(num_control_points_i):
        points = file.readline().split(' ')
        data[row_count, 0:4] = [int(points[2]), i ] + coordinate_transformation(float(points[0]), float(points[1]), width, height)#float(points[0]), float(points[1])]
        row_count += 1
        

sorted_indices = np.argsort(data[:,0]) 
sorted_data = data[sorted_indices]

savetxt(f'{output_dir}/Data_formated_crowds_zara02.csv', sorted_data, delimiter=',')

- Following code was taken from https://github.com/Habiba-Amroune/ETH-UCY-Preprocessing

In [31]:
data_to_interpolate = np.zeros((len(sorted_data), 4), dtype=float)
for i in range(0, len(sorted_data)):
    data_to_interpolate[i, 0] = sorted_data[i, 0]
    data_to_interpolate[i, 1] = sorted_data[i, 1]
    data_to_interpolate[i, 2] = sorted_data[i, 2]
    data_to_interpolate[i, 3] = sorted_data[i, 3]

interpolated_data = np.empty((0, 4), dtype=float)
t = 0
for i in range(1, int(np.max(data_to_interpolate[:, 1])+1)):
    mask = data_to_interpolate[:, 1] == i
    # list of frames where the pedestrian 'i' appeard
    traj_of_ped_i = data_to_interpolate[mask, :]
    if (traj_of_ped_i.size == 0):
        print('-----------------------------------------------------')
        print('this PedID does''t exist in the data', i)
        x = x+1
        t = t+1
    else:
        # the first appearance of the pedestrian 'i'
        x = int(traj_of_ped_i[0, 0])
        # the last appearance of the pedestrian 'i'
        y = int(traj_of_ped_i[-1, 0])

        if ((x % 10) != 0):  # testing on the first frame
            if ((x % 10) < 5):
                x = int(traj_of_ped_i[0, 0]) - (x % 10)
            else:
                x = (int(traj_of_ped_i[0, 0]) - (x % 10))+10

        if ((y % 10) != 0):  # testing on the last frame
            if ((y % 10) < 5):
                y = int(traj_of_ped_i[-1, 0]) - (y % 10)
            else:
                y = (int(traj_of_ped_i[-1, 0]) - (y % 10))+10

        while x < y:  # until last appearance of the pedestrian 'i'
            for j in range(0, traj_of_ped_i.shape[0]):
                z = np.where(traj_of_ped_i[:, 0] == x)
                # if the frame exist copy the values of the positions
                if np.squeeze(traj_of_ped_i[z, 0]) == x:
                    exist_frame = traj_of_ped_i[z, :]
                    interpolated_data = np.append(
                        interpolated_data, exist_frame[0, :, :], axis=0)
                    x = x+1
                else:                                # else: interpolate using scipy.interpolate.interp1d
                    f = interpolate.interp1d(traj_of_ped_i[:, 0], [
                                             traj_of_ped_i[:, 2], traj_of_ped_i[:, 3]], fill_value="extrapolate", bounds_error=False)
                    inter = f(x)
                    interpolated_data = np.append(interpolated_data, np.array(
                        [[int(x), int(i), float(inter[0]), float(inter[1])]]), axis=0)
                    x = x+1
                if x == y+1:
                    break

    percentage = i/(int(np.max(data_to_interpolate[:, 1])+1))*100
    percentage = "{:.2f}".format(percentage)
    print('interpolation percentage ', percentage, '%')
    print('-----------------------------------------------------')

print('Number of missing pedestrians is: ', t)
print('-----------------------------------------------------')


interpolated_data = interpolated_data[np.argsort(interpolated_data[:, 0])]
savetxt(f'{output_dir}/interpolated_data_crowds_zara02.csv',
        interpolated_data, delimiter=',')

  if np.squeeze(traj_of_ped_i[z, 0]) == x:


interpolation percentage  0.49 %
-----------------------------------------------------
interpolation percentage  0.98 %
-----------------------------------------------------
interpolation percentage  1.47 %
-----------------------------------------------------
interpolation percentage  1.96 %
-----------------------------------------------------
interpolation percentage  2.45 %
-----------------------------------------------------
interpolation percentage  2.94 %
-----------------------------------------------------
interpolation percentage  3.43 %
-----------------------------------------------------
interpolation percentage  3.92 %
-----------------------------------------------------
interpolation percentage  4.41 %
-----------------------------------------------------
interpolation percentage  4.90 %
-----------------------------------------------------
interpolation percentage  5.39 %
-----------------------------------------------------
interpolation percentage  5.88 %
----------

In [32]:
def frames_10(inter_data):
    frames = inter_data[:, 0]
    frames = sorted(frames)
    frames = np.unique(frames)
    final_data = []
    for i in range(0, len(frames)):
        id = frames[i]
        if (((id % 10) == 0) or (id == 0)):
            frm = inter_data[inter_data[:, 0] == id, :]
            final_data.extend(frm)
    final_data = np.unique(final_data, axis=0)
    return final_data


Final_data = frames_10(interpolated_data)
savetxt(f'{output_dir}/Final_data_crowds_zara02.csv', Final_data, delimiter=',')