In [1]:
# Python libraries
import os
from scipy.io import loadmat
import pandas as pd
import numpy as np
import itertools

# Load Matlab .mat files in Python
[source](https://towardsdatascience.com/how-to-load-matlab-mat-files-in-python-1f200e1287b5)

In [2]:
def get_info_mat(dirname, flag):
    """
    Build the list of .mat files to load, together with their metadata.

    File names follow the pattern ``sequence_data<cam><day><seq>.mat``. The
    camera tag is ``_1`` ... ``_5`` when ``flag == "pre"`` and ``1`` ... ``5``
    when ``flag == "post"``; the day tag is always ``-1_`` and the sequence
    is ``1`` or ``2``.

    Parameters
    ----------
    dirname : str
        Directory that is joined in front of every file name.
    flag : str
        Either "pre" or "post"; selects the camera naming scheme.

    Returns
    -------
    list of list
        One ``[pathfile, camera, day, sequence]`` entry per file, ordered
        by camera first and sequence last. ``camera`` is the last character
        of the camera tag and ``day`` the second character of the day tag.

    Raises
    ------
    ValueError
        If ``flag`` is neither "pre" nor "post". Returning the message as a
        string (as done previously) let callers iterate over its characters
        and fail later with an unrelated error.
    """

    # pieces used to construct the file name
    prefix, suffix = "sequence_data", ".mat"
    camera_tags = {
        "pre": ["_1", "_2", "_3", "_4", "_5"],
        "post": ["1", "2", "3", "4", "5"],
    }
    # isinstance guard keeps unhashable flags from raising TypeError in the lookup
    cameras = camera_tags.get(flag) if isinstance(flag, str) else None
    if cameras is None:
        raise ValueError(
            f"No match for {flag}, you can only choose between \"pre\" and \"post\""
        )
    days = ["-1_"]
    sequences = ["1", "2"]

    # one entry per (camera, day, sequence) combination
    info_mat = []
    for cam, day, seq in itertools.product(cameras, days, sequences):
        filename = prefix + cam + day + seq + suffix
        pathfile = os.path.join(dirname, filename)
        # cam[-1] drops the optional leading underscore, day[1] is the digit in "-1_"
        info_mat.append([pathfile, cam[-1], day[1], seq])

    return info_mat

In [3]:
def load_mat(info):
    """
    Read the .mat file referenced by one entry of info_mat.

    Only the first element of `info` (the file path) is used here. The
    return value is whatever scipy.io.loadmat produces for that path,
    i.e. a Python dictionary.
    """
    return loadmat(info[0])

In [4]:
def is_correct_scn(trajs):
    """
    Tell whether a scene is admissible, i.e. every trajectory is
    non-decreasing (no vehicle travelling in the opposite direction).

    Returns a pair ``(flag, wrong_path)``: ``(True, [None])`` when all the
    trajectories pass the check, otherwise ``(False, traj)`` where ``traj``
    is the first trajectory that contains a decreasing step.
    """
    for path in trajs:
        for previous, following in zip(path, path[1:]):
            if not (previous <= following):
                # first offending trajectory found: stop scanning
                return False, path
    return True, [None]

In [5]:
def mat2pd(mat,info):
    """
    Convert a loaded .mat dictionary into a pandas DataFrame, one row per scene.

    `mat['sequences']` is indexed as ``seqs[scn][0][0][0]`` to reach the
    ``(positions, timestamps)`` pair of scene ``scn``
    (assumes the nested layout produced by loadmat -- TODO confirm).
    `info` is an info_mat entry; its elements 1, 2 and 3 fill the constant
    'cam', 'day' and 'seq' columns.

    Columns: 'Tarr' (timestamps), 'Xarr' (positions, one row per vehicle),
    'Cons Dis' (row-wise differences of the positions), 'N. vehicles',
    'cam', 'day', 'seq', 'is_correct' and 'wrong_path' (both coming from
    is_correct_scn).
    """
    sequences = mat['sequences']
    n_scenes = sequences.shape[0]  # number of scenes stored in this file

    # (positions, timestamps) record of every scene
    scenes = [sequences[idx][0][0][0] for idx in range(n_scenes)]
    positions = [scene[0] for scene in scenes]
    timestamps = [scene[1][0] for scene in scenes]

    # admissibility check of every scene: (flag, offending trajectory)
    checks = [is_correct_scn(x_scene) for x_scene in positions]

    columns = {
        'Tarr': timestamps,
        'Xarr': positions,
        # differences between consecutive rows (vehicles) of each scene
        'Cons Dis': [np.diff(x_scene, axis=0) for x_scene in positions],
        'N. vehicles': [len(x_scene) for x_scene in positions],
        'cam': info[1],
        'day': info[2],
        'seq': info[3],
        'is_correct': [ok for ok, _ in checks],
        'wrong_path': [offender for _, offender in checks],
    }
    return pd.DataFrame(columns)

In [6]:
def df_purify(df):
    """
    Keep only the rows whose scene is admissible.

    Rows where 'is_correct' equals True are retained; the bookkeeping
    columns 'is_correct' and 'wrong_path' are then removed. The input
    frame is left untouched and the original index labels are preserved.
    """
    admissible = df['is_correct'] == True
    kept_rows = df.loc[admissible]
    return kept_rows.drop(columns=['is_correct', 'wrong_path'])

In [7]:
def load_dataset(dirname, flag):
    """
    Load every .mat file listed by get_info_mat and convert it to pandas.

    Parameters
    ----------
    dirname : str
        Directory containing the .mat files (forwarded to get_info_mat).
    flag : str
        File naming scheme, forwarded to get_info_mat ("pre" or "post").

    Returns
    -------
    merged_df : pandas.DataFrame
        Concatenation of all the per-file frames. Index labels are not
        renumbered, so they restart from 0 for every file.
    dflist : list of pandas.DataFrame
        One purified frame per .mat file, each indexed from 0 and tagged
        with a 1-based 'N. file' column.

    Raises
    ------
    ValueError
        If get_info_mat does not return a list of entries for `flag`.
    """
    info_mat = get_info_mat(dirname, flag)
    if isinstance(info_mat, str):
        # get_info_mat may signal an unsupported flag with a message string;
        # looping over it would hand single characters to load_mat.
        raise ValueError(info_mat)

    dflist = []  # one purified DataFrame per .mat file
    for file_number, info in enumerate(info_mat, start=1):
        mat = load_mat(info)    # load .mat
        df = mat2pd(mat, info)  # convert mat into a pd dataframe
        df['N. file'] = [file_number]*len(df)

        # drop the non-admissible scenes and renumber the rows from 0
        dflist.append(df_purify(df).reset_index(drop=True))

    merged_df = pd.concat(dflist)

    return merged_df, dflist

## Testing

In [8]:
# Load the .mat files: the data folder is looked up in the parent of the
# current working directory.
par_dir = os.path.dirname(os.getcwd()) # parent dir
dir_name = os.path.join(par_dir, "NN-interaction")  # portable, no hard-coded separator

merged_df, dflist = load_dataset(dir_name, 'post')

In [9]:
# Inspect the purified DataFrame built from the first .mat file
dflist[0]

Unnamed: 0,Tarr,Xarr,Cons Dis,N. vehicles,cam,day,seq,N. file
0,"[14.240000000000002, 14.440000000000001, 14.64]","[[47.39131045666992, 53.82017733698899, 60.334...","[[3.9987357284027283, 1.7240925784948118, 0.06...",3,1,1,1,1
1,"[23.64, 23.840000000000003, 24.040000000000003...","[[39.20354853492598, 44.53373067590911, 50.344...","[[7.390266022169413, 7.390335490093179, 7.4754...",3,1,1,1,1
2,"[72.44, 72.64, 72.84, 73.04, 73.24000000000001...","[[2.7333870214228604, 7.903389478164454, 13.90...","[[21.867868119293895, 22.917965048072983, 23.6...",3,1,1,1,1
3,"[84.24000000000001, 84.44000000000001, 84.64, ...","[[21.74922515152677, 27.637296392268254, 33.03...","[[8.783524121740438, 7.59154793586179, 7.04558...",3,1,1,1,1
4,"[131.64, 131.84, 132.04]","[[29.92533961013666, 36.085487468240274, 42.11...","[[2.5808787162736593, 2.2428658969453323, 1.82...",3,1,1,1,1
5,"[148.04, 148.23999999999998, 148.44]","[[48.201888758537635, 52.58811573357667, 57.22...","[[12.802741302154757, 13.596920632589942, 15.6...",3,1,1,1,1
6,"[161.64, 161.84, 162.04]","[[28.43130968766962, 34.76343487043734, 40.755...","[[7.410148384362291, 6.2181928848025905, 5.686...",3,1,1,1,1
7,"[164.04, 164.24, 164.44, 164.64]","[[1.6731532650716647, 7.263156064323007, 12.60...","[[28.19418223864483, 29.110321119546107, 30.05...",3,1,1,1,1
8,"[215.64, 215.84, 216.04]","[[7.567157827915416, 13.423172011187404, 19.23...","[[25.44823587276452, 26.540406602870043, 27.50...",3,1,1,1,1
9,"[216.64, 216.84, 217.04]","[[23.001240988631807, 30.50534908349185, 38.47...","[[14.42226470829813, 13.744381684525315, 12.54...",3,1,1,1,1


In [10]:
# `dflist2` is never defined at notebook scope (load_dataset only returns
# merged_df and dflist), so inspect the concatenated DataFrame instead.
merged_df.head()