In [1]:
# Python libraries
import os
from scipy.io import loadmat
import pandas as pd
import numpy as np
import itertools

# Load Matlab .mat files in Python
[source](https://towardsdatascience.com/how-to-load-matlab-mat-files-in-python-1f200e1287b5)

In [2]:
def get_info_mat(dirname, flag):
    """
    Build the list of .mat files to load, together with their metadata.

    File names follow the pattern
    ``sequence_data<camera tag><day tag><sequence>.mat``; the camera tag
    is "_1".."_5" for flag "pre" and "1".."5" for flag "post".

    Parameters
    ----------
    dirname : str
        Directory that contains the .mat files.
    flag : str
        Either "pre" or "post"; selects the camera tag convention.

    Returns
    -------
    list of list
        One entry ``[pathfile, camera, day, sequence]`` per file, ordered by
        camera, then day, then sequence. ``camera`` is the last character
        of the camera tag and ``day`` is the second character of the day tag.

    Raises
    ------
    ValueError
        If ``flag`` is neither "pre" nor "post". Raising (rather than
        returning the message as a string) stops callers from iterating
        over the characters of an error message as if they were entries.
    """

    # in order to construct the pathname
    prefix, suffix = "sequence_data", ".mat"
    camera_tags = {
        "pre": ["_1", "_2", "_3", "_4", "_5"],
        "post": ["1", "2", "3", "4", "5"],
    }
    # isinstance check keeps unhashable flags from failing inside the lookup
    cameras = camera_tags.get(flag) if isinstance(flag, str) else None
    if cameras is None:
        raise ValueError(
            f"No match for {flag}, you can only choose between \"pre\" and \"post\""
        )
    days = ["-1_"]
    sequences = ["1", "2"]

    # store the info in a list
    info_mat = []
    for cam, day, seq in itertools.product(cameras, days, sequences):
        filename = prefix + cam + day + seq + suffix
        pathfile = os.path.join(dirname, filename)
        # cam[-1] drops the optional leading "_"; day[1] picks the digit of "-1_"
        info_mat.append([pathfile, cam[-1], day[1], seq])

    return info_mat

In [3]:
def load_mat(info):
    """
    Read the .mat file described by one entry of info_mat.

    Only the first element of ``info`` (the file path) is used; the file
    is read with scipy's ``loadmat`` and its result is returned unchanged.
    """
    return loadmat(info[0])

In [44]:
def mat2pd(mat,info):
    """
    Turn one loaded .mat dictionary into a DataFrame with one row per scene.

    ``mat['sequences']`` is walked along its first axis; for every scene the
    record at ``[scene][0][0][0]`` supplies the positions (element 0) and
    the timestamps (element 1, first item).

    Columns of the result:
      - 'Tarr'        : timestamps of the scene
      - 'Xarr'        : positions of the scene
      - 'Cons Dis'    : np.diff of the positions along axis 0
      - 'N. vehicles' : len() of the positions (rows are counted as vehicles)
      - 'cam', 'day', 'seq' : info[1], info[2], info[3], repeated on every row
    """
    scenes = mat['sequences']

    # one (positions, timestamps) record per scene
    records = [scenes[idx][0][0][0] for idx in range(scenes.shape[0])]
    positions = [record[0] for record in records]
    timestamps = [record[1][0] for record in records]

    columns = {
        'Tarr': timestamps,
        'Xarr': positions,
        # differences between consecutive rows of each position array
        'Cons Dis': [np.diff(x_scene, axis=0) for x_scene in positions],
        'N. vehicles': [len(x_scene) for x_scene in positions],
        'cam': info[1],
        'day': info[2],
        'seq': info[3],
    }

    return pd.DataFrame(columns)

In [45]:
def load_dataset(dirname, flag):
    """
    Load every .mat file listed by get_info_mat and convert each to a DataFrame.

    Parameters
    ----------
    dirname : str
        Directory that contains the .mat files.
    flag : str
        Forwarded to get_info_mat to select the file naming convention.

    Returns
    -------
    merged_df : pandas.DataFrame
        Concatenation of all per-file DataFrames. Each file keeps its own
        row index, so index values repeat across files.
    dflist : list of pandas.DataFrame
        One DataFrame per .mat file, in the order given by get_info_mat.
        Every frame carries an 'N. file' column holding its 1-based position.

    Raises
    ------
    ValueError
        If the file listing comes back as a message string instead of a list.
    """

    info_mat = get_info_mat(dirname, flag)
    if isinstance(info_mat, str):
        # Guard against an error message string coming back instead of a list:
        # iterating over it would feed single characters to load_mat.
        raise ValueError(info_mat)

    dflist = []  # one DataFrame per .mat file
    for file_number, info in enumerate(info_mat, start=1):
        df = mat2pd(load_mat(info), info)
        df['N. file'] = file_number  # scalar is broadcast to every row
        dflist.append(df)

    merged_df = pd.concat(dflist)

    return merged_df, dflist

## Testing

In [46]:
# Load every "post" .mat file from the NN-interaction folder that sits in the
# parent of the current working directory.
par_dir = os.path.dirname(os.getcwd()) # parent dir
# os.path.join instead of a hard-coded "/" keeps the path portable
dir_name = os.path.join(par_dir, "NN-interaction")

merged_df, dflist = load_dataset(dir_name, 'post')

In [47]:
# Display the DataFrame built from the seventh .mat file (position 6 in dflist)
dflist[6]

Unnamed: 0,Tarr,Xarr,Cons Dis,N. vehicles,cam,day,seq,N. file
0,"[38.56000000000002, 38.76000000000002, 38.9600...","[[56.76779191883536, 63.66958997302261, 70.423...","[[4.5860614406170015, 4.5441095222412855, 3.27...",3,4,1,1,7
1,"[39.76000000000002, 39.960000000000015, 40.160...","[[28.585285955308702, 33.09265031550572, 38.51...","[[0.4709603855852791, 1.2653982919268145, 0.30...",3,4,1,1,7
2,"[45.960000000000015, 46.16000000000002, 46.360...","[[43.254809909967804, 48.3827180564267, 53.895...","[[19.587905248792197, 20.909581613054954, 21.1...",3,4,1,1,7
3,"[162.76000000000002, 162.96000000000004, 163.1...","[[46.94988362635183, 48.967282935137554, 51.52...","[[18.96591212249644, 21.1029041579498, 23.6218...",3,4,1,1,7
4,"[165.76000000000002, 165.96, 166.1600000000000...","[[42.55153967028384, 45.476610417059014, 48.45...","[[15.783878141465813, 15.853851450384909, 15.0...",3,4,1,1,7
5,"[174.96, 175.16000000000003, 175.36, 175.56]","[[59.253208038584134, 44.932391091893074, 49.1...","[[22.732512723018445, 17.816190664542596, 18.0...",3,4,1,1,7
6,"[185.76000000000002, 185.96, 186.16000000000003]","[[51.28102449925153, 49.06464696775758, 53.782...","[[26.373021519402172, 6.497184692209068, 7.616...",3,4,1,1,7
7,"[188.16000000000003, 188.36, 188.56, 188.76000...","[[18.612953759355808, 22.265683904271114, 26.1...","[[16.6995712756874, 17.296299871555533, 18.707...",3,4,1,1,7
8,"[220.96, 221.16000000000003, 221.36]","[[42.126908214429605, 47.57846358020522, 52.56...","[[10.360282563110374, 10.478548380919278, 11.8...",3,4,1,1,7
9,"[166.56, 166.76000000000002, 166.96, 167.16000...","[[19.01106889821605, 22.459397588298913, 26.13...","[[27.60058788958563, 27.29976381082667, 29.167...",4,4,1,1,7


In [37]:
# Display the concatenation of all per-file DataFrames.
# NOTE(review): this cell's execution count (37) is lower than that of the
# cells defining mat2pd/load_dataset (44-47), so the stored output below may
# come from an older run; its 'Cons Dis' values look like differences along
# the second axis rather than axis=0. Re-run after Restart & Run All to confirm.
merged_df

Unnamed: 0,Tarr,Xarr,Cons Dis,N. vehicles,cam,day,seq,N. file
0,"[9.440000000000001, 9.64, 9.840000000000002, 1...","[[29.373325674060805, 34.22142942994519, 38.50...","[[4.8481037558843845, 4.285103626882972, 5.181...",3,1,1,1,1
1,"[14.240000000000002, 14.440000000000001, 14.64]","[[47.39131045666992, 53.82017733698899, 60.334...","[[6.42886688031907, 6.514427459392209], [4.154...",3,1,1,1,1
2,"[23.64, 23.840000000000003, 24.040000000000003...","[[39.20354853492598, 44.53373067590911, 50.344...","[[5.330182140983133, 5.810287037040418, 5.8102...",3,1,1,1,1
3,"[72.44, 72.64, 72.84, 73.04, 73.24000000000001...","[[2.7333870214228604, 7.903389478164454, 13.90...","[[5.170002456741593, 6.000012617715687, 4.7500...",3,1,1,1,1
4,"[84.24000000000001, 84.44000000000001, 84.64, ...","[[21.74922515152677, 27.637296392268254, 33.03...","[[5.8880712407414855, 5.40209800788945, 5.5201...",3,1,1,1,1
...,...,...,...,...,...,...,...,...
21,"[483.92, 484.12, 484.32000000000005]","[[48.41808799027481, 52.23427216057917, 56.086...","[[3.8161841703043535, 3.8522078081055966], [0....",3,5,1,2,10
22,"[531.72, 531.9200000000001, 532.12, 532.32]","[[41.40738261388648, 41.45076935491222, 44.091...","[[0.04338674102573492, 2.6406673041812354, 3.6...",3,5,1,2,10
23,"[202.52, 202.72, 202.92, 203.12]","[[28.41048427064858, 33.081120786920465, 37.80...","[[4.670636516271884, 4.720113639396864, 5.2501...",4,5,1,2,10
24,"[342.92, 343.12, 343.32]","[[30.161830895474754, 32.14337334634746, 34.80...","[[1.9815424508727055, 2.666059276865461], [3.5...",4,5,1,2,10
