In [1]:
import scipy.io, math, os
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as FuncAnimation
from mpl_toolkits.mplot3d import Axes3D

In [146]:
directory = 'data/Data/F1/mat'
counter = 1
UL_df, LL_df, JW_df, TD_df, TB_df, TT_df, audio = [], [], [], [], [], [], []

# Per-file accumulators, listed in the order their entries are read from each
# .mat record: position 0 is collected into `audio`, positions 1-6 into the
# six sensor lists.
channel_lists = (audio, UL_df, LL_df, JW_df, TD_df, TB_df, TT_df)

# Walk the recordings in sorted filename order and collect, per channel, one
# dataframe per file.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.mat'):
        continue

    f = os.path.join(directory, filename)
    mat = scipy.io.loadmat(f)
    # Each file keeps its payload under a key naming the range of five items
    # it covers (001_005, 006_010, ...); `counter` tracks the first of them.
    data = mat['usctimit_ema_f1_{:03}_{:03}'.format(counter, counter + 4)]
    counter += 5

    # Index 2 of every entry is the part that is turned into a dataframe.
    for position, frames in enumerate(channel_lists):
        frames.append(pd.DataFrame.from_dict(data[0][position][2]))

# Stack the per-file frames of every sensor into one long frame with a fresh
# 0..n-1 row index (the `audio` list is collected but not merged here).
merged_ul, merged_ll, merged_jw, merged_td, merged_tb, merged_tt = (
    pd.concat(frames, axis=0, ignore_index=True)
    for frames in (UL_df, LL_df, JW_df, TD_df, TB_df, TT_df)
)

# Label the three columns of each merged frame as <sensor>_0 .. <sensor>_2.
for prefix, frame in (('ul', merged_ul), ('ll', merged_ll), ('jw', merged_jw),
                      ('td', merged_td), ('tb', merged_tb), ('tt', merged_tt)):
    frame.columns = ['{}_{}'.format(prefix, axis) for axis in range(3)]

In [4]:
def get_srate(file_number):
    '''
        Return the sampling rate stored in one EMA recording file.

        Parameters
        ----------
        file_number : int
            Zero-based position of the recording among the ``.mat`` files of
            the data directory, taken in sorted filename order. File ``n`` is
            expected to contain the key
            ``usctimit_ema_f1_{5n+1:03}_{5n+5:03}``.

        Returns
        -------
        The scalar stored at ``[0][1][1][0][0]`` of that entry, which is
        where the sampling rate is kept.

        Raises
        ------
        IndexError
            If there is no ``.mat`` file at position ``file_number``.
        KeyError
            If the selected file does not contain the expected key.
    '''
    directory = 'data/Data/F1/mat'

    # Only consider the recordings themselves. Indexing the raw directory
    # listing with a fixed +1 offset is only right when exactly one stray
    # entry (e.g. .DS_Store) happens to sort before the data files.
    mat_files = sorted(name for name in os.listdir(directory)
                       if name.endswith('.mat'))
    file = mat_files[file_number]

    f = os.path.join(directory, file)
    key = 'usctimit_ema_f1_{:03}_{:03}'.format(file_number*5 + 1, file_number*5 + 5)
    mat = scipy.io.loadmat(f)[key]

    # returns the srate which is stored here
    return mat[0][1][1][0][0]

In [129]:
ema_frames = list()
word_number = 0
diff = 0

# Sampling rate per recording file, filled on first use. get_srate() loads a
# .mat file from disk on every call, so asking it several times per word makes
# this loop extremely slow; each file is looked up once instead.
srate_by_file = {}

# Read all timestamp lines up front so the file is closed before the loop.
with open('timestamps.txt', 'r') as file:
    timestamps = file.read().splitlines()

# Each line is comma-separated: field 0 is the file number handed to
# get_srate, field 1 the word, fields 2 and 3 the start and end time of the
# word, and the last field the sentence number.
for line in timestamps:
    if not line.strip():
        # tolerate blank lines instead of failing on int('')
        continue

    split_line = line.split(',')
    file_number = int(split_line[0])
    sent_number = int(split_line[-1])

    if file_number not in srate_by_file:
        srate_by_file[file_number] = get_srate(file_number)
    srate = srate_by_file[file_number]

    # find start and end by multiplying the timestamps with the sampling rate;
    # floor/ceil widen the window so the whole word is covered
    starting_point = math.floor(float(split_line[2]) * srate)
    end_point = math.ceil(float(split_line[3]) * srate)

    # track the length of word (in samples)
    diff = end_point - starting_point

    # cut the same row window out of every sensor frame
    # NOTE(review): the merged_* frames are indexed by row position; confirm
    # that the timestamps are offsets into those frames rather than offsets
    # into the individual recording file.
    ul, ll, jw, td, tb, tt = (
        frame.iloc[starting_point:end_point, :]
        for frame in (merged_ul, merged_ll, merged_jw,
                      merged_td, merged_tb, merged_tt)
    )

    # put the six sensors side by side, one row per sample
    df_data = pd.concat([ul, ll, jw, td, tb, tt], axis=1)

    # retrieve meta data and combine with ema data into dictionary
    data = {'word' : [split_line[1]],
            'srate': [srate],
            'sent' : [sent_number],
            'Data'   : df_data}

    ema_frames.append(data)

KeyboardInterrupt: 

In [None]:



# get the audio segment that corresponds to this word
# NOTE(review): this cell looks like a fragment lifted out of a loop body - the
# statements below are indented with no enclosing block, and `split_line` is
# not defined in this cell, so it cannot run on its own as written.
        # convert the start and end timestamps to sample indices;
        # 22050 is presumably the audio sampling rate in Hz - TODO confirm
        start = math.floor(float(split_line[2]) * 22050)
        end = math.floor(float(split_line[3]) * 22050)
        
        # keep the rows whose index lies between start and end, both inclusive;
        # `file` has to be a pandas object here since it is indexed with .loc -
        # TODO confirm what it is expected to hold
        segment = file.loc[(file.index >= start) & (file.index <= end)]