In [2]:
import scipy.io, math, os
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as FuncAnimation
from mpl_toolkits.mplot3d import Axes3D

In [85]:
directory = 'data/Data/F1/mat'
counter = 1

# One list per channel; each list ends up with one DataFrame per .mat file
UL_df = []
LL_df = []
JW_df = []
TD_df = []
TB_df = []
TT_df = []
audio_df = []

# Position of each entry inside a loaded recording -> list that collects it
# (entry 0 is the audio entry, entries 1-6 are UL, LL, JW, TD, TB, TT)
channel_targets = (audio_df, UL_df, LL_df, JW_df, TD_df, TB_df, TT_df)

# Walk the directory in sorted order so list position identifies the file
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.mat'):
        continue

    f = os.path.join(directory, filename)
    mat = scipy.io.loadmat(f)

    # Every file stores its payload under a key built from the range of
    # numbers it covers (001_005, 006_010, ...), hence the step of five
    key = 'usctimit_ema_f1_{:03}_{:03}'.format(counter, counter + 4)
    data = mat[key]
    counter += 5

    # The third field of each entry is what gets turned into a DataFrame
    for channel, target in enumerate(channel_targets):
        target.append(pd.DataFrame.from_dict(data[0][channel][2]))

In [124]:
# Sanity check: number of DataFrames collected in UL_df (one is appended per .mat file read)
len(UL_df)

92

In [133]:
# Tag every per-file frame with its position in its list (the file code)
for frame_list in (UL_df, LL_df, JW_df, TD_df, TB_df, TT_df):
    add_file(frame_list)

# Stack the per-file frames of each list row-wise into one long frame
merged_ul, merged_ll, merged_jw, merged_td, merged_tb, merged_tt = (
    pd.concat(frame_list, axis=0)
    for frame_list in (UL_df, LL_df, JW_df, TD_df, TB_df, TT_df)
)

# Label the columns: the file code first, then <prefix>_0 .. <prefix>_2
for merged, prefix in (
    (merged_ul, 'ul'),
    (merged_ll, 'll'),
    (merged_jw, 'jw'),
    (merged_td, 'td'),
    (merged_tb, 'tb'),
    (merged_tt, 'tt'),
):
    merged.columns = ['File Code'] + ['{}_{}'.format(prefix, axis) for axis in range(3)]

In [134]:
# Inspect a single recording: keep only the rows of merged_ll tagged with file code 1.
# (A bare `merged_ul` used to sit above this line; only the last expression of a
# cell is displayed, so it had no effect and has been dropped.)
current_df = merged_ll[merged_ll['File Code'].eq(1)]
current_df

Unnamed: 0,File Code,ll_0,ll_1,ll_2
0,1,11.680122,-68.402306,0.440171
1,1,11.612473,-68.485551,0.446420
2,1,11.592781,-68.574963,0.445524
3,1,11.625414,-68.646635,0.446909
4,1,11.668815,-68.666546,0.460491
...,...,...,...,...
2060,1,12.191929,-68.316692,-0.286679
2061,1,12.228898,-68.328487,-0.282531
2062,1,12.234798,-68.330998,-0.263476
2063,1,12.231281,-68.332498,-0.237793


In [135]:

# Positional window into current_df: rows 204 up to, but not including, 600
row_window = slice(204, 600)
current_var = current_df.iloc[row_window]
current_var

Unnamed: 0,File Code,ll_0,ll_1,ll_2
204,1,15.248408,-71.448277,0.216339
205,1,15.184509,-71.408274,0.226139
206,1,15.113233,-71.369577,0.234965
207,1,15.054278,-71.318789,0.240335
208,1,15.003096,-71.254977,0.256465
...,...,...,...,...
595,1,10.975394,-71.974722,-0.910497
596,1,11.105315,-71.729843,-0.933350
597,1,11.246685,-71.565311,-0.964884
598,1,11.388517,-71.464922,-1.000399


In [136]:
# FUNCTION FOR ADDING FILE CODE
def add_file(dataframe):
    '''
        Tag each DataFrame of a list with its position in that list.

        dataframe: list of DataFrames (modified in place).

        Every frame gets a 'File code' column, placed first, holding the
        frame's index within the list. Nothing is returned.
    '''
    for file_code, frame in enumerate(dataframe):
        # Drop a previous copy so the column can be re-created up front
        if 'File code' in frame.columns:
            del frame['File code']
        frame.insert(0, 'File code', file_code)
        

# FUNCTION FOR OBTAINING SAMPLING RATE
def get_srate(file_number):
    '''
        Read the sampling rate stored in one of the EMA .mat files.

        file_number: zero-based position of the file among the .mat files of
                     the data directory, taken in sorted order.

        Returns the value found at [0][1][1][0][0] of the file's payload,
        i.e. the second field of the second entry of the recording.

        Raises IndexError when file_number does not address an existing file.
    '''
    directory = 'data/Data/F1/mat'

    # Keep only the recordings, so that stray entries such as .DS_Store
    # cannot shift the position of the files in the sorted listing
    mat_files = sorted(name for name in os.listdir(directory) if name.endswith('.mat'))
    if not 0 <= file_number < len(mat_files):
        raise IndexError(
            'file_number {} is out of range for {} .mat files in {}'.format(
                file_number, len(mat_files), directory))

    f = os.path.join(directory, mat_files[file_number])

    # Each file keeps its payload under a key naming the five numbers it covers
    key = 'usctimit_ema_f1_{:03}_{:03}'.format(file_number*5 + 1, file_number*5 + 5)
    mat = scipy.io.loadmat(f)[key]

    # returns the srate which is stored here
    return mat[0][1][1][0][0]


# FUNCTION FOR GETTING COORDINATES FOR WORD TIMEFRAME
def get_values(df_list, merged_df, file_code, start=None, end=None):
    '''
        Get the rows of one file that fall inside a word's time frame.

        df_list:   list of per-file DataFrames; only its length is used, to
                   check that file_code refers to an existing file.
        merged_df: DataFrame with a 'File Code' column identifying the file
                   each row belongs to.
        file_code: code of the file to extract rows from.
        start:     first row position (inclusive) within that file's rows.
        end:       last row position (exclusive) within that file's rows.

        When start or end is omitted, the module-level `starting_point` /
        `end_point` is used instead, which is how existing three-argument
        callers have always passed the bounds.

        Returns the selected rows with all columns of merged_df.
        Raises IndexError when file_code does not refer to an entry of df_list.
    '''
    # Previously an unknown code fell through the loop and surfaced as an
    # UnboundLocalError on the return statement
    if file_code not in range(len(df_list)):
        raise IndexError(
            'file_code {!r} does not match any of the {} files'.format(
                file_code, len(df_list)))

    # Legacy fallback: bounds set as globals by the calling cell
    if start is None:
        start = starting_point
    if end is None:
        end = end_point

    current_df = merged_df.loc[merged_df['File Code'] == file_code]
    return current_df.iloc[start:end, :]

In [139]:
ema_frames = list()

# Read all timestamp lines up front so the file is closed before the heavy work
with open('timestamps.txt', 'r') as file:
    timestamps = file.read().splitlines()

# Each line is comma-separated: file code, word, start time, end time, ..., sentence number
for line in timestamps:
    # tolerate blank lines (e.g. a trailing newline at the end of the file)
    if not line.strip():
        continue

    split_line = line.split(',')
    sent_number = int(split_line[-1])
    file_code = int(split_line[0])

    # get_srate loads a .mat file on every call, so look the rate up once per line
    srate = get_srate(file_code)

    # find start and end by multiplying the timestamps with the sampling rate;
    # get_values picks these two up as module-level names
    starting_point = math.floor(float(split_line[2]) * srate)
    end_point = math.ceil(float(split_line[3]) * srate)

    # track the length of word
    length = end_point - starting_point

    # get data frame with all values
    ul = get_values(UL_df, merged_ul, file_code)
    ll = get_values(LL_df, merged_ll, file_code)
    jw = get_values(JW_df, merged_jw, file_code)
    td = get_values(TD_df, merged_td, file_code)
    tb = get_values(TB_df, merged_tb, file_code)
    tt = get_values(TT_df, merged_tt, file_code)

    # concatenate all dataframes side by side; every piece carries its own
    # 'File Code' column, so keep only the first occurrence of each column name
    df_data = pd.concat([ul, ll, jw, td, tb, tt], axis = 1)
    df_data = df_data.loc[:,~df_data.columns.duplicated()]

    # retrieve meta data and combine with ema data into dictionary
    data = {'word' : [split_line[1]],
            'srate': [srate],
            'sent' : [sent_number],
            'Data'   : [df_data]}

    ema_frames.append(data)

In [None]:
audio_frames = list()

# Rate used to turn timestamps (seconds) into audio sample positions
AUDIO_SRATE = 22050

# Read all timestamp lines up front so the file is closed before processing
with open('timestamps.txt', 'r') as file:
    timestamps = file.read().splitlines()

# Each line is comma-separated: file code, word, start time, end time, ..., sentence number
for line in timestamps:
    # tolerate blank lines (e.g. a trailing newline at the end of the file)
    if not line.strip():
        continue

    split_line = line.split(',')
    sent_number = int(split_line[-1])
    file_code = int(split_line[0])

    # find start and end by multiplying the timestamps with the sampling rate
    starting_point = math.floor(float(split_line[2]) * AUDIO_SRATE)
    end_point = math.ceil(float(split_line[3]) * AUDIO_SRATE)

    # audio_df holds one frame per file; pick the one this line refers to
    # (a separate name is used so the audio_df list itself is not overwritten)
    audio_full = audio_df[file_code]

    # keep the samples whose index lies between start and end, both inclusive
    in_word = (audio_full.index >= starting_point) & (audio_full.index <= end_point)
    segment = audio_full.loc[in_word]

    # retrieve meta data and combine with audio data into dictionary
    # NOTE(review): the draft stored get_srate(...) here, which reads the rate of
    # the second entry of the recording rather than the audio entry; the rate
    # actually used to cut the segment is stored instead. Confirm this is intended.
    data = {'word' : [split_line[1]],
            'srate': [AUDIO_SRATE],
            'sent' : [sent_number],
            'Data'   : [segment]}

    audio_frames.append(data)