In [6]:
import scipy.io, math, os
import pandas as pd, numpy as np
import help_functions

import matplotlib.pyplot as plt
import matplotlib.animation as FuncAnimation
from mpl_toolkits.mplot3d import Axes3D

In [7]:
# Folder holding the .mat recordings, and the running index used to rebuild
# the variable name stored inside each file (advanced by 5 per file).
directory = 'data/Data/F1/mat'
counter = 1

# One list per channel; every list receives one DataFrame per .mat file.
UL_df, LL_df, JW_df, TD_df, TB_df, TT_df, audio_df = ([] for _ in range(7))

# Lists ordered by the position their channel is read from in the loaded
# record (index 0 is the audio entry, 1..6 are the six markers).
channel_lists = (audio_df, UL_df, LL_df, JW_df, TD_df, TB_df, TT_df)
marker_lists = channel_lists[1:]

# Walk the files in sorted order so list position i always refers to the
# i-th recording.
for filename in sorted(os.listdir(directory)):
    if not filename.endswith('.mat'):
        continue

    f = os.path.join(directory, filename)
    mat = scipy.io.loadmat(f)

    # The payload sits under a key built from the running counter
    # (counter .. counter + 4), so sorted file order must match that numbering.
    data = mat['usctimit_ema_f1_{:03}_{:03}'.format(counter, counter + 4)]
    counter += 5

    # Field 2 of each channel entry is turned into a DataFrame.
    for channel_index, channel_frames in enumerate(channel_lists):
        channel_frames.append(pd.DataFrame.from_dict(data[0][channel_index][2]))

# Add the file code to every marker dataframe (audio is left untouched).
# NOTE(review): add_file is neither defined nor imported in the cells shown
# (only `import help_functions` is) - confirm where it comes from. The rename
# below presumes it puts the file code in the first column.
for marker_frames in marker_lists:
    add_file(marker_frames)

# Stack the per-file frames of each marker into one long dataframe.
merged_ul, merged_ll, merged_jw, merged_td, merged_tb, merged_tt = [
    pd.concat(marker_frames, axis=0) for marker_frames in marker_lists
]

# Give every merged frame its final column names.
for merged_frame, column_names in (
    (merged_ul, ['File Code', 'ul_0', 'ul_1', 'ul_2']),
    (merged_ll, ['File Code', 'll_0', 'll_1', 'll_2']),
    (merged_jw, ['File Code', 'jw_0', 'jw_1', 'jw_2']),
    (merged_td, ['File Code', 'td_0', 'td_1', 'td_2']),
    (merged_tb, ['File Code', 'tb_0', 'tb_1', 'tb_2']),
    (merged_tt, ['File Code', 'tt_0', 'tt_1', 'tt_2']),
):
    merged_frame.columns = column_names

Unnamed: 0,File Code,ul_0,ul_1,ul_2
0,0,11.517622,-57.244717,0.553757
1,0,11.529473,-57.236338,0.553376
2,0,11.544196,-57.223581,0.568780
3,0,11.545286,-57.213561,0.593878
4,0,11.517394,-57.210572,0.609659
...,...,...,...,...
2455,91,11.521927,-54.761054,-0.245744
2456,91,11.582655,-54.773594,-0.209278
2457,91,11.631056,-54.774490,-0.209013
2458,91,11.633738,-54.772672,-0.271068


In [9]:
# Build one record per line of timestamps.txt, pairing the EMA marker data of
# the referenced file with the matching slice of that file's audio.
#
# Each line is comma separated: field 0 is the file code, field 1 the word,
# fields 2 and 3 the start/end timestamps, and the last field the sentence
# number.
ema_frames = list()

audio_srate = 22050     # sampling rate used to convert timestamps to audio samples
audio_padding = 220     # extra audio samples kept before and after the word

with open('timestamps.txt', 'r') as file:
    timestamps = file.read().splitlines()

for line in timestamps:
    # tolerate blank lines (e.g. a trailing newline at the end of the file)
    if not line.strip():
        continue

    split_line = line.split(',')
    sent_number = int(split_line[-1])
    file_code = int(split_line[0])

    # NOTE(review): get_srate / get_values are not defined or imported in the
    # cells shown - confirm where they come from.
    srate = get_srate(file_code)

    # find start and end by multiplying the timestamps with the sampling rate for ema markers
    # NOTE(review): these two are not used below; get_values may read them as
    # globals, so the names are kept unchanged - confirm.
    starting_point = math.floor(float(split_line[2]) * srate)
    ending_point = math.ceil(float(split_line[3]) * srate)

    # find start and end by multiplying the timestamps with the sampling rate for audio
    start_point = math.floor(float(split_line[2]) * audio_srate) - audio_padding
    end_point = math.ceil(float(split_line[3]) * audio_srate) + audio_padding

    # get data frame with all values for ema marker
    ul = get_values(UL_df, merged_ul, file_code)
    ll = get_values(LL_df, merged_ll, file_code)
    jw = get_values(JW_df, merged_jw, file_code)
    td = get_values(TD_df, merged_td, file_code)
    tb = get_values(TB_df, merged_tb, file_code)
    tt = get_values(TT_df, merged_tt, file_code)

    # concatenate all dataframes with ema markers and keep only the first
    # occurrence of columns that repeat across them
    df_data = pd.concat([ul, ll, jw, td, tb, tt], axis = 1)
    df_data = df_data.loc[:, ~df_data.columns.duplicated()]

    # get matching audio segment: audio_df is indexed by file code.
    # (The previous search loop broke out after its first iteration, so only
    # file code 0 ever got a fresh segment, and it sliced from the EMA index
    # instead of the audio index.)
    if not 0 <= file_code < len(audio_df):
        raise IndexError('no audio recording for file code {}'.format(file_code))
    current_df = audio_df[file_code]
    # clamp at 0: a negative start would make iloc count from the end
    segment = current_df.iloc[max(start_point, 0):end_point, :]

    # retrieve meta data and combine with ema data into dictionary
    data = {'word' : [split_line[1]],
            'srate': [srate],
            'sent' : [sent_number],
            'file' : [file_code],
            'Data'   : [df_data],
            'Audio' : [segment]}

    ema_frames.append(data)

In [40]:
# Build one audio-only record per line of timestamps.txt.
#
# Each line is comma separated: field 0 is the file code, field 1 the word,
# fields 2 and 3 the start/end timestamps, and the last field the sentence
# number.
audio_frames = list()

audio_srate = 22050     # sampling rate used to convert timestamps to audio samples
audio_padding = 220     # extra audio samples kept before and after the word

with open('timestamps.txt', 'r') as file:
    timestamps = file.read().splitlines()

for line in timestamps:
    # tolerate blank lines (e.g. a trailing newline at the end of the file)
    if not line.strip():
        continue

    split_line = line.split(',')
    sent_number = int(split_line[-1])
    file_code = int(split_line[0])

    # find start and end by multiplying the timestamps with the sampling rate;
    # the start is clamped at 0 because a negative value would make iloc
    # count from the end of the recording
    starting_point = max(math.floor(float(split_line[2]) * audio_srate) - audio_padding, 0)
    end_point = math.ceil(float(split_line[3]) * audio_srate) + audio_padding

    # Cut this word's samples out of the recording it belongs to. Previously
    # `segment` was never assigned in this cell, so every record silently
    # reused whatever value an earlier cell had left behind.
    if not 0 <= file_code < len(audio_df):
        raise IndexError('no audio recording for file code {}'.format(file_code))
    segment = audio_df[file_code].iloc[starting_point:end_point, :]

    # NOTE(review): get_srate is not defined or imported in the cells shown,
    # and 'srate' holds its result rather than the 22050 used for slicing -
    # confirm both are intended.
    data = {'word' : [split_line[1]],
            'srate': [get_srate(file_code)],
            'sent' : [sent_number],
            'file' : [file_code],
            'Audio'   : [segment]}

    audio_frames.append(data)

In [44]:
# Take the first EMA record and the first audio record, then pull out the
# entry each one keeps under its payload key for inspection.
word, a_frame = ema_frames[0], audio_frames[0]
word_data, audio_data = word['Data'], a_frame['Audio']

In [48]:
# Display the first EMA record (word is ema_frames[0]).
# NOTE(review): the saved output below has no 'file' key although the cell
# that builds ema_frames adds one, so this output looks stale - re-run.
word

{'word': ['this'],
 'srate': [100.08258180247088],
 'sent': [1],
 'Data': [     File Code       ul_0       ul_1      ul_2       ll_0       ll_1  \
  94           0  10.893743 -54.952419  0.801510  12.654867 -72.004662   
  95           0  11.001506 -55.057619  0.988263  13.165340 -71.794151   
  96           0  11.176305 -55.175010  1.199579  13.727597 -71.570474   
  97           0  11.362491 -55.282835  1.426503  14.253850 -71.302308   
  98           0  11.514360 -55.386777  1.651452  14.677819 -70.915380   
  99           0  11.624379 -55.506259  1.850430  14.977100 -70.337059   
  100          0  11.699646 -55.656112  2.010826  15.154638 -69.565731   
  101          0  11.745043 -55.836549  2.139870  15.223489 -68.684983   
  102          0  11.768086 -56.030778  2.247447  15.209142 -67.819581   
  103          0  11.777850 -56.204124  2.322978  15.145327 -67.085787   
  104          0  11.784194 -56.317588  2.342273  15.055025 -66.552043   
  105          0  11.799674 -56.362548 

In [47]:
# Display the first audio record (a_frame is audio_frames[0]).
a_frame

{'word': ['this'],
 'srate': [100.08258180247088],
 'sent': [1],
 'file': [0],
 'Audio': [              0
  20507 -0.046509
  20508 -0.045441
  20509 -0.043854
  20510 -0.041779
  20511 -0.037903
  ...         ...
  23809  0.001129
  23810 -0.000458
  23811 -0.000580
  23812 -0.000275
  23813 -0.000061
  
  [3307 rows x 1 columns]]}