In [None]:
import os

In [None]:
# Show the current working directory (the next cell changes it).
os.getcwd()

In [None]:
# change dir to root (the parent of the directory the kernel started in)
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell does not climb one more level up on every execution.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))
os.getcwd()

In [None]:
import os
import pandas as pd
import exploface
import explibrosa
import emrecdem as erd

### Specify data location

In [None]:
# The data folder is taken from the EMRECDEM_DATADIR environment variable;
# a KeyError is raised here when that variable is not set.
datadirectory = os.environ['EMRECDEM_DATADIR']
print(datadirectory)

### Identify input files (.wav recordings)

In [None]:
# Search for files with the .wav extension, because each recording is identified by its audio file.
# This assumes that there are no other .wav files in the folder than the recordings to be processed.
from os import listdir

def find_filenames(path_to_dir, suffix=".wav"):
    """Return the names of the files in ``path_to_dir`` that end with ``suffix``.

    Parameters
    ----------
    path_to_dir : str
        Directory to scan (not recursive).
    suffix : str
        Trailing text a file name must have to be selected; it is removed
        from the returned names. Matching is case-sensitive.

    Returns
    -------
    list of str
        Matching file names without the trailing ``suffix``, sorted
        alphabetically so that the processing order is reproducible
        (``listdir`` itself returns entries in arbitrary order).
    """
    # Slice the suffix off the END of the name instead of splitting on it:
    # ``split`` cuts at the first occurrence, so "a.wav.bak.wav" would be
    # truncated to "a" rather than "a.wav.bak".
    return sorted(
        filename[:len(filename) - len(suffix)]
        for filename in listdir(path_to_dir)
        if filename.endswith(suffix)
    )

# Collect the recordings found in the data folder (names with the default ".wav"
# suffix stripped); the feature-extraction loop further down iterates over them.
filenames = find_filenames(datadirectory) # filenames will be a list
print(filenames)

In [None]:
#https://www.dataquest.io/blog/python-pandas-databases/
# Location of the database file inside the data folder. Built with os.path.join,
# like the other paths in this notebook, so that a trailing separator on the data
# folder does not produce a doubled "/" and the path is valid on every platform.
databasefile = os.path.join(datadirectory, "emrecdem.db")
print(databasefile)

### Create database

In [None]:
# Create the database. Afterwards it can be inspected from the command line with
#   sqlite3 pathtomydatabase.db
# where `.tables` gives an overview of the tables that were created.
erd.features2db.create_db(databasefile=databasefile)

In [None]:
# Flag read by the feature-extraction loop below, which derives from it the
# `deleteDataBaseEntries` argument passed to erd.features2db.add_features.
deleteDataBaseEntriesOnFirstRun = True

### Loop over files, extract features, store features in database

In [None]:
import importlib
# Reload the local packages so that edits made to them are picked up without
# restarting the kernel.
importlib.reload(explibrosa)
importlib.reload(erd)

# For every recording: read its talkspurt fragments, extract video (OpenFace) and
# audio (librosa) features for those fragments, save both as csv next to the data
# and add them to the database.
for file_index, filename in enumerate(filenames):

    # extract participant id (PID) and experiment id (EXP) from filename:
    # PID is the text following the first "P" in the first "_"-separated part,
    # EXP is the concatenation of the second to fourth "_"-separated parts.
    PID = filename.split("_")[0].split("P")[1]
    EXP = ''.join(filename.split("_")[1:4])
    EXP = EXP.split(".csv")[0]  # drop a ".csv" (and anything after it), if present

    # determine fragments from praat transcription
    path = os.path.join(datadirectory, 'getranscribeerd_%s_Mparticipant_talkspurt.TextGrid' % (filename,))
    fragments = erd.fragments.get_fragments_from_textgrid(path)
    print("Fragments:", fragments)

    print("generate video features")

    # OpenFace output belonging to this recording
    openface_file = os.path.join(datadirectory, filename + '_processed', filename + '_Cfront.csv')
    print(openface_file)
    # process with exploface
    openface_features = exploface.get_feature_time_series(openface_file)

    # extract fragments
    openface_features = erd.fragments.extract_fragments_openface(openface_features, fragments, PID, EXP)
    print(openface_features.head())
    openface_features.to_csv(os.path.join(datadirectory, filename + "_openface_features.csv"))

    print("generate audio features")

    # Find wav file that matches the csv file produced by OpenFace
    # based on the assumption that filenames are identical except from file extension
    audio_file = erd.findwav(filename + '.wav', datadirectory)

    # process with explibrosa
    audio_features = explibrosa.get_feature_time_series(audio_file)

    audio_features = erd.fragments.extract_fragments_librosa(audio_features, fragments, PID, EXP)
    print(audio_features.head())
    audio_features.to_csv(os.path.join(datadirectory, filename + "_audio_features.csv"))

    #http://www.sqlitetutorial.net/sqlite-python/create-tables/
    # features to database
    # Request deletion of existing entries only for the FIRST file of the run, and
    # only when deleteDataBaseEntriesOnFirstRun is set. The previous test
    # (`flag == 0`) was inverted and applied to every file: with the flag on nothing
    # was ever deleted, with it off deletion was requested for every file.
    # NOTE(review): assumes the last argument of add_features means "remove the
    # existing entries before inserting" - confirm against erd.features2db.
    deleteDataBaseEntries = bool(deleteDataBaseEntriesOnFirstRun) and file_index == 0

    erd.features2db.add_features(audio_features, openface_features,
            databasefile, deleteDataBaseEntries)
    # check database content
    erd.check_db_content(databasefile)
    


In [None]:
# Combine the video and audio feature tables left over from the last file handled
# by the loop above. No key is given, so pandas joins on the columns that both
# tables have in common.
merged_features = pd.merge(left=openface_features, right=audio_features)
merged_features.head()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
# Scatter plot of the 'pitch' column against the 'AU04_r' column of the merged
# features. The axes are labelled with the column names so that the figure can be
# read on its own.
fig, ax = plt.subplots()
ax.plot(merged_features['AU04_r'], merged_features['pitch'], 'o')
ax.set_xlabel('AU04_r')
ax.set_ylabel('pitch')
ax.grid(True)