In [1]:
import os

In [2]:
os.getcwd()

'/home/vincent/emotion/emrecdem-framework/notebooks'

In [3]:
# change dir to root
os.chdir('../.')
os.getcwd()

'/home/vincent/emotion/emrecdem-framework'

In [4]:
import os
import exploface
import explibrosa
import emrecdem as erd

### Specify data location

In [5]:
emrecdemStudyDataFolder = "/media/sf_sharedfolder/Emotion/emrecdemstudydata"
openface_outputfolder = emrecdemStudyDataFolder + "/OpenFaceOutput"

### Identify csv files

In [6]:
# Search for files with csv extension, because we are only interested in those
# This assumes that there are no other csv files in folder other than the ones produced by OpenFace.
from os import listdir

def find_csv_filenames( path_to_dir, suffix=".csv" ):
    filenames = listdir(path_to_dir)
    return [ filename for filename in filenames if filename.endswith( suffix ) ]

filenames = find_csv_filenames(openface_outputfolder) # filenames will be a list

In [7]:
filenames

['P18_S2_IAPS_HAPPY_Cfront.csv',
 'P18_S2_IAPS_SAD_Cfront.csv',
 'P21_S2_IAPS_HAPPY_C1.csv',
 'P21_S2_IAPS_SAD_C1.csv']

In [8]:
#https://www.dataquest.io/blog/python-pandas-databases/
databasefile = emrecdemStudyDataFolder + "/emrecdem.db"

### Create database

In [9]:
erd.features2sqlitedb.createdatabase(databasefile = databasefile)
# Now on command line: sqlite3 pathtomydatabase.db
# should give access to database
# and .tables should show overview of the tables as created

In [10]:
deleteDataBaseEntriesOnFirstRun = True

### Loop over files, extract features, store features in database

In [11]:
for i in range(len(filenames)):
    # select one file from list of filenames
    openface_file = openface_outputfolder + '/' + filenames[i]
    # Find wav file that matches the csv file produced by OpenFace
    # based on the assumption that filenames are identical except from file extension
    audio_file = erd.findwav(filenames[i], emrecdemStudyDataFolder)
    print("generate video features")
    # process with exploface
    openface_features = exploface.get_feature_time_series(openface_file)
    print("generate audio features")
    # process with explibrosa
    # explibrosa.get_info(audio_file)
    audio_features = explibrosa.get_feature_time_series(audio_file)
    
    # extract participant id (PID) and experiment id (EXP) from filename:
    PID = filenames[i].split("_")[0].split("P")[1]
    EXP = ''.join(filenames[i].split("_")[1:4]) # extract integers from filename
    EXP = EXP.split(".csv")[0] # remove .csv at the end
    openface_features = erd.downsample(openface_features,res = '0.2S')
    openface_features = erd.tidyup_video_features(openface_features, PID, EXP)
    
    audio_features = erd.downsample(audio_features,res = '0.2S')
    audio_features = erd.tidyup_audio_features(audio_features, PID, EXP)
    
    #http://www.sqlitetutorial.net/sqlite-python/create-tables/
    #erd.features2sqlitedb.checkdbcontent(databasefile)
    # features to database
    if (deleteDataBaseEntriesOnFirstRun == 0):
        deleteDataBaseEntries = True
    else:
        deleteDataBaseEntries = False
    
    erd.features2sqlitedb.features2db(audio_features, openface_features,
            databasefile, deleteDataBaseEntries)
    # check database content
    erd.checkdbcontent(databasefile)
    
    print(openface_features.head())
    print(audio_features.head())
 

P18_S2_IAPS_HAPPY.wav
find /media/sf_sharedfolder/Emotion/emrecdemstudydata -name P18_S2_IAPS_HAPPY.wav
['/media/sf_sharedfolder/Emotion/emrecdemstudydata/P18/S2/Audio/P18_S2_IAPS_HAPPY.wav']
Audio file identified
generate video features
generate audio features
Running librosa (no results found on disk)
RMS energy
     0.29 seconds
Zero crossing
     0.34 seconds
Pitches
     3.11 seconds
  Pitches smoothing
     3.25 seconds
TOTAL execution time: 0.05 min

Top 3 rows of audiofeatures table:
[(1, 21, 'S2IAPSSAD', '1970-01-01 00:00:00', 372.8440881978551, 0.008293401449918747, 0.2760273972602741), (2, 21, 'S2IAPSSAD', '1970-01-01 00:00:00.200000', 347.70841719968547, 0.007039304822683334, 0.25844748858447486), (3, 21, 'S2IAPSSAD', '1970-01-01 00:00:00.400000', 230.50576523158662, 0.007673874497413635, 0.25136986301369874)]

Row count:
5738

Column names:
[(0, 'id', 'integer', 0, None, 1), (1, 'participant_id', 'integer', 1, None, 0), (2, 'experiment_id', 'text', 1, None, 0), (3, 'time',

generate audio features
Running librosa (no results found on disk)
RMS energy
     0.16 seconds
Zero crossing
     0.19 seconds
Pitches
     1.91 seconds
  Pitches smoothing
     2.0 seconds
TOTAL execution time: 0.03 min

Top 3 rows of audiofeatures table:
[(1, 21, 'S2IAPSSAD', '1970-01-01 00:00:00', 372.8440881978551, 0.008293401449918747, 0.2760273972602741), (2, 21, 'S2IAPSSAD', '1970-01-01 00:00:00.200000', 347.70841719968547, 0.007039304822683334, 0.25844748858447486), (3, 21, 'S2IAPSSAD', '1970-01-01 00:00:00.400000', 230.50576523158662, 0.007673874497413635, 0.25136986301369874)]

Row count:
7814

Column names:
[(0, 'id', 'integer', 0, None, 1), (1, 'participant_id', 'integer', 1, None, 0), (2, 'experiment_id', 'text', 1, None, 0), (3, 'time', 'text', 0, None, 0), (4, 'pitch', 'real', 0, None, 0), (5, 'rmse', 'real', 0, None, 0), (6, 'zcrate', 'real', 0, None, 0)]

Top 3 rows of videofeatures table:
[(1, 21, 'S2IAPSSAD', '1970-01-01 00:00:00', 0.0, 0.0, 1.698, 0.0, 1.156, 1.526