# Merge features from various modalities into a single database

## 1. Load feature data from Video

Load the OpenFace output data into a pandas dataframe. So far copied from: https://github.com/emrecdem/exploface/blob/master/TUTORIALS/tutorial1.ipynb

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import exploface

In [2]:
exploface.__version__

'0.0.0.dev6'

In [3]:
# Root folder of the study data, and the subfolder that holds the OpenFace CSV output
emrecdemStudyDataFolder = "/media/sf_sharedfolder/Emotion/emrecdemstudydata"
openface_outputfolder = "/".join([emrecdemStudyDataFolder, "OpenFaceOutput"])

In [4]:
# Search for files with csv extension, because we are only interested in those
# This assumes that there are no other csv files in folder other than the ones produced by OpenFace.
from os import listdir

def find_csv_filenames(path_to_dir, suffix=".csv"):
    """Return the names of the entries in path_to_dir that end with suffix.

    The names are returned in sorted order. os.listdir yields entries in
    arbitrary order, which would make index-based selection such as
    ``filenames[0]`` differ between machines and runs.

    :param path_to_dir: folder to search (not recursive)
    :param suffix: filename ending to keep, ".csv" by default
    :return: sorted list of matching names (without the folder path)
    """
    return sorted(name for name in listdir(path_to_dir) if name.endswith(suffix))

filenames = find_csv_filenames(openface_outputfolder)

In [5]:
filenames

['P18_S2_IAPS_HAPPY_Cfront.csv',
 'P18_S2_IAPS_SAD_Cfront.csv',
 'P21_S2_IAPS_HAPPY_C1.csv',
 'P21_S2_IAPS_SAD_C1.csv']

In [6]:
# Process a single recording for now (a later version can loop over all files):
# build the full path of the first CSV and read its feature time series.
openface_file = f"{openface_outputfolder}/{filenames[0]}"
openface_features = exploface.get_feature_time_series(openface_file)

In [7]:
openface_features.head(5)

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,AU12_c,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c
0,1,0,0.0,0.98,1,0.028242,0.015028,-0.999488,0.045799,-0.019971,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
1,2,0,0.04,0.98,1,0.012176,-0.017202,-0.999778,0.035786,-0.044246,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0
2,3,0,0.08,0.98,1,0.003201,0.007913,-0.999964,0.030401,-0.01686,...,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,0.0
3,4,0,0.12,0.98,1,0.011002,0.017785,-0.999781,-0.06609,-0.075202,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
4,5,0,0.16,0.98,1,0.060468,0.055646,-0.996618,-0.093641,0.164191,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0


In [8]:
openface_features.shape


(5036, 714)

## 2. Load feature data from audio

Process the audio data with Librosa and load the features into a pandas dataframe. So far copied from: https://github.com/emrecdem/explibrosa/blob/master/TUTORIALS/tutorial1.ipynb

In [9]:
import matplotlib.pyplot as plt
import os
import explibrosa

In [10]:
explibrosa.__version__

'0.0.0.dev1'

Find the wav file that matches the csv file produced by OpenFace, based on the assumption that the filenames are identical except for the file extension and the camera suffix (`_Cfront` or `_Cside`).

In [11]:
transformfilename = filenames[0]

In [12]:
transformfilename

'P18_S2_IAPS_HAPPY_Cfront.csv'

In [13]:
# Derive the audio filename: swap the extension, then drop the camera suffix
# that only the video-derived CSV names carry.
audiofile_name = transformfilename.replace('.csv', '.wav')
for camera_suffix in ('_Cfront', '_Cside'):
    audiofile_name = audiofile_name.replace(camera_suffix, '')

In [14]:
audiofile_name 

'P18_S2_IAPS_HAPPY.wav'

In [15]:
import subprocess
 
# Set up find command to locate the audio file anywhere below the study folder.
# The command is passed as an argument list and run without a shell, so folder
# or file names containing spaces or shell metacharacters are handled safely.
findCMD = ['find', emrecdemStudyDataFolder, '-name', audiofile_name]
out = subprocess.Popen(findCMD, stdin=subprocess.PIPE,
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
# Get standard out and error
(stdout, stderr) = out.communicate()

# Save found files to list. `find` prints one path per line, so split on line
# breaks (not on any whitespace) to keep paths that contain spaces intact.
filelist = stdout.decode().splitlines()

In [16]:
audiofiles_fullPaths = filelist # probably the list has only one filename
# Fail with a descriptive message instead of a bare IndexError when the search
# did not find a matching audio file.
if not audiofiles_fullPaths:
    raise FileNotFoundError(
        "No audio file named " + audiofile_name + " found under " + emrecdemStudyDataFolder
    )
audio_file = audiofiles_fullPaths[0]

In [17]:
os.path.isfile(audio_file)

True

In [18]:
explibrosa.get_info(audio_file)

{'#frames': 3223486, 'duration (min)': 3.4, 'Sample freq (kHz)': 16.0}

In [19]:

time_series = explibrosa.get_feature_time_series(audio_file)

Running librosa (no results found on disk)
RMS energy
     0.29 seconds
Zero crossing
     0.36 seconds
Pitches
     3.07 seconds
  Pitches smoothing
     3.2 seconds
TOTAL execution time: 0.05 min


In [20]:
time_series.head()

Unnamed: 0,timestamp,rmse,zrc,pitch
0,0.0,0.013185,0.038095,186.62088
1,0.01,0.012107,0.066667,185.620404
2,0.020001,0.010913,0.052381,184.32665
3,0.030001,0.01129,0.052381,182.657746
4,0.040002,0.010963,0.042857,180.606069


In [21]:
type(time_series)

pandas.core.frame.DataFrame

In [22]:
from scipy import signal

In [23]:
type(time_series.timestamp)

pandas.core.series.Series

### Downsample the time_series pandas dataframe

In [24]:
# Convert timestamps to datetime format in order to be able to resample
import datetime as dt
import numpy as np

In [25]:
# Estimate the sample rate of the audio feature time series from its
# timestamp column (presumably in seconds — confirm against explibrosa).
Nvalues = len(time_series.index)
# Number of rows divided by the time spanned between the first and the last row.
# NOTE(review): Nvalues rows span Nvalues-1 intervals, so this slightly
# overestimates the rate. The next cell computes Nvalues/samplerate, which
# recovers exactly (last - first), so do not change this divisor in isolation.
samplerate = 1/ ((time_series.timestamp[Nvalues-1] - time_series.timestamp[0]) / Nvalues)

In [26]:
# Build a dummy datetime range, anchored at the current wall-clock time, that
# will replace the float timestamps with datetime-typed timestamps.
timenow = dt.datetime.now()
recording_span = dt.timedelta(seconds=Nvalues/samplerate)

start = pd.Timestamp(timenow)
end = pd.Timestamp(timenow + recording_span)

In [27]:
# Spread Nvalues evenly spaced instants between start and end (as integer
# nanosecond values), convert them to datetimes and attach them as a new column.
t = pd.to_datetime(np.linspace(start.value, end.value, Nvalues))
time_series['timestamp2'] = t

In [28]:
time_series.head()

Unnamed: 0,timestamp,rmse,zrc,pitch,timestamp2
0,0.0,0.013185,0.038095,186.62088,2019-01-30 16:04:49.856816896
1,0.01,0.012107,0.066667,185.620404,2019-01-30 16:04:49.866817280
2,0.020001,0.010913,0.052381,184.32665,2019-01-30 16:04:49.876817920
3,0.030001,0.01129,0.052381,182.657746,2019-01-30 16:04:49.886818304
4,0.040002,0.010963,0.042857,180.606069,2019-01-30 16:04:49.896818944


In [29]:
# Downsample to 0.2 second intervals by averaging all rows that fall in each bin.
# '200ms' is the same interval as '0.2S' but avoids the upper-case 'S' offset
# alias, which is deprecated in recent pandas releases.
time_series = time_series.resample('200ms', on='timestamp2').mean()
time_series.head()

Unnamed: 0_level_0,timestamp,rmse,zrc,pitch
timestamp2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-01-30 16:04:49.800,0.070003,0.008595,0.109841,204.026116
2019-01-30 16:04:50.000,0.245012,0.00919,0.139762,320.064795
2019-01-30 16:04:50.200,0.445022,0.003651,0.185238,242.428636
2019-01-30 16:04:50.400,0.645032,0.005303,0.185476,210.402133
2019-01-30 16:04:50.600,0.845042,0.023773,0.137857,311.804865


## 3. Store in database


http://www.sqlitetutorial.net/sqlite-python/create-tables/

In [30]:
import sqlite3

In [31]:
def create_connection(db_file):
    """ create a database connection to the SQLite database
        specified by db_file
    :param db_file: database file
    :return: Connection object, or None if the connection could not be opened
    """
    try:
        return sqlite3.connect(db_file)
    except sqlite3.Error as e:
        # sqlite3.Error is the base class of the sqlite3 exceptions; it has to
        # be referenced through the module because no bare `Error` is imported.
        print(e)

    return None

In [32]:
def create_table(conn, create_table_sql):
    """ create a table from the create_table_sql statement
    :param conn: Connection object
    :param create_table_sql: a CREATE TABLE statement
    :return: None; SQLite errors are printed instead of raised
    """
    try:
        c = conn.cursor()
        c.execute(create_table_sql)
    except sqlite3.Error as e:
        # sqlite3.Error is the base class of the sqlite3 exceptions; it has to
        # be referenced through the module because no bare `Error` is imported.
        print(e)

In [33]:
def main(database="/media/sf_sharedfolder/Emotion/emrecdemstudydata/emrecdemdb.db"):
    """ create the SQLite database file and all of its tables

    Creates (if they do not exist yet) the tables participants, experiments,
    videofeatures, audiofeatures, facslabels and tasks, in that order, and
    closes the connection afterwards.

    :param database: path of the SQLite database file; defaults to the
                     database inside the study data folder
    :return:
    """
    sql_create_participants_table = """ CREATE TABLE IF NOT EXISTS participants (
                                        id integer PRIMARY KEY,
                                        age real
                                    ); """

    sql_create_experiments_table = """ CREATE TABLE IF NOT EXISTS experiments (
                                        id integer PRIMARY KEY,
                                        name text NOT NULL
                                    ); """
    sql_create_videofeatures_table = """ CREATE TABLE IF NOT EXISTS videofeatures (
                                        id integer PRIMARY KEY,
                                        participant_id integer NOT NULL,
                                        experiment_id integer NOT NULL,
                                        AU01 real,
                                        AU02 real,
                                        AU03 real,
                                        AU04 real,
                                        FOREIGN KEY (participant_id) REFERENCES participants (id),
                                        FOREIGN KEY (experiment_id) REFERENCES experiments (id)
                                    ); """

    sql_create_audiofeatures_table = """ CREATE TABLE IF NOT EXISTS audiofeatures (
                                        id integer PRIMARY KEY,
                                        participant_id integer NOT NULL,
                                        experiment_id integer NOT NULL,
                                        pitch real,
                                        rmse real,
                                        zcrate real,
                                        FOREIGN KEY (participant_id) REFERENCES participants (id),
                                        FOREIGN KEY (experiment_id) REFERENCES experiments (id)
                                    ); """

    sql_create_facslabels_table = """ CREATE TABLE IF NOT EXISTS facslabels (
                                        id integer PRIMARY KEY,
                                        name text NOT NULL
                                    ); """

    # NOTE(review): the variable is called "facsencoding" but the statement
    # creates a table named "tasks"; this looks like a leftover from the SQLite
    # tutorial this code was adapted from. Left unchanged because renaming the
    # table is a schema change — confirm the intended table name.
    sql_create_facsencoding_table = """CREATE TABLE IF NOT EXISTS tasks (
                                    id integer PRIMARY KEY,
                                    participant_id integer NOT NULL,
                                    experiment_id integer NOT NULL,
                                    emotion_id integer NOT NULL,
                                    FOREIGN KEY (participant_id) REFERENCES participants (id),
                                    FOREIGN KEY (experiment_id) REFERENCES experiments (id),
                                    FOREIGN KEY (emotion_id) REFERENCES facslabels (id)
                                );"""

    # Referenced tables come first so the foreign keys point at existing tables.
    create_statements = (
        sql_create_participants_table,
        sql_create_experiments_table,
        sql_create_videofeatures_table,
        sql_create_audiofeatures_table,
        sql_create_facslabels_table,
        sql_create_facsencoding_table,
    )

    # create a database connection
    conn = create_connection(database)
    if conn is None:
        print("Error! cannot create the database connection.")
        return

    try:
        for create_statement in create_statements:
            create_table(conn, create_statement)
        # Make sure nothing is left pending before the connection is released.
        conn.commit()
    finally:
        # Always release the database file handle, even if a statement fails.
        conn.close()

In [34]:

# Create the database and its tables only when this code is executed directly
# (as a script or notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

In [35]:
# Now on command line: sqlite3 pathtomydatabase.db
# should give access to database
# and .tables should show overview of the tables as created

### To do Video:
- Strip away irrelevant information, e.g. make selection of features of interest.

### To do Video + Audio:
- Optionally downsample to minimally acceptable resolution, e.g. 10 Hertz.
- Retrieve participant ID from files and store these together with data and filename in database.
- Develop demo for searchability
