### This notebook preprocesses the input modalities of the Bipolar Disorder Corpus so that they are suitable for the developed framework. The audio and visual modalities are aligned and written to corresponding files: one set of files is created for the MFCC features and one for the eGeMAPS features.

In [None]:
from __future__ import unicode_literals
import pandas as pd
import numpy as np
import os
import cv2
import json
import pandas as pd
from matplotlib import pyplot as plt
import traceback
import statistics
from scipy.io import arff
from collections import Counter
from smart_open import smart_open
import os
import json
import traceback
import statistics
import numpy as np
import pandas as pd
from scipy.io import arff
from collections import Counter
from smart_open import smart_open
# The imports above only bind `pyplot` (as `plt`); the package name
# `matplotlib` itself has to be imported before `matplotlib.use` can be called,
# otherwise this line fails with a NameError.
import matplotlib
matplotlib.use('GTK')

def get_sample(partition, index):
    """Build the sample name ``<partition>_<index zero-padded to 3 digits>``.

    Returns ``None`` for a negative index. For an unknown partition, or an
    index above the partition's limit (train 104, dev 60, test 54), a message
    is printed and an empty string is returned.
    """
    if index < 0:
        return None

    # Highest accepted index for each known partition.
    max_index = {'train': 104, 'dev': 60, 'test': 54}

    if partition not in max_index:
        print("INCORRECT PARTITION INPUT")
        return ''

    if index > max_index[partition]:
        print("ERROR")
        return ''

    return partition + '_' + str(index).zfill(3)

#only select relevant features
def preprocess_AU():
    """Keep only the selected OpenFace columns of every recording.

    For each sample of the train, dev and test partitions, ``<sample>.csv`` is
    read from the raw OpenFace directory, surrounding whitespace is stripped
    from its column names, and the timestamp, landmark, gaze, pose and
    action-unit columns listed below are written (without the index) to a file
    of the same name in the processed directory.

    The output directory is not created here, so it presumably has to exist
    already. Returns nothing.
    """
    verbose = True
    raw_dir = "/home/ceccarelli/Work/Bipolar/LLDs_video_openface"
    proc_dir = "/home/ceccarelli/Work/Bipolar/LLDs_video_openface_processed"
    # Number of recordings in each partition.
    length = {'train': 104, 'dev': 60, 'test': 54}

    time = ['timestamp']
    landmarks = ['%s_%d' % (xy, i) for xy in ['x', 'y'] for i in range(68)]
    gazes = ['gaze_%d_%s' % (no, di) for no in range(2) for di in ['x', 'y', 'z']]
    poses = ['pose_%s' % xyz for xyz in ['Tx', 'Ty', 'Tz', 'Rx', 'Ry', 'Rz']]
    actions = ['AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r',
               'AU09_r', 'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r',
               'AU20_r', 'AU23_r', 'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c',
               'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c', 'AU07_c', 'AU09_c',
               'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c', 'AU20_c',
               'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c']

    # Build a new list instead of extending `time` in place: aliasing the two
    # names made the verbose print of `time` below show every selected column.
    visual = time + landmarks + gazes + poses + actions

    if verbose:
        print(time)
        print(landmarks)
        print(gazes)
        print(poses)
        print(actions)

    for partition in ['train', 'dev', 'test']:
        # Sample names are numbered starting from 1.
        for number in range(1, length[partition] + 1):
            filename = get_sample(partition, number)
            temp = pd.read_csv(os.path.join(raw_dir, filename + '.csv'))
            # Strip whitespace so the headers match the names in `visual`.
            temp.columns = temp.columns.str.strip()
            print("file %s loaded" % filename)
            # select specified columns
            temp = temp.loc[:, visual]
            temp.to_csv(os.path.join(proc_dir, filename + '.csv'), index=False)
            print("file %s processing completed & saved" % filename)

In [None]:
# One-off step: writes the column-filtered OpenFace CSVs to the processed
# directory. It has already been run once.
preprocess_AU()

In [None]:
# Align audio and video data
def align(eGeMAPS=False, verbose=False):
    """Align the audio low-level descriptors to the video frames.

    For every sample of the train, dev and test partitions the audio CSV
    (``;``-separated, second column used as the time index, ``name`` column
    dropped) and the processed video CSV are read. For each pair of consecutive
    video timestamps, the audio rows whose time lies strictly between them are
    collected:

    * 1 row  -> repeated three times,
    * 2 rows -> the second one is repeated once,
    * 3 rows -> used as they are,
    * 0 or more than 3 rows -> the video frame keeps an all-zero row.

    The three audio rows are concatenated into one row of ``3 * n_features``
    values and stored at the earlier video timestamp. The last video frame has
    no successor and therefore always stays all-zero. The result is written to
    the aligned audio directory with the video timestamps as an unnamed index.

    Parameters
    ----------
    eGeMAPS : bool
        Process the eGeMAPS directories when true, the MFCC ones otherwise.
    verbose : bool
        Print a message before loading and after saving each file.
    """
    if eGeMAPS:
        input_dir_A = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile/LLDs_audio_eGeMAPS'
        output_dir_A = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile_aligned/eGeMAPS'
    else:
        input_dir_A = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile/LLDs_audio_opensmile_MFCCs'
        output_dir_A = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile_aligned/MFCCs'
    input_dir_V = '/home/ceccarelli/Work/Bipolar/LLDs_video_openface_processed'
    # Number of recordings in each partition.
    length = {'train': 104, 'dev': 60, 'test': 54}

    for partition in ['train', 'dev', 'test']:
        for i in range(length[partition]):
            filename = get_sample(partition, (i+1)) + '.csv'

            if verbose:
                print("file %s loaded." % filename)

            temp_A = pd.read_csv(os.path.join(input_dir_A, filename), sep=';', index_col=1)
            temp_A.drop("name", axis=1, inplace=True)
            temp_V = pd.read_csv(os.path.join(input_dir_V, filename))

            audio_times = temp_A.index.values
            audio_values = temp_A.values
            video_times = temp_V.loc[:, 'timestamp'].values

            # One output row per video frame; rows that are never filled stay zero.
            aligned = np.zeros((len(video_times), temp_A.shape[1] * 3))

            for j in range(len(video_times) - 1):
                # Positions of the audio rows strictly inside (t_j, t_{j+1}).
                # A vectorised mask replaces the per-frame Python scan and keeps
                # the same strict comparisons and the same row order.
                inside = np.flatnonzero(
                    (audio_times > video_times[j]) & (audio_times < video_times[j+1]))
                if not 1 <= len(inside) <= 3:
                    continue
                # Pad to exactly three rows by repeating the last one found.
                picks = list(inside) + [inside[-1]] * (3 - len(inside))
                aligned[j] = audio_values[picks].ravel()

            # Building the frame from plain arrays leaves the index unnamed, so
            # there is no need for `del ...index.name`, which raises
            # AttributeError on current pandas versions.
            align_A = pd.DataFrame(aligned, index=video_times)
            align_A.to_csv(os.path.join(output_dir_A, filename))

            if verbose:
                print("file %s processed & saved." % filename)
                

In [None]:
# Run once per audio feature set: True selects the eGeMAPS directories,
# False selects the MFCC directories.
align(True)
#align(False)

In [None]:
def load_label(partition=True, verbose=True,
               label_path="/home/ceccarelli/Work/Bipolar/labels_metadata.csv"):
    """Load the label metadata and return YMRS scores and mania levels.

    The CSV must contain the columns ``SubjectID``, ``Age`` and ``ManiaLevel``.
    The first column is paired with the fifth column (used as the YMRS score)
    and with the sixth column (used as the mania level).

    Parameters
    ----------
    partition : bool
        When true, the first 60 rows are treated as the dev partition and the
        remaining rows as the train partition.
    verbose : bool
        Print per-subject instance counts and summary statistics.
    label_path : str
        Location of the metadata CSV; defaults to the original hard-coded path.

    Returns
    -------
    tuple
        ``(ymrs_dev, ymrs_train, level_dev, level_train)`` as 1-D value arrays
        when ``partition`` is true, otherwise
        ``(ymrs_score, mania_level, 0, 0)`` where the first two are two-column
        DataFrames.
    """
    label = pd.read_csv(label_path)

    # Number of recordings per subject; the keys are the distinct subject ids.
    instances_per_subject = Counter(label['SubjectID'].tolist())

    if verbose:
        for subject, count in instances_per_subject.items():
            print("%s subject have %d instances" % (subject, count))

        # Assumes the first character of SubjectID encodes the gender ('M'/'F').
        gender_list = [subject[:1] for subject in instances_per_subject]
        # Count every recording's age exactly once. The previous loop re-added
        # all ages of a subject once per recording, which skewed the median.
        age_list = label['Age'].tolist()
        classes_stats = Counter(label['ManiaLevel'].tolist())

        print("All subjects", len(instances_per_subject))
        print("Male subjects ", gender_list.count('M'))
        print("Female subjects", gender_list.count('F'))
        print("Age range (%d, %d), Age median %d" % (min(age_list), max(age_list), statistics.median(age_list)))
        print("Class distribution stats", classes_stats)

    ymrs_score = pd.concat([label.iloc[:, 0], label.iloc[:, 4]], axis=1)
    mania_level = pd.concat([label.iloc[:, 0], label.iloc[:, 5]], axis=1)

    if not partition:
        return ymrs_score, mania_level, 0, 0

    # Rows 0-59 are the dev partition, everything after is train.
    ymrs_dev = ymrs_score.iloc[:60, :]
    ymrs_train = ymrs_score.iloc[60:, :]
    level_dev = mania_level.iloc[:60, :]
    level_train = mania_level.iloc[60:, :]
    return ymrs_dev.values[:, 1], ymrs_train.values[:, 1], level_dev.values[:, 1], level_train.values[:, 1]

def write_features(no_data=False, eGeMAPS=False, verbose=False):
    """Merge the per-recording aligned features into per-partition files.

    For the train and dev partitions, every recording's aligned acoustic CSV
    and processed visual CSV are read and must have the same number of rows.
    Four files are written to the output directory for each partition:

    * ``<partition>_label``  - the recording's mania level followed by a comma,
      repeated once per row of the recording;
    * ``<partition>_inst``   - the 1-based recording number followed by a
      comma, repeated once per row;
    * ``<partition>_data_A`` - all acoustic rows, without header or index;
    * ``<partition>_data_V`` - all visual rows, without header or index.

    Parameters
    ----------
    no_data : bool
        When true, only a message is printed and nothing is read or written.
    eGeMAPS : bool
        Use the eGeMAPS acoustic features and the ``aligned_EAV`` output
        directory when true, the MFCC features and ``aligned_AV`` otherwise.
    verbose : bool
        Print the shapes of every loaded file.

    Raises
    ------
    AssertionError
        If a recording's acoustic and visual files differ in row count.
    """
    visual_dir = '/home/ceccarelli/Work/Bipolar/LLDs_video_openface_processed'
    if eGeMAPS:
        acoustic_dir = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile_aligned/eGeMAPS'
        output_dir = '/home/ceccarelli/Work/Bipolar/aligned_EAV'
    else:
        acoustic_dir = '/home/ceccarelli/Work/Bipolar/LLDs_audio_opensmile_aligned/MFCCs'
        output_dir = '/home/ceccarelli/Work/Bipolar/aligned_AV'

    if no_data:
        print ("This will only load labels later.")
        return

    # Number of recordings in each partition that is written out.
    length = {'train': 104, 'dev': 60}

    _, _, level_dev, level_train = load_label()
    labels = {'train': level_train, 'dev': level_dev}

    for partition in ['train', 'dev']:
        label = labels[partition]
        acoustic_frames, visual_frames = [], []
        label_path = os.path.join(output_dir, partition + '_label')
        inst_path = os.path.join(output_dir, partition + '_inst')

        # The context manager closes both files even when a read or the shape
        # check below fails part-way through the partition.
        with smart_open(label_path, 'w', encoding='utf-8') as label_f, \
                smart_open(inst_path, 'w', encoding='utf-8') as inst_f:
            for i in range(length[partition]):
                filename = get_sample(partition, (i+1)) + '.csv'
                A_feature = pd.read_csv(os.path.join(acoustic_dir, filename), low_memory=False)
                V_feature = pd.read_csv(os.path.join(visual_dir, filename), low_memory=False)
                A_t, _ = A_feature.shape
                V_t, _ = V_feature.shape
                print ('A shape is', A_t)
                print ('V shape is', V_t)
                assert A_t == V_t, "acoustic and visual row counts differ for %s" % filename
                timestep = A_t

                if verbose:
                    print("file %s loaded with timestep %d" % (filename, timestep), A_feature.shape, V_feature.shape)

                # Collect the frames and concatenate once after the loop;
                # concatenating inside the loop re-copies all earlier rows.
                acoustic_frames.append(A_feature)
                visual_frames.append(V_feature)

                # One label and one instance number per row of this recording.
                label_f.write((str(label[i]) + ',') * timestep)
                inst_f.write((str(i+1) + ',') * timestep)

        pd.concat(acoustic_frames).to_csv(
            os.path.join(output_dir, partition + '_data_A'), header=False, index=False)
        pd.concat(visual_frames).to_csv(
            os.path.join(output_dir, partition + '_data_V'), header=False, index=False)
        print("partition %s done." % partition)

In [None]:
# Run once per audio feature set: eGeMAPS=True writes to the aligned_EAV
# directory, eGeMAPS=False writes to the aligned_AV directory.
write_features(eGeMAPS=True)
#write_features(eGeMAPS=False)