In [18]:
# -*- coding: utf-8 -*-
import sys
import random
import numpy as np
from tqdm import tqdm
import pickle

sys.path.append('../')
from pytorch.common.datasets_parsers.av_parser import AVDBParser

from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.decomposition import PCA

from pytorch.common.datasets_parsers.av_parser import AVDBParser
from voice_feature_extraction import OpenSMILE
from accuracy import Accuracy, Accuracy_regression




In [16]:
def get_data(dataset_root, file_list, max_num_clips=0):
    dataset_parser = AVDBParser(dataset_root, file_list,
                                max_num_clips=max_num_clips)
    data = dataset_parser.get_data()
    print('clips count:', len(data))
    print('frames count:', dataset_parser.get_dataset_size())
    return data

def calc_features(data, opensmile_root_dir, opensmile_config_path):
    vfe = OpenSMILE(opensmile_root_dir, opensmile_config_path)

    progresser = tqdm(iterable=range(0, len(data)),
                      desc='calc audio features',
                      total=len(data),
                      unit='files')

    feat, targets = [], []
    for i in progresser:
        print(len(data))
        clip = data[i]
        print(clip.wav_rel_path)

        try:
            voice_feat = vfe.process(clip.wav_rel_path)
        except:
            data.remove(clip)

        feat.append(voice_feat)
        targets.append(clip.labels)

    print('feat count:', len(feat))
    return np.asarray(feat, dtype=np.float32), np.asarray(targets, dtype=np.float32)

def classification(X_train, X_test, y_train, y_test, accuracy_fn, pca_dim=100):
    if pca_dim > 0:
        pca_model = PCA(n_components=min(pca_dim, X_train.shape[1])).fit(X_train)
        X_train = pca_model.transform(X_train)
        X_test = pca_model.transform(X_test)

    # shuffle
    combined = list(zip(X_train, y_train))
    random.shuffle(combined)
    X_train[:], y_train[:] = zip(*combined)

    # TODO: используйте классификаторы из sklearn

    y_pred = []
    accuracy_fn.by_clips(y_pred)




In [20]:
experiment_name = 'exp_1'
max_num_clips = 0 # загружайте только часть данных для отладки кода
use_dump = False # используйте dump для быстрой загрузки рассчитанных фич из файла

# dataset dir
base_dir = 'D:/AVER'
if 1:
    train_dataset_root = 'C:/Users/ipmstud/Desktop/STCML/Ryerson/Video'
    train_file_list = 'C:/Users/ipmstud/Desktop/STCML/Ryerson/train_data_with_landmarks.txt'
    test_dataset_root = 'C:/Users/ipmstud/Desktop/STCML/Ryerson/Video'
    test_file_list = 'C:/Users/ipmstud/Desktop/STCML/Ryerson/test_data_with_landmarks.txt'
elif 1:
    train_dataset_root = base_dir + '/OMGEmotionChallenge-master/omg_TrainVideos/preproc/frames'
    train_file_list = base_dir + '/OMGEmotionChallenge-master/omg_TrainVideos/preproc/train_data_with_landmarks.txt'
    test_dataset_root =base_dir + '/OMGEmotionChallenge-master/omg_ValidVideos/preproc/frames'
    test_file_list = base_dir + '/OMGEmotionChallenge-master/omg_ValidVideos/preproc/valid_data_with_landmarks.txt'



In [21]:
# opensmile configuration
opensmile_root_dir = 'C:/Users/ipmstud/Desktop/crt/opensmile-2.3.0'
# TODO: поэкспериментируйте с различными конфигурационными файлами библиотеки OpenSmile
opensmile_config_path = 'C:/Users/ipmstud/Desktop/crt/opensmile-2.3.0/config/avec2013.conf'

# load dataset
train_data = get_data(train_dataset_root, train_file_list, max_num_clips=max_num_clips)
test_data = get_data(test_dataset_root, test_file_list, max_num_clips=max_num_clips)







AVDB meta parsing:   0%|                                                                 | 0/33087 [00:00<?, ?images/s]



AVDB meta parsing:   3%|█▌                                                   | 947/33087 [00:00<00:03, 9432.52images/s]



AVDB meta parsing:   6%|███▏                                               | 2032/33087 [00:00<00:03, 10131.75images/s]



AVDB meta parsing:   9%|████▊                                              | 3108/33087 [00:00<00:02, 10290.48images/s]



AVDB meta parsing:  13%|██████▌                                            | 4297/33087 [00:00<00:02, 10323.99images/s]



AVDB meta parsing:  16%|████████▏                                          | 5298/33087 [00:00<00:02, 10234.81images/s]



AVDB meta parsing:  19%|█████████▉                                         | 6447/33087 [00:00<00:02, 10184.27images/s]



AVDB meta parsing:  23%|███████████▌                                       | 7463/33087 [00:00<00:02, 10168.07images/s]



AVDB meta pa

clips count: 1200
frames count: 33087






AVDB meta parsing:   0%|                                                                  | 0/6839 [00:00<?, ?images/s]



AVDB meta parsing:  17%|████████▊                                           | 1156/6839 [00:00<00:00, 10001.54images/s]



AVDB meta parsing:  33%|█████████████████                                   | 2236/6839 [00:00<00:00, 10364.97images/s]



AVDB meta parsing:  41%|█████████████████████▊                               | 2810/6839 [00:00<00:00, 4712.67images/s]



AVDB meta parsing:  56%|█████████████████████████████▉                       | 3864/6839 [00:00<00:00, 5557.83images/s]



AVDB meta parsing:  72%|██████████████████████████████████████▎              | 4944/6839 [00:00<00:00, 6176.33images/s]



AVDB meta parsing:  87%|██████████████████████████████████████████████       | 5940/6839 [00:00<00:00, 6594.62images/s]



AVDB meta parsing: 100%|█████████████████████████████████████████████████████| 6839/6839 [00:00<00:00, 7025.23images/s]

clips count: 240
frames count: 6839


In [22]:
len(train_data)

1200

In [23]:
len(test_data)

240

In [24]:
# get features
train_feat, train_targets = calc_features(train_data, opensmile_root_dir, opensmile_config_path)
test_feat, test_targets = calc_features(test_data, opensmile_root_dir, opensmile_config_path)


openSMILE version  b'2.3.0rc1 (Rev. 1593:1650M)\r'






calc audio features:   0%|                                                                 | 0/1200 [00:00<?, ?files/s]

1200
None


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [None]:

accuracy_fn = Accuracy(test_data, experiment_name=experiment_name)

with open(experiment_name + '.pickle', 'wb') as f:
    pickle.dump([train_feat, train_targets, test_feat, test_targets, accuracy_fn], f, protocol=2)
else:
with open(experiment_name + '.pickle', 'rb') as f:
    train_feat, train_targets, test_feat, test_targets, accuracy_fn = pickle.load(f)

# run classifiers
classification(train_feat, test_feat, train_targets, test_targets, accuracy_fn=accuracy_fn, pca_dim=0)