In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.externals import joblib
%matplotlib inline

In [2]:
%env CUDA_VISIBLE_DEVICES=-1

env: CUDA_VISIBLE_DEVICES=-1


In [3]:
import keras

Using TensorFlow backend.


In [4]:
def log_progress(sequence, every=None, size=None, name='Items'):
    """Yield the items of ``sequence`` while showing a notebook progress widget.

    Parameters
    ----------
    sequence : iterable
        Items to iterate over. When it has no ``len()`` and ``size`` is not
        given, the total is unknown and ``every`` must be supplied.
    every : int, optional
        Refresh the widget on the first item and then on every ``every``-th
        item. Defaults to 1 for up to 200 items, otherwise ``int(size / 200)``
        (about every 0.5%).
    size : int, optional
        Total number of items; taken from ``len(sequence)`` when omitted.
    name : str
        Prefix shown in the text label.

    Yields
    ------
    The items of ``sequence``, unchanged and in order.

    Raises
    ------
    AssertionError
        If the total is unknown and ``every`` was not given.
    """
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    unknown_total = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            # No len(): treat the input as an iterator of unknown length.
            unknown_total = True

    if size is None:
        assert every is not None, 'sequence is iterator, set every'
    elif every is None:
        # Refresh on every item for short inputs, otherwise about every 0.5%.
        every = 1 if size <= 200 else int(size / 200)

    if unknown_total:
        # Unknown total: show a full bar in the 'info' style instead of a ratio.
        bar = IntProgress(min=0, max=1, value=1)
        bar.bar_style = 'info'
    else:
        bar = IntProgress(min=0, max=size, value=0)
    caption = HTML()
    display(VBox(children=[caption, bar]))

    count = 0
    try:
        for count, item in enumerate(sequence, 1):
            if count == 1 or count % every == 0:
                if unknown_total:
                    caption.value = '{name}: {index} / ?'.format(name=name, index=count)
                else:
                    bar.value = count
                    caption.value = u'{name}: {index} / {size}'.format(
                        name=name, index=count, size=size)
            yield item
    except:
        # Deliberately bare: whatever is raised while iterating (including at
        # the yield) turns the bar red and is then re-raised unchanged.
        bar.bar_style = 'danger'
        raise
    else:
        bar.bar_style = 'success'
        bar.value = count
        caption.value = "{name}: {index}".format(name=name, index=str(count or '?'))

In [5]:
def norm(arr, mins, maxs):
    """Shift and scale each channel along the last axis of ``arr``.

    For every channel ``i`` of the last axis the result is
    ``(arr[..., i] - mins[i]) / maxs[i]``. The divisor is ``maxs[i]`` itself,
    not ``maxs[i] - mins[i]``; this is kept as-is because anything fitted on
    data scaled by this function depends on it.

    Parameters
    ----------
    arr : numpy.ndarray
        Array with at least one dimension; channels are on the last axis.
        Any number of leading dimensions is accepted.
    mins, maxs : sequence
        Per-channel offset and divisor, indexable by channel number and at
        least ``arr.shape[-1]`` long.

    Returns
    -------
    numpy.ndarray
        A new float array of the same shape; the input is not modified.
    """
    # astype() returns a copy, so the in-place updates below never touch the
    # caller's array.
    arr = arr.astype(float)
    for i in range(arr.shape[-1]):
        # Ellipsis selects all leading axes, so the input need not be 3-D.
        arr[..., i] -= mins[i]
        arr[..., i] /= maxs[i]
    return arr

In [8]:
# Load the per-modality arrays that are later passed to norm() as `mins` and
# `maxs` for the audio and kinect inputs.
mins_audio, maxs_audio = np.load('data/audio_mins.npy'), np.load('data/audio_maxs.npy')
mins_kinect, maxs_kinect = np.load('data/kinect_mins.npy'), np.load('data/kinect_maxs.npy')

## Тест

In [3]:
# Load the saved audio network and wrap it in a model that maps the same input
# to the output of its layer named 'dropout_2', used below as a feature extractor.
model_tmp = keras.models.load_model('model_audio.h5')
model_audio = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [4]:
# Load the saved eyes network and wrap it in a model that maps the same input
# to the output of its layer named 'dropout_2', used below as a feature extractor.
model_tmp = keras.models.load_model('model_eyes.h5')
model_eyes = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [5]:
# Load the saved face network and wrap it in a model that maps the same input
# to the output of its layer named 'dropout_2', used below as a feature extractor.
model_tmp = keras.models.load_model('model_face.h5')
model_face = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [6]:
# Load the saved kinect network and wrap it in a model that maps the same input
# to the output of its layer named 'dropout_2', used below as a feature extractor.
model_tmp = keras.models.load_model('model_kinect.h5')
model_kinect = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [10]:
# Load the validation inputs for the four modalities. Audio and kinect are
# rescaled with norm(); eyes and face are used exactly as stored.
# NOTE(review): these files are read from the working directory, while a later
# cell reads 'data/X_val_*.npy' -- confirm which location is current.
X_val_audio = norm(np.load('X_val_audio.npy'), mins_audio, maxs_audio)
X_val_eyes = np.load('X_val_eyes.npy')
X_val_face = np.load('X_val_face.npy')
X_val_kinect = norm(np.load('X_val_kinect.npy'), mins_kinect, maxs_kinect)

print X_val_audio.shape, X_val_eyes.shape, X_val_face.shape, X_val_kinect.shape

(2114, 400, 36) (2114, 200, 6) (2114, 200, 100) (2114, 60, 27)


In [13]:
# Validation labels, one per validation sample.
# NOTE(review): read from the working directory, while a later cell reads
# 'data/y_val.npy' -- confirm which location is current.
y_val = np.load('y_val.npy')

print y_val.shape

(2114,)


In [11]:
# Run each truncated network on its validation input; the results are the
# 'dropout_2' outputs, used below as per-modality feature vectors.
nn_audio_val = model_audio.predict(X_val_audio)
nn_eyes_val = model_eyes.predict(X_val_eyes)
nn_face_val = model_face.predict(X_val_face)
nn_kinect_val = model_kinect.predict(X_val_kinect)

print nn_audio_val.shape, nn_eyes_val.shape, nn_face_val.shape, nn_kinect_val.shape

(2114, 30) (2114, 12) (2114, 40) (2114, 20)


**Weighted Soft Aggregation**

In [19]:
# One random forest per modality, each fitted on that modality's network
# features. A fixed random_state makes the cross-validation scores and the
# out-of-fold probabilities below reproducible across kernel restarts; without
# it every run grows different trees.
predictor_audio = RandomForestClassifier(n_estimators=100, random_state=0)
predictor_eyes = RandomForestClassifier(n_estimators=100, random_state=0)
predictor_face = RandomForestClassifier(n_estimators=100, random_state=0)
predictor_kinect = RandomForestClassifier(n_estimators=100, random_state=0)

In [20]:
scores = cross_val_score(predictor_audio, nn_audio_val, y_val, cv=4)
print 'audio:', scores, np.mean(scores)
scores = cross_val_score(predictor_eyes, nn_eyes_val, y_val, cv=4)
print 'eyes:', scores, np.mean(scores)
scores = cross_val_score(predictor_face, nn_face_val, y_val, cv=4)
print 'face:', scores, np.mean(scores)
scores = cross_val_score(predictor_kinect, nn_kinect_val, y_val, cv=4)
print 'kinect:', scores, np.mean(scores)

audio: [ 0.44256121  0.40377358  0.38636364  0.42857143] 0.415317463778
eyes: [ 0.39171375  0.29433962  0.26325758  0.26095238] 0.302565831749
face: [ 0.58380414  0.45283019  0.60037879  0.6       ] 0.559253279921
kinect: [ 0.44256121  0.27735849  0.33712121  0.41904762] 0.369022131752


In [27]:
weak_audio_val = cross_val_predict(predictor_audio, nn_audio_val, y_val, cv=4, method='predict_proba')
weak_eyes_val = cross_val_predict(predictor_eyes, nn_eyes_val, y_val, cv=4, method='predict_proba')
weak_face_val = cross_val_predict(predictor_face, nn_face_val, y_val, cv=4, method='predict_proba')
weak_kinect_val = cross_val_predict(predictor_kinect, nn_kinect_val, y_val, cv=4, method='predict_proba')

print weak_audio_val.shape, weak_eyes_val.shape, weak_face_val.shape, weak_kinect_val.shape

(2114, 6) (2114, 6) (2114, 6) (2114, 6)


In [32]:
# Per-modality weights in the order audio, eyes, face, kinect.
# NOTE(review): these look like the rounded mean CV scores printed above -- confirm.
weights_weak = [0.42, 0.3, 0.56, 0.37]

# NOTE(review): weights_weak is NOT applied below. The weighted variant is kept
# here only as a comment; the active line sums the four probability arrays
# with equal weight, despite the "Weighted" section title.
# preds_weak = 0.42 * weak_audio_val + 0.3 * weak_eyes_val + 0.56 * weak_face_val + 0.37 * weak_kinect_val
preds_weak = weak_audio_val + weak_eyes_val + weak_face_val + weak_kinect_val

In [33]:
# For every sample take the column index with the largest summed score.
# NOTE(review): comparing this index with y_val assumes the labels are the
# column indices of predict_proba -- confirm.
y_pred = np.argmax(preds_weak, axis=1)

print y_pred.shape

(2114,)


In [34]:
# Fraction of validation samples whose prediction differs from the label
# (an error rate, not an accuracy).
print np.mean(y_pred != y_val)

0.450331125828


**Fusion-based**

In [35]:
# Feature-level fusion: place the four per-modality feature vectors side by
# side (audio, eyes, face, kinect) to get one vector per sample.
nn_val = np.concatenate([nn_audio_val, nn_eyes_val, nn_face_val, nn_kinect_val], axis=1)

print nn_val.shape

(2114, 102)


In [38]:
predictor = RandomForestClassifier(n_estimators=1000)

scores = cross_val_score(predictor, nn_val, y_val, cv=3, verbose=2)
print scores, np.mean(scores)

[CV]  ................................................................
[CV] ................................................. , total=   6.3s
[CV]  ................................................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    6.3s remaining:    0.0s


[CV] ................................................. , total=   6.1s
[CV]  ................................................................
[CV] ................................................. , total=   6.1s
[ 0.61189802  0.56028369  0.66571835] 0.612633351623


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:   18.4s finished


## Подготовим модель

In [9]:
# Build the final fitting set: train and validation inputs stacked along the
# sample axis (train first). Audio and kinect are rescaled with norm();
# eyes and face are used exactly as stored.
X_audio = np.concatenate([norm(np.load('data/X_train_audio.npy'), mins_audio, maxs_audio), 
                          norm(np.load('data/X_val_audio.npy'), mins_audio, maxs_audio)])
X_eyes = np.concatenate([np.load('data/X_train_eyes.npy'), np.load('data/X_val_eyes.npy')])
X_face = np.concatenate([np.load('data/X_train_face.npy'), np.load('data/X_val_face.npy')])
X_kinect = np.concatenate([norm(np.load('data/X_train_kinect.npy'), mins_kinect, maxs_kinect), 
                           norm(np.load('data/X_val_kinect.npy'),mins_kinect, maxs_kinect)])

print X_audio.shape, X_eyes.shape, X_face.shape, X_kinect.shape

(10446, 400, 36) (10446, 200, 6) (10446, 200, 100) (10446, 60, 27)


In [11]:
# Labels for the combined set, in the same train-then-validation order as the
# inputs.
y = np.concatenate([np.load('data/y_train.npy'), np.load('data/y_val.npy')])

print y.shape

(10446,)


In [12]:
# Load the phase-2 audio network and keep only the part up to its layer named
# 'dropout_2'. This rebinds model_audio, replacing the model loaded earlier.
model_tmp = keras.models.load_model('models/model_audio_phase2.h5')
model_audio = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [13]:
# Load the phase-2 eyes network and keep only the part up to its layer named
# 'dropout_2'. This rebinds model_eyes, replacing the model loaded earlier.
model_tmp = keras.models.load_model('models/model_eyes_phase2.h5')
model_eyes = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [14]:
# Load the phase-2 face network and keep only the part up to its layer named
# 'dropout_2'. This rebinds model_face, replacing the model loaded earlier.
model_tmp = keras.models.load_model('models/model_face_phase2.h5')
model_face = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [15]:
# Load the phase-2 kinect network and keep only the part up to its layer named
# 'dropout_2'. This rebinds model_kinect, replacing the model loaded earlier.
model_tmp = keras.models.load_model('models/model_kinect_phase2.h5')
model_kinect = keras.models.Model(model_tmp.input, model_tmp.get_layer('dropout_2').output)

In [16]:
# Drop the name of the last fully loaded network; only the truncated models
# are used from here on.
del model_tmp

In [17]:
# Run each truncated phase-2 network on the combined train+validation input to
# get the per-modality feature vectors.
nn_audio = model_audio.predict(X_audio)
nn_eyes = model_eyes.predict(X_eyes)
nn_face = model_face.predict(X_face)
nn_kinect = model_kinect.predict(X_kinect)

print nn_audio.shape, nn_eyes.shape, nn_face.shape, nn_kinect.shape

(10446, 30) (10446, 12) (10446, 40) (10446, 20)


In [18]:
# The networks are not needed once the features are extracted; remove the
# names. Re-running any earlier cell that uses them requires reloading first.
del model_audio, model_eyes, model_face, model_kinect

In [19]:
# Fuse the per-modality features column-wise in the order audio, eyes, face,
# kinect -- the same order used for nn_val in the validation experiment.
nn = np.concatenate([nn_audio, nn_eyes, nn_face, nn_kinect], axis=1)

print nn.shape

(10446, 102)


In [20]:
# Final fusion classifier fitted on all available data (train + validation).
# random_state is fixed so that the model written to disk in the next cell can
# be rebuilt identically from the same features; n_jobs only parallelizes tree
# construction.
predictor = RandomForestClassifier(n_estimators=1000, n_jobs=4, random_state=0).fit(nn, y)

In [21]:
# Persist the fitted forest for later use.
# NOTE(review): `joblib` comes from `sklearn.externals` here, an import path
# that newer scikit-learn releases deprecate and then remove -- confirm the
# target environment. The file is pickle-based, so only load it from a trusted
# source.
joblib.dump(predictor, 'models/predictor_phase2.pkl')

['models/predictor_phase2.pkl']