In [None]:
from __future__ import print_function
from __future__ import division

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score, precision_score, recall_score, precision_recall_fscore_support, average_precision_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.feature_selection import SelectKBest, VarianceThreshold
from sklearn.feature_selection import f_regression, f_classif
from sklearn.model_selection import GridSearchCV, fit_grid_point
from sklearn import svm
from scipy.stats import pearsonr
%matplotlib inline
import matplotlib.pyplot as plt

def select_features(dev_features_raw, test_features_raw, annotations, maxk, reg=True, scale=True):
    """Optionally min-max scale the features, then keep the ``maxk`` best columns.

    Both the scaler and the univariate selector are fitted on the dev split
    only and then applied unchanged to the test split, so a given raw value
    is mapped to the same scaled value in both splits.

    Parameters
    ----------
    dev_features_raw : 2-D array-like
        Dev feature matrix; used to fit the scaler and the selector.
    test_features_raw : 2-D array-like
        Test feature matrix with the same columns as ``dev_features_raw``.
    annotations : array-like
        Dev targets handed to ``SelectKBest.fit_transform``.
    maxk : int or "all"
        Forwarded as ``k`` to ``SelectKBest``.
    reg : bool, default True
        ``True`` scores columns with ``f_regression``; ``False`` uses
        ``f_classif``.
    scale : bool, default True
        When ``False`` the raw matrices are passed to the selector as-is.

    Returns
    -------
    tuple
        ``(dev_features_selected, test_features_selected)``.

    Side effects
    ------------
    Prints the (dev, test) shapes before and after selection.
    """
    if scale:
        # Learn per-column min/max from dev only and reuse them for test.
        # Scaling the test split with its own min/max would put the two
        # splits on different scales and feed the model inconsistent inputs.
        # Test values outside the dev range may therefore fall outside [0, 1].
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        dev_features = scaler.fit_transform(dev_features_raw)
        test_features = scaler.transform(test_features_raw)
    else:
        dev_features = dev_features_raw
        test_features = test_features_raw
    print(dev_features.shape, test_features.shape)

    score_func = f_regression if reg else f_classif
    bestk_selection = SelectKBest(score_func, k=maxk)
    # The selector is fitted on dev and only applied to test.
    dev_features_selected = bestk_selection.fit_transform(dev_features, annotations)
    test_features_selected = bestk_selection.transform(test_features)
    print(dev_features_selected.shape, test_features_selected.shape)
    return dev_features_selected, test_features_selected

def cat_features(feature1, feature2):
    """Join two ``(dev, test)`` feature pairs column-wise.

    Parameters
    ----------
    feature1, feature2 : 2-item sequences
        Each is ``(dev_matrix, test_matrix)``. The dev matrices must be
        concatenable along axis 1, and likewise the test matrices.

    Returns
    -------
    tuple
        ``(dev_joined, test_joined)`` with the columns of ``feature1``
        first, followed by the columns of ``feature2``.
    """
    dev_a, test_a = feature1
    dev_b, test_b = feature2
    dev_joined = np.concatenate([dev_a, dev_b], axis=1)
    test_joined = np.concatenate([test_a, test_b], axis=1)
    return dev_joined, test_joined

In [None]:
# Load the precomputed feature matrices and annotations for the dev split ...
dev_features_vgg = np.load('../data/dev_uploaded_data/vgg_features_avg.npy')
dev_features_cls = np.load('../data/dev_uploaded_data/features_avg.npy')
dev_annotations = np.load('../data/dev_uploaded_data/anno_valence_arousal.npy')
dev_annotations_fear = np.load('../data/dev_uploaded_data/anno_fear.npy')
# ... and for the test split.
test_features_vgg = np.load('../data/test_uploaded_data/vgg_features_avg.npy')
test_features_cls = np.load('../data/test_uploaded_data/features_avg.npy')
test_annotations = np.load('../data/test_uploaded_data/anno_valence_arousal.npy')
test_annotations_fear = np.load('../data/test_uploaded_data/anno_fear.npy')

# For every target, keep the 100 best columns of each feature family
# (selector fitted on dev) and join the two selections side by side.

# Valence: column 0 of the valence/arousal annotations, regression scoring.
valence_target = dev_annotations[:, 0]
valence_vgg = select_features(dev_features_vgg, test_features_vgg, valence_target, 100)
valence_cls = select_features(dev_features_cls, test_features_cls, valence_target, 100)
dev_features_valence, test_features_valence = cat_features(valence_vgg, valence_cls)

# Arousal: column 1 of the same annotations, regression scoring.
arousal_target = dev_annotations[:, 1]
arousal_vgg = select_features(dev_features_vgg, test_features_vgg, arousal_target, 100)
arousal_cls = select_features(dev_features_cls, test_features_cls, arousal_target, 100)
dev_features_arousal, test_features_arousal = cat_features(arousal_vgg, arousal_cls)

# Fear: separate annotation file, scored with f_classif (reg=False).
fear_vgg = select_features(dev_features_vgg, test_features_vgg, dev_annotations_fear, 100, reg=False)
fear_cls = select_features(dev_features_cls, test_features_cls, dev_annotations_fear, 100, reg=False)
dev_features_fear, test_features_fear = cat_features(fear_vgg, fear_cls)

In [None]:
# Sanity check: shapes of the combined valence feature matrices (dev, then test).
print(dev_features_valence.shape, test_features_valence.shape)

In [None]:
# Valence model trained on the mixed (concatenated) selected features,
# used to test how the features behave.
# random_state is pinned so that a fresh Restart & Run All reproduces the
# same fitted model and therefore the same scores in the cells below.
valence_model = GradientBoostingRegressor(random_state=0)
valence_model.fit(dev_features_valence, dev_annotations[:, 0])

In [None]:
# Score the fitted valence model on the test split: mean squared error and
# the Pearson correlation result between annotations and predictions.
valence_pred = valence_model.predict(test_features_valence)
valence_truth = test_annotations[:, 0]
valence_mse = mean_squared_error(valence_truth, valence_pred)
valence_corr = pearsonr(valence_truth, valence_pred)
print(valence_mse, valence_corr)

In [None]:
# Sweep max_depth of the valence regressor from 3 to 10 and print the
# test-split MSE and Pearson correlation for each depth.
#
# random_state is pinned so that differences between depths come from the
# depth itself and the printed numbers are reproducible across runs.
#
# NOTE(review): every depth is scored on the test split, so a depth picked
# from this table is tuned on test data and its score is optimistic.
# Consider choosing the depth by cross-validation on the dev split instead.
#
# After the loop, valence_model and valence_pred hold the max_depth=10 run.
valence_dev_target = dev_annotations[:, 0]
valence_test_target = test_annotations[:, 0]
for val in range(3, 11):
    print('val = {}'.format(val))
    valence_model = GradientBoostingRegressor(max_depth=val, random_state=0)
    valence_model.fit(dev_features_valence, valence_dev_target)
    valence_pred = valence_model.predict(test_features_valence)
    print(mean_squared_error(valence_test_target, valence_pred), pearsonr(valence_test_target, valence_pred))