In [None]:
from __future__ import print_function
from __future__ import division

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, accuracy_score, f1_score, precision_score, recall_score, precision_recall_fscore_support, average_precision_score
from sklearn.metrics import make_scorer, f1_score
from sklearn.feature_selection import SelectKBest, VarianceThreshold
from sklearn.feature_selection import f_regression, f_classif
from sklearn.model_selection import GridSearchCV, fit_grid_point
from sklearn import svm
from scipy.stats import pearsonr
%matplotlib inline
import matplotlib.pyplot as plt

In [None]:
def select_features(dev_features_raw, test_features_raw, annotations, maxk, reg=True, scale=True):
    """Optionally min-max scale the features, then keep the ``maxk`` best columns.

    Both the scaler and the univariate selector are fitted on the development
    data only and then applied unchanged to the test data. Scaling each split
    with its own min/max would map the same raw value to different scaled
    values in dev and test, so a model trained on dev would see shifted inputs.

    Parameters
    ----------
    dev_features_raw : 2-D array
        Development feature matrix; scaling is done per column.
    test_features_raw : 2-D array
        Test feature matrix with the same columns as ``dev_features_raw``.
    annotations : array
        Target for each development row, used to score the columns.
    maxk : int or 'all'
        Forwarded to ``SelectKBest`` as ``k``.
    reg : bool
        Score columns with ``f_regression`` when true, ``f_classif`` otherwise.
    scale : bool
        When true, rescale columns so the development data spans [0, 1].
        Test values outside the development range land outside [0, 1].

    Returns
    -------
    tuple
        ``(dev_features_selected, test_features_selected)``.

    The shapes before and after selection are printed as a sanity check.
    """
    if scale:
        # One scaler, fitted on dev, shared by both splits.
        scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
        dev_features = scaler.fit_transform(dev_features_raw)
        test_features = scaler.transform(test_features_raw)
    else:
        dev_features, test_features = dev_features_raw, test_features_raw
    print(dev_features.shape, test_features.shape)

    score_func = f_regression if reg else f_classif
    bestk_selection = SelectKBest(score_func, k=maxk)
    dev_features_selected = bestk_selection.fit_transform(dev_features, annotations)
    test_features_selected = bestk_selection.transform(test_features)
    print(dev_features_selected.shape, test_features_selected.shape)
    return dev_features_selected, test_features_selected

In [None]:
# Feature matrices (averaged VGG features) for the development and test splits.
# ('features_avg.npy' in the same directories is the alternative feature file
# this cell was previously pointed at.)
dev_features = np.load('../data/dev_uploaded_data/vgg_features_avg.npy')
test_features = np.load('../data/test_uploaded_data/vgg_features_avg.npy')

# Valence/arousal annotations: column 0 is used as valence, column 1 as arousal.
dev_annotations = np.load('../data/dev_uploaded_data/anno_valence_arousal.npy')
test_annotations = np.load('../data/test_uploaded_data/anno_valence_arousal.npy')

# Fear annotations.
dev_annotations_fear = np.load('../data/dev_uploaded_data/anno_fear.npy')
test_annotations_fear = np.load('../data/test_uploaded_data/anno_fear.npy')

# Keep 100 features per target; selection is always driven by the dev annotations.
dev_features_valence, test_features_valence = select_features(
    dev_features, test_features, dev_annotations[:, 0], 100)
dev_features_arousal, test_features_arousal = select_features(
    dev_features, test_features, dev_annotations[:, 1], 100)
dev_features_fear, test_features_fear = select_features(
    dev_features, test_features, dev_annotations_fear, 100)

In [None]:
# Valence regressor: 50 trees, each leaf holding at least 100 training samples.
# The split criterion is left at the library default (mean squared error);
# spelling it as 'mse' is rejected by newer scikit-learn releases.
# random_state pins the forest so a re-run reproduces the same scores.
valence_model = RandomForestRegressor(min_samples_leaf=100, n_estimators=50, random_state=0)
valence_model.fit(dev_features_valence, dev_annotations[:, 0])

In [None]:
# Score the valence model on the test split: mean squared error, then the
# Pearson correlation result between annotations and predictions.
valence_pred = valence_model.predict(test_features_valence)
valence_true = test_annotations[:, 0]
print(mean_squared_error(valence_true, valence_pred), pearsonr(valence_true, valence_pred))

In [None]:
def evaluate_param(parameter, num_range, index):
    """Grid-search one hyperparameter of ``valence_model`` and plot the scores.

    Runs ``GridSearchCV`` (scoring ``'r2'``) over ``num_range`` for the single
    hyperparameter ``parameter``, using the notebook-level ``valence_model``,
    ``dev_features_valence`` and the first column of ``dev_annotations``.
    The mean cross-validated score per value is drawn in slot ``index`` of a
    3x2 subplot grid on the current figure.

    Scores are read from ``cv_results_``; the older ``grid_scores_`` attribute
    no longer exists in current scikit-learn releases.

    Parameters
    ----------
    parameter : str
        Name of the estimator hyperparameter to vary.
    num_range : sequence
        Candidate values for that hyperparameter.
    index : int
        1-based subplot position inside the 3x2 grid.

    Returns
    -------
    tuple
        ``(plot, df)`` where ``plot`` is the return value of ``plt.plot`` and
        ``df`` has a column ``'index'`` (parameter value, ascending) and a
        column ``0`` (mean test score).
    """
    grid_search = GridSearchCV(valence_model, param_grid={parameter: num_range}, scoring='r2')
    grid_search.fit(dev_features_valence, dev_annotations[:, 0])

    results = grid_search.cv_results_
    mean_scores = {
        params[parameter]: mean_score
        for params, mean_score in zip(results['params'], results['mean_test_score'])
    }
    # orient='index' puts the parameter values in the index; reset_index()
    # moves them into a column named 'index', next to the score column 0.
    df = (pd.DataFrame.from_dict(mean_scores, orient='index')
          .reset_index()
          .sort_values(by='index'))

    plt.subplot(3, 2, index)
    plot = plt.plot(df['index'], df[0])
    plt.title(parameter)
    return plot, df

In [None]:
# Parameter tuning: one subplot per hyperparameter listed in param_grid.
# evaluate_param lays the plots out on a 3x2 grid, so up to six entries fit
# on this figure; subplot positions are 1-based.
param_grid = {"max_depth": np.arange(1, 10, 1)}
plt.figure(figsize=(16, 12))
for index, (parameter, param_range) in enumerate(param_grid.items(), start=1):
    evaluate_param(parameter, param_range, index)

In [None]:
# Arousal regressor: a node needs at least 50 samples to be split and every
# leaf keeps at least 25. The number of trees is the library default.
# The split criterion is left at the library default (mean squared error);
# spelling it as 'mse' is rejected by newer scikit-learn releases.
# random_state pins the forest so a re-run reproduces the same scores.
arousal_model = RandomForestRegressor(min_samples_leaf=25, min_samples_split=50, random_state=0)
arousal_model.fit(dev_features_arousal, dev_annotations[:, 1])

In [None]:
# Score the arousal model on the test split: mean squared error, then the
# Pearson correlation result between annotations and predictions.
arousal_pred = arousal_model.predict(test_features_arousal)
arousal_true = test_annotations[:, 1]
print(mean_squared_error(arousal_true, arousal_pred), pearsonr(arousal_true, arousal_pred))

In [None]:
# Classification method for the fear model.
# NOTE(review): class 0 is weighted 250x heavier than class 1. If label 1 marks
# the (presumably rare) fear samples, this weighting may be inverted — confirm.
# random_state pins the forest so a re-run reproduces the same metrics.
fear_model = RandomForestClassifier(class_weight={0: 250, 1: 1}, random_state=0)
fear_model.fit(dev_features_fear, dev_annotations_fear)

In [None]:
# Evaluate the fear classifier on the test split.
# Printed in order: accuracy, precision, recall, F1.
fear_pred = fear_model.predict(test_features_fear)
fear_metrics = (accuracy_score, precision_score, recall_score, f1_score)
print(*[metric(test_annotations_fear, fear_pred) for metric in fear_metrics])

In [None]:
# Regression method for the fear model: the forest predicts a continuous score
# that later cells turn into a label by thresholding. This rebinds `fear_model`,
# replacing the classifier fitted earlier.
# The split criterion is left at the library default (mean squared error);
# spelling it as 'mse' is rejected by newer scikit-learn releases.
# random_state pins the forest so a re-run reproduces the same scores.
fear_model = RandomForestRegressor(n_estimators=10, min_samples_leaf=20, random_state=0)
fear_model.fit(dev_features_fear, dev_annotations_fear)

In [None]:
# Parameter tuning: for every min_samples_leaf in 1..49, refit the fear
# regressor and report the decision threshold (0.01..0.44) with the best F1.
# NOTE(review): both the leaf size and the threshold are chosen by scoring on
# the test split, so the reported F1 is optimistic — consider a validation split.
# After the loop `fear_model` is the last model fitted (min_samples_leaf=49).
for val in range(1, 50):
    # Default (mean squared error) criterion; fixed seed so the runs are comparable.
    fear_model = RandomForestRegressor(n_estimators=10, min_samples_leaf=val, random_state=0)
    fear_model.fit(dev_features_fear, dev_annotations_fear)
    # The raw scores do not depend on the threshold: predict once per model.
    fear_scores = fear_model.predict(test_features_fear)
    best_f1 = 0
    best_threshold = 0
    for i in range(1, 45):
        threshold = i / 100.
        fear_pred = fear_scores > threshold
        f1 = f1_score(test_annotations_fear, fear_pred)
        if f1 > best_f1:
            best_f1 = f1
            best_threshold = threshold
    print("val={}, best f1={}, best threshold={}".format(val, best_f1, best_threshold))

In [None]:
# Parameter tuning: sweep the decision threshold (0.01..0.44) for whichever
# model is currently bound to `fear_model`.
# For each threshold the metrics are printed in order: accuracy, precision, recall, F1.
# The raw scores do not depend on the threshold, so predict once up front.
fear_scores = fear_model.predict(test_features_fear)
for i in range(1, 45):
    threshold = i / 100.
    fear_pred = fear_scores > threshold
    # Format the number itself; wrapping it in braces built a one-element set
    # and printed e.g. "{0.01}" instead of "0.01".
    print('threshold = {}'.format(threshold))
    print(accuracy_score(test_annotations_fear, fear_pred),
          precision_score(test_annotations_fear, fear_pred),
          recall_score(test_annotations_fear, fear_pred),
          f1_score(test_annotations_fear, fear_pred))