In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import explained_variance_score
from sklearn.decomposition import PCA
from sklearn import cross_validation
from scipy.stats import spearmanr
from sklearn import preprocessing
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(context="paper", font="monospace")

from bokeh.plotting import figure, show, output_file



In [10]:
def load_new_geo(csv_path='../clean_data/geometric_wSmoothness.csv'):
    """Load the geometric feature table and split it into features and target.

    Parameters
    ----------
    csv_path : str, optional
        Location of the CSV file to read. The default is the path this
        notebook has always used, so a bare ``load_new_geo()`` call behaves
        exactly as before.

    Returns
    -------
    feature_x : numpy.ndarray
        Every column except ``imgName`` and ``attractive``, after being
        passed through ``preprocessing.scale``.
    attract_y : numpy.ndarray
        Values of the ``attractive`` column.
    field_names : list
        Names of the feature columns, in the same order as the columns of
        ``feature_x``.
    """
    geometric_all = pd.read_csv(csv_path)

    # Everything that is neither the image identifier nor the target is
    # treated as a predictor.
    features = geometric_all.drop(['imgName', 'attractive'], axis=1)
    field_names = list(features.columns.values)

    # Scale the predictors so the ridge penalty treats them comparably.
    feature_x = preprocessing.scale(features.values)
    attract_y = geometric_all['attractive'].values
    return feature_x, attract_y, field_names


def ridge_regression(X, y, itr_num=100, test_size=0.5, alphas=None,
                     random_state=None):
    """Repeatedly fit RidgeCV on random train/test splits and score the fits.

    Each iteration draws a fresh random split of ``(X, y)``, fits a
    ``RidgeCV`` model on the training part and records the Spearman rank
    correlation between the held-out targets and the model's predictions.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Feature matrix.
    y : array-like, shape (n_samples,)
        Target values.
    itr_num : int, optional
        Number of random splits to evaluate (default 100).
    test_size : float or int, optional
        Forwarded to ``train_test_split`` (default 0.5).
    alphas : array-like, optional
        Candidate regularization strengths handed to ``RidgeCV``. Defaults
        to ``np.logspace(-4, 3, num=40)``.
    random_state : int, numpy.random.RandomState or None, optional
        Seed (or generator) that makes the sequence of splits reproducible.
        ``None`` (default) keeps the previous unseeded behaviour.

    Returns
    -------
    mean_corr : float
        Mean Spearman correlation over all iterations.
    coef_list : numpy.ndarray, shape (n_features, itr_num)
        Fitted coefficients, one column per iteration.
    alphas : numpy.ndarray
        The alpha grid that was used.

    Raises
    ------
    ValueError
        If ``itr_num`` is smaller than 1.
    """
    if itr_num < 1:
        raise ValueError('itr_num must be at least 1, got %r' % (itr_num,))
    if alphas is None:
        alphas = np.logspace(-4, 3, num=40)

    # A single generator is shared by every split: seeding each iteration
    # with the same integer would produce itr_num identical splits.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    test_corr_list = np.zeros(itr_num)
    coef_list = np.zeros((X.shape[1], itr_num))

    for cur_itr in range(itr_num):
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=test_size, random_state=rng)

        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=True)
        clf.fit(x_train, y_train)

        # spearmanr returns (correlation, p-value); only the correlation
        # is kept.
        y_test_pred = clf.predict(x_test)
        test_corr_list[cur_itr] = spearmanr(y_test, y_test_pred)[0]

        coef_list[:, cur_itr] = clf.coef_

    return test_corr_list.mean(), coef_list, alphas

In [18]:
# Baseline: ridge regression on the standardized features returned by
# load_new_geo().
feature_x, attract_y, field_names = load_new_geo()
test_corr_mean, coef_list, alphas = ridge_regression(feature_x, attract_y)
# Parenthesized single-argument print gives the same output under Python 2
# and is also valid Python 3.
print(test_corr_mean)

0.527458369445


In [3]:
# Second experiment: run PCA on the geometric columns only, then append the
# h / s / v / smoothness columns to the PCA output.
geometric_all = pd.read_csv('../clean_data/geometric_wSmoothness.csv')

# Drop the identifier, the target and the four non-geometric columns; what
# remains is the input to the PCA.
only_geo_feature = geometric_all.drop(
    ['imgName', 'attractive', 'h', 's', 'v', 'smoothness'],
    axis=1,
)

In [4]:
# Columns that bypass the PCA and are appended to its output unchanged.
appearance_columns = ['h', 's', 'v', 'smoothness']
hsv_smooth_feature = geometric_all[appearance_columns]

In [19]:
# PCA is built with its default arguments, i.e. no explicit n_components.
pca = PCA()
geo_matrix = only_geo_feature.values
pca_feat = pca.fit_transform(geo_matrix)
pca_feat.shape

(2207, 32)

In [20]:
# Place the PCA output and the h / s / v / smoothness columns side by side:
# PCA columns first, the four appended columns last.
hsv_smooth_matrix = hsv_smooth_feature.values
new_feature = np.hstack([pca_feat, hsv_smooth_matrix])

In [8]:
# Sanity check: rows should match the input table; columns should be the PCA
# columns plus the four appended h / s / v / smoothness columns.
# NOTE(review): the recorded output below, (2207, 9), disagrees with the
# (2207, 32) PCA output recorded above plus four appended columns. It was
# presumably produced by an earlier execution (this cell is In [8], the PCA
# cells are In [19]/In [20]) - re-run to refresh.
new_feature.shape

(2207, 9)

In [21]:
# The target column is named 'attractive'; the previous spelling
# ('attractiv') raised a KeyError.
attract_y = geometric_all['attractive']
# NOTE(review): unlike the features returned by load_new_geo(), new_feature
# is not passed through preprocessing.scale before the ridge fit - confirm
# this is intended.
test_corr_mean, coef_list, alphas = ridge_regression(new_feature, attract_y)
# Parenthesized single-argument print gives the same output under Python 2
# and is also valid Python 3.
print(test_corr_mean)

0.525635838153


In [None]:
# Plot the heat map of pairwise correlations between every numeric column
# (all features plus the 'attractive' target; only the image name is dropped).
relevant_feature = geometric_all.drop(['imgName'], axis=1)
corrmat = relevant_feature.corr()
f, ax = plt.subplots(figsize=(16, 16))

# Draw the heatmap using seaborn, on the axes created above. The colour scale
# is pinned to the full correlation range [-1, 1].
sns.heatmap(corrmat, square=True, vmin=-1, vmax=1, ax=ax)

# Fetch the x tick labels and turn them vertical. The earlier
# `plt.xticks(rotation=9)` set a stray rotation that was immediately
# overridden by the 90-degree setting below, so it has been removed.
locs, labels = plt.xticks()
plt.setp(labels, rotation=90)
plt.tick_params(labelsize=10)
plt.savefig('../figs/geometric_feature_correlation_heatmap_all.png')
plt.show()