In [1]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import explained_variance_score
from sklearn.decomposition import PCA
from sklearn import cross_validation
from scipy.stats import spearmanr
from sklearn import preprocessing
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(context="paper", font="monospace")

from bokeh.plotting import figure, show, output_file

In [11]:
def ridge_regression(X, y, itr_num=50, test_size=0.5, alphas=None, random_state=None):
    """Score a cross-validated ridge model over repeated random train/test splits.

    For each iteration the data is split at random, a ``RidgeCV`` model (with
    intercept) is fitted on the training part, and the Spearman rank
    correlation between the held-out targets and the predictions is recorded
    together with the fitted coefficients.

    Parameters
    ----------
    X : 2-D array, one row per sample and one column per feature.
    y : 1-D array of targets, one per row of ``X``.
    itr_num : int, number of random splits to evaluate (default 50).
    test_size : fraction (or count) of samples held out in every split,
        forwarded to ``train_test_split`` (default 0.5).
    alphas : candidate regularisation strengths for ``RidgeCV``; defaults to
        20 log-spaced values between 1e-3 and 1e2.
    random_state : None, int seed, or ``numpy.random.RandomState``.  When
        None (default) the splits draw from the global NumPy generator, which
        matches the previous behaviour; pass a seed for reproducible results.

    Returns
    -------
    (mean_corr, coef_list) : the mean Spearman correlation across iterations
        and an array of shape ``(n_features, itr_num)`` holding the fitted
        coefficients of every iteration column-wise.

    Raises
    ------
    ValueError : if ``itr_num`` is smaller than 1.
    """
    if itr_num < 1:
        raise ValueError("itr_num must be at least 1, got %r" % (itr_num,))

    if alphas is None:
        alphas = np.logspace(-3, 2, num=20)

    # A single generator is shared by all iterations so that a fixed seed
    # still yields a *different* split each time while remaining repeatable.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    test_corr_list = np.zeros((itr_num, 1))
    coef_list = np.zeros((X.shape[1], itr_num))

    for cur_itr in range(itr_num):
        x_train, x_test, y_train, y_test = cross_validation.train_test_split(
            X, y, test_size=test_size, random_state=rng)

        clf = linear_model.RidgeCV(alphas=alphas, fit_intercept=True)
        clf.fit(x_train, y_train)

        y_test_pred = clf.predict(x_test)

        # spearmanr returns (correlation, p-value); only the correlation is kept.
        test_corr_list[cur_itr] = spearmanr(y_test, y_test_pred)[0]
        coef_list[:, cur_itr] = clf.coef_

    return test_corr_list.mean(), coef_list

In [7]:
def load_all_feat(geometric_path='../clean_data/geometric_wSmoothness.csv',
                  social_path='../clean_data/reordered_social_feature.csv'):
    """Load both feature tables and return a standardised design matrix.

    The two CSV files are read, their non-feature columns are dropped, and the
    remaining columns are concatenated side by side and passed through
    ``preprocessing.scale``.  The target is the ``attractive`` column of the
    first table.

    NOTE(review): rows of the two files are combined purely by position, so
    both files are assumed to list the same samples in the same order --
    confirm against how the CSVs were produced.

    Parameters
    ----------
    geometric_path : path of the CSV holding the first feature table; its
        ``imgName`` and ``attractive`` columns are not used as features.
    social_path : path of the CSV holding the second feature table; its
        ``attractive`` and ``unattractive`` columns are not used as features.

    Returns
    -------
    (feature_x, attract_y, field_names) : the scaled feature array, the target
        values, and the feature column names in the same order as the columns
        of ``feature_x``.

    Raises
    ------
    ValueError : if the two tables do not have the same number of rows.
    """
    geometric_all = pd.read_csv(geometric_path)
    x_1 = geometric_all.drop(['imgName', 'attractive'], axis=1)

    social_all = pd.read_csv(social_path)
    x_2 = social_all.drop(['attractive', 'unattractive'], axis=1)

    # Fail early with a readable message instead of an opaque hstack shape error.
    if len(x_1) != len(x_2):
        raise ValueError(
            "feature tables have different row counts: %d (%s) vs %d (%s)"
            % (len(x_1), geometric_path, len(x_2), social_path))

    feature_x = np.hstack((x_1.values, x_2.values))
    feature_x = preprocessing.scale(feature_x)

    field_names = list(x_1.columns.values) + list(x_2.columns.values)
    attract_y = geometric_all['attractive'].values
    return feature_x, attract_y, field_names


In [8]:
# Load the scaled feature matrix, the 'attractive' target values and the
# names of the feature columns.
feature_x, attract_y, field_names = load_all_feat()

In [12]:
# Fit RidgeCV on repeated random train/test splits: test_mean is the mean
# held-out Spearman correlation, coef has one column of coefficients per split.
test_mean, coef = ridge_regression(feature_x, attract_y)

In [13]:
# Parenthesised form prints the same value on Python 2 and also runs on Python 3.
print(test_mean)

0.845448398276
