In [2]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.metrics import explained_variance_score
from sklearn import cross_validation
from scipy.stats import spearmanr

In [2]:
# Read the feature field list and the pickled feature table from ./clean_data.
field_names = pd.read_csv('./clean_data/feature_field_list.txt')
feature_arr = pd.read_pickle('./clean_data/feature_array')

# Predictors: every column except the two rating columns, as a NumPy array.
feature_x = feature_arr.drop(['attractive', 'unattractive'], axis=1).values

# Regression target: the 'attractive' column, as a NumPy array.
attract_y = feature_arr['attractive'].values

In [14]:
# Run linear regression multiple times.
#
# Each iteration makes a fresh 50/50 train/test split of (feature_x, attract_y),
# fits a RidgeCV model (regularisation strength chosen from `alphas`) on the
# training half, and scores the held-out half with Spearman's rank correlation
# between the true and the predicted target values.
#
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20;
# on newer versions `train_test_split` lives in `sklearn.model_selection`.
itr_num = 25

# Draw the per-iteration split seeds from a fixed-seed generator so that a
# Restart & Run All reproduces the same splits and the same mean correlation.
seed_rng = np.random.RandomState(0)
random_seed = seed_rng.randint(1, 1000, size=itr_num)
test_corr_list = np.zeros((itr_num, 1))

for cur_itr in range(itr_num):
    x_train, x_test, y_train, y_test = cross_validation.train_test_split(
        feature_x, attract_y,
        test_size=0.5, random_state=random_seed[cur_itr])
    clf = linear_model.RidgeCV(alphas=np.logspace(-3, 1, num=20), fit_intercept=True)
    clf.fit(x_train, y_train)
    y_test_pred = clf.predict(x_test)
    corr = spearmanr(y_test, y_test_pred)
    test_corr_list[cur_itr] = corr[0]  # element 0 is the correlation coefficient

# Mean held-out Spearman correlation over all iterations (shown as the cell output).
test_corr_list.mean()

0.81771792355737516

In [3]:
# Now use geometric features to predict facial attractiveness
# Parse every sheet of the psychology-attributes workbook into a dict keyed by sheet name.
file_name = './Full Attribute Scores/psychology attributes/psychology-attributes.xlsx'
xl_file = pd.ExcelFile(file_name)
dfs = dict((name, xl_file.parse(name)) for name in xl_file.sheet_names)
df = dfs['Final Values']  # only use the group average data

In [4]:
# Preview the first rows of the 'Final Values' sheet.
df.head()

Unnamed: 0,Filename,Image #,atypical,boring,calm,catch,catchAns,cold,common,confident,...,normal,sociable,subage.1,submale.1,subrace.1,typical,uncertain,uncommon,unintelligent,untrustworthy
0,Google_1_Danielle Goble_5_oval.jpg,1,3.933333,6.0,6.285714,5.933333,5.933333,4.133333,5.533333,5.666667,...,5.933333,4.666667,4.0,0.0,1.0,5.4,4.266667,4.666667,3.333333,3.666667
1,Google_1_Phillip Owensby_9_oval.jpg,2,2.933333,3.866667,6.466667,5.0,5.0,2.066667,5.933333,6.533333,...,7.333333,6.6,3.5,0.5,1.0,5.733333,2.6,2.6,2.666667,2.6
2,Google_1_Douglas Ziegler_3_oval.jpg,3,2.866667,4.2,6.666667,4.733333,4.733333,3.2,5.733333,6.066667,...,6.2,6.666667,2.75,0.75,1.0,5.933333,3.266667,3.333333,2.733333,2.733333
3,Google_1_Donald Sauls_11_oval.jpg,4,4.785714,4.733333,5.4,4.4,4.4,5.4,5.571429,4.8,...,5.2,3.266667,3.0,1.0,1.0,5.0,4.8,2.933333,4.533333,5.466667
4,Google_1_Eric Harman_5_oval.jpg,5,4.066667,4.933333,5.866667,4.466667,4.466667,4.6,5.533333,6.666667,...,6.2,5.333333,3.0,0.0,1.0,6.266667,4.2,2.6,2.666667,4.733333


In [5]:
# Image filenames whose row positions are looked up in the 'Filename' column
# (presumably the images to exclude from the analysis -- confirm against later cells).
delete_list = [
    'Albert_Belk_5_oval.jpg',
    'Google_1_Amy Shull_7_oval.jpg',
    'Google_1_Dale Villalpando_11_oval.jpg',
    'Google_1_Douglas Macarthur_1_oval.jpg',
    'Google_1_Earl Oh_5_oval.jpg',
    'Google_1_Ernest Crump_3_oval.jpg',
    'Google_1_Genevieve Springs_19_oval.jpg',
    'Google_1_Georgia Dingess_11_oval.jpg',
    'Google_1_Gertrude Bayne_11_oval.jpg',
    'Google_1_Jeffrey Chao_1_oval.jpg',
    'Google_1_Jonathan Wille_10_oval.jpg',
    'Google_1_Lori Weisman_12_oval.jpg',
    'Google_1_Louis Tillis_19_oval.jpg',
    'Google_1_Marjorie Mahon_1_oval.jpg',
    'Google_1_Philip Winfield_9_oval.jpg',
]

In [6]:
# Pull out the 'Filename' column of df; it is matched against delete_list below.
filename = df['Filename']

In [13]:
# 0-based positions (in iteration order) of every entry of `filename`
# that appears in `delete_list`.
delete_idx = [pos for pos, name in enumerate(filename) if name in delete_list]

In [14]:
# Display the collected positions.
delete_idx

[161,
 302,
 315,
 450,
 828,
 900,
 978,
 1395,
 1602,
 1645,
 1725,
 1726,
 2086,
 2092,
 2210]

In [16]:
# Sanity check: show the filenames found at the collected positions.
# `delete_idx` holds positions produced by enumerate(), so select positionally
# with .iloc instead of relying on the Series labels coinciding with positions.
filename.iloc[delete_idx]

161         Google_1_Marjorie Mahon_1_oval.jpg
302              Google_1_Amy Shull_7_oval.jpg
315       Google_1_Georgia Dingess_11_oval.jpg
450      Google_1_Douglas Macarthur_1_oval.jpg
828                Google_1_Earl Oh_5_oval.jpg
900                     Albert_Belk_5_oval.jpg
978          Google_1_Louis Tillis_19_oval.jpg
1395          Google_1_Ernest Crump_3_oval.jpg
1602     Google_1_Dale Villalpando_11_oval.jpg
1645       Google_1_Philip Winfield_9_oval.jpg
1725    Google_1_Genevieve Springs_19_oval.jpg
1726          Google_1_Jeffrey Chao_1_oval.jpg
2086       Google_1_Jonathan Wille_10_oval.jpg
2092       Google_1_Gertrude Bayne_11_oval.jpg
2210         Google_1_Lori Weisman_12_oval.jpg
Name: Filename, dtype: object