# Exploratory Data Analysis for Aesthetics

Import libraries:

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Some Matplotlib configuration:

In [2]:
font = { 'family': 'DejaVu Sans', 'weight': 'bold', 'size': 22 }
plt.rc('font', **font)

Import the config file which contains all the paths:

In [3]:
import sys

In [4]:
sys.path.append('..')

In [5]:
import config

**Aesthetic Features**

In [6]:
config.DEV_FEATURES

'/datasets/dev-set/features'

In [7]:
config.DEV_FEATURES_LIST

['C3D',
 'HMP',
 'InceptionV3',
 'LBP',
 'aesthetic_feat_dev-set_mean',
 'ColorHistogram',
 'HOG',
 'ORB']

In [8]:
config.DEV_AESTHETIC_FEATURE

'/datasets/dev-set/features/aesthetic_feat_dev-set_mean'

In [13]:
AESTHETIC_FILENAMES = os.listdir(config.DEV_AESTHETIC_FEATURE)

In [14]:
AESTHETIC_FILENAMES[:5]

['video3094.txt',
 'video4963.txt',
 'video3388.txt',
 'video6587.txt',
 'video4709.txt']

In [17]:
def read_features(filename):
    with open(filename) as f:
        return { 'AESTHETIC_{}'.format(i + 1) : float(feature) for i, feature in enumerate(f.read().split(',')) }

In [18]:
%%time

features = []
for i, video in enumerate(AESTHETIC_FILENAMES):
    video_features = read_features(os.path.join(config.DEV_AESTHETIC_FEATURE, video))
    video_features['video'] = video.split('.txt')[0] + '.webm'
    features.append(video_features)

CPU times: user 5.25 s, sys: 6.11 s, total: 11.4 s
Wall time: 1min 19s


In [19]:
dataframe = pd.DataFrame(features)

In [20]:
dataframe.head()

Unnamed: 0,AESTHETIC_1,AESTHETIC_10,AESTHETIC_100,AESTHETIC_101,AESTHETIC_102,AESTHETIC_103,AESTHETIC_104,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,...,AESTHETIC_91,AESTHETIC_92,AESTHETIC_93,AESTHETIC_94,AESTHETIC_95,AESTHETIC_96,AESTHETIC_97,AESTHETIC_98,AESTHETIC_99,video
0,0.09449,2.0,-0.001027,-0.000899,0.531603,0.116133,0.484598,1.035787,0.0,-0.074866,...,0.454548,0.56099,0.499152,0.530111,0.611689,0.133933,0.065942,0.048722,0.003436,video3094.webm
1,0.580488,1.6,-0.001494,0.002818,0.321116,0.02714,0.207377,1.111711,0.0,0.306076,...,0.537296,0.393132,0.455645,0.630204,0.677637,0.137946,0.080201,0.045218,-0.004557,video4963.webm
2,0.200979,3.066667,-0.002438,-3e-06,0.522213,0.204919,0.306396,0.395069,0.0,0.210751,...,0.467651,0.327667,0.495558,0.662631,0.700623,0.15536,0.046387,0.007309,-0.000943,video3388.webm
3,0.28072,3.2,0.001626,-0.00203,0.204535,0.443462,0.29684,0.910423,0.0,8.029627,...,0.48101,0.508088,0.553311,0.505576,0.410188,0.134964,0.090767,0.027107,-0.006516,video6587.webm
4,0.453907,2.0,-0.004014,0.000108,0.178191,0.425658,0.242266,0.901258,0.0,0.188016,...,0.563867,0.267973,0.320418,0.765719,0.094167,0.118029,0.104865,0.005237,0.00286,video4709.webm


In [23]:
dataframe.corr()

Unnamed: 0,AESTHETIC_1,AESTHETIC_10,AESTHETIC_100,AESTHETIC_101,AESTHETIC_102,AESTHETIC_103,AESTHETIC_104,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,...,AESTHETIC_90,AESTHETIC_91,AESTHETIC_92,AESTHETIC_93,AESTHETIC_94,AESTHETIC_95,AESTHETIC_96,AESTHETIC_97,AESTHETIC_98,AESTHETIC_99
AESTHETIC_1,1.000000,0.167039,0.004676,-0.017936,0.110677,0.324737,-0.122366,0.032481,,0.012735,...,-0.051047,0.060345,0.054721,-0.000771,-0.014502,0.021597,-0.033616,0.014038,-0.048739,0.052176
AESTHETIC_10,0.167039,1.000000,0.001874,-0.010853,-0.151292,0.271513,0.043663,0.140077,,0.005983,...,-0.003836,-0.005874,0.004128,0.011559,0.005380,-0.009335,0.024588,-0.036346,0.022183,0.009375
AESTHETIC_100,0.004676,0.001874,1.000000,0.014938,0.045668,0.000634,-0.016784,-0.015817,,0.007402,...,0.277381,-0.309160,0.022901,0.189042,-0.206249,0.047199,-0.022402,-0.035888,0.021492,-0.287381
AESTHETIC_101,-0.017936,-0.010853,0.014938,1.000000,0.002076,0.009186,-0.013131,0.020072,,-0.015850,...,0.083952,-0.015474,-0.165848,0.036226,0.011861,-0.066201,0.009067,0.008928,-0.024959,-0.083979
AESTHETIC_102,0.110677,-0.151292,0.045668,0.002076,1.000000,0.125619,0.129982,0.016301,,0.016524,...,0.053640,-0.046565,0.012314,-0.007331,-0.004727,0.005517,-0.062425,-0.148264,-0.245888,-0.031174
AESTHETIC_103,0.324737,0.271513,0.000634,0.009186,0.125619,1.000000,0.094078,0.208488,,0.010030,...,0.003737,-0.002231,0.014931,-0.000940,-0.000442,0.009704,0.010700,-0.040090,-0.056668,0.023284
AESTHETIC_104,-0.122366,0.043663,-0.016784,-0.013131,0.129982,0.094078,1.000000,0.178999,,0.007297,...,-0.028071,0.040454,0.019736,-0.002977,0.007427,0.009267,0.116503,-0.118371,-0.103646,0.017133
AESTHETIC_105,0.032481,0.140077,-0.015817,0.020072,0.016301,0.208488,0.178999,1.000000,,-0.005323,...,0.015306,-0.019935,-0.009503,0.003540,-0.003237,-0.019431,-0.006578,0.033994,0.011952,-0.019962
AESTHETIC_106,,,,,,,,,,,...,,,,,,,,,,
AESTHETIC_107,0.012735,0.005983,0.007402,-0.015850,0.016524,0.010030,0.007297,-0.005323,,1.000000,...,-0.015138,0.002107,0.024046,-0.006844,-0.010808,0.011209,0.013151,-0.023967,-0.019254,0.030405


**Groundtruth**

In [24]:
dev_ground_truth = pd.read_csv(config.DEV_GROUNDTRUTH) 

In [25]:
# dev_ground_truth.set_index('video', inplace=True)

Merging both datasets:

In [26]:
dataframe = dataframe.merge(dev_ground_truth)

In [27]:
dataframe.head()

Unnamed: 0,AESTHETIC_1,AESTHETIC_10,AESTHETIC_100,AESTHETIC_101,AESTHETIC_102,AESTHETIC_103,AESTHETIC_104,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,...,AESTHETIC_95,AESTHETIC_96,AESTHETIC_97,AESTHETIC_98,AESTHETIC_99,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,0.09449,2.0,-0.001027,-0.000899,0.531603,0.116133,0.484598,1.035787,0.0,-0.074866,...,0.611689,0.133933,0.065942,0.048722,0.003436,video3094.webm,0.955,37,0.917,12
1,0.580488,1.6,-0.001494,0.002818,0.321116,0.02714,0.207377,1.111711,0.0,0.306076,...,0.677637,0.137946,0.080201,0.045218,-0.004557,video4963.webm,0.746,33,0.7,10
2,0.200979,3.066667,-0.002438,-3e-06,0.522213,0.204919,0.306396,0.395069,0.0,0.210751,...,0.700623,0.15536,0.046387,0.007309,-0.000943,video3388.webm,0.82,37,0.818,11
3,0.28072,3.2,0.001626,-0.00203,0.204535,0.443462,0.29684,0.910423,0.0,8.029627,...,0.410188,0.134964,0.090767,0.027107,-0.006516,video6587.webm,0.858,32,0.75,12
4,0.453907,2.0,-0.004014,0.000108,0.178191,0.425658,0.242266,0.901258,0.0,0.188016,...,0.094167,0.118029,0.104865,0.005237,0.00286,video4709.webm,0.637,34,0.6,10


In [64]:
dataframe['AESTHETIC_106'].value_counts()

0.0    6941
Name: AESTHETIC_106, dtype: int64

In [65]:
cols_to_remove = [ 'AESTHETIC_106' ]

In [72]:
AESTHETIC_COLS = [ col for col in dataframe.filter(regex='AESTHETIC').columns if col not in cols_to_remove ]

In [73]:
from scipy.stats import spearmanr

In [74]:
correlations = []
for col in AESTHETIC_COLS:
    short_corr, short_p_value = spearmanr(dataframe[col], dataframe['short-term_memorability'])
    long_corr, long_p_value = spearmanr(dataframe[col], dataframe['long-term_memorability'])
    correlations.append({
        'column': col,
        'short-term correlation coefficient': short_corr,
        'short-term p-value': short_p_value,
        'long-term correlation coefficient': long_corr,
        'long-term p-value': long_p_value,
    })

In [102]:
pd.DataFrame(correlations).sort_values(by='short-term correlation coefficient', ascending=False).head(10)[
    ['column', 'short-term correlation coefficient', 'short-term p-value']]

Unnamed: 0,column,short-term correlation coefficient,short-term p-value
104,AESTHETIC_96,0.106888,4.2955659999999994e-19
63,AESTHETIC_59,0.100822,3.7740580000000004e-17
68,AESTHETIC_63,0.098565,1.868246e-16
92,AESTHETIC_85,0.098493,1.963697e-16
87,AESTHETIC_80,0.098038,2.699947e-16
40,AESTHETIC_38,0.095912,1.168329e-15
69,AESTHETIC_64,0.08964,7.327364e-14
6,AESTHETIC_104,0.083037,4.244444e-12
83,AESTHETIC_77,0.078097,7.258112e-11
106,AESTHETIC_98,0.077303,1.127416e-10


In [103]:
pd.DataFrame(correlations).sort_values(by='long-term correlation coefficient', ascending=False).head(10)[
    ['column', 'long-term correlation coefficient', 'long-term p-value']]

Unnamed: 0,column,long-term correlation coefficient,long-term p-value
40,AESTHETIC_38,0.077339,1.105113e-10
104,AESTHETIC_96,0.065193,5.442952e-08
39,AESTHETIC_37,0.061617,2.785237e-07
38,AESTHETIC_36,0.056659,2.32017e-06
97,AESTHETIC_9,0.056194,2.805541e-06
92,AESTHETIC_85,0.0494,3.831957e-05
87,AESTHETIC_80,0.046543,0.0001048868
63,AESTHETIC_59,0.045817,0.0001342915
67,AESTHETIC_62,0.044389,0.0002161961
68,AESTHETIC_63,0.04399,0.0002463283
