# Exploratory Data Analysis for Aesthetics

Import libraries:

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Some Matplotlib configuration:

In [2]:
# Global Matplotlib font settings applied to every figure in this notebook.
font = dict(family='DejaVu Sans', weight='bold', size=22)
plt.rc('font', **font)

Import the `config` module, which defines all the dataset paths used below:

In [3]:
import sys

In [4]:
# Add the parent directory to the module search path so the `import config`
# in the next cell can resolve (presumably config.py lives one level up — confirm).
sys.path.append('..')

In [5]:
import config

**Aesthetic Features**

In [6]:
# Show the root directory of the dev-set features as configured.
config.DEV_FEATURES

'/datasets/devset/dev-set/features'

In [8]:
# Show the directory that holds the per-video aesthetic feature files.
config.DEV_AESTHETIC_FEATURE

'/datasets/devset/dev-set/features/aesthetic_visual_features/aesthetic_feat_dev-set_mean'

In [9]:
# Names of all entries in the aesthetic feature directory.
# NOTE: os.listdir returns entries in arbitrary order, so the row order of the
# feature table built from this list can differ between machines/runs.
AESTHETIC_FILENAMES = os.listdir(config.DEV_AESTHETIC_FEATURE)

In [10]:
# Peek at the first few filenames to see the naming scheme.
AESTHETIC_FILENAMES[:5]

['video8181.txt',
 'video1217.txt',
 'video385.txt',
 'video9142.txt',
 'video6804.txt']

In [11]:
def read_features(filename):
    """Parse one comma-separated feature file into a dict of floats.

    The whole file is read as a single string and split on ','. The value at
    position k (1-based) is stored under the key 'AESTHETIC_k'.

    Args:
        filename: Path of the feature file to read.

    Returns:
        dict mapping 'AESTHETIC_1', 'AESTHETIC_2', ... to float values, in
        file order.

    Raises:
        ValueError: if any comma-separated token cannot be converted by float().
    """
    with open(filename) as handle:
        raw_values = handle.read().split(',')

    parsed = {}
    for position, raw_value in enumerate(raw_values, start=1):
        parsed['AESTHETIC_{}'.format(position)] = float(raw_value)
    return parsed

In [12]:
%%time

# Build one record per feature file: the parsed AESTHETIC_* values plus the
# name of the video they belong to, rewritten with a '.webm' extension so it
# can later be matched against the ground-truth `video` column.
features = []
for video in AESTHETIC_FILENAMES:
    video_features = read_features(os.path.join(config.DEV_AESTHETIC_FEATURE, video))
    # splitext removes only the trailing extension; split('.txt')[0] would cut
    # the name at the first '.txt' occurring anywhere in it.
    video_features['video'] = os.path.splitext(video)[0] + '.webm'
    features.append(video_features)

CPU times: user 1.75 s, sys: 440 ms, total: 2.19 s
Wall time: 4.24 s


In [13]:
# One row per video: the AESTHETIC_* feature columns plus the `video` name column.
dataframe = pd.DataFrame(features)

In [14]:
# Sanity-check the parsed feature table.
dataframe.head()

Unnamed: 0,AESTHETIC_1,AESTHETIC_2,AESTHETIC_3,AESTHETIC_4,AESTHETIC_5,AESTHETIC_6,AESTHETIC_7,AESTHETIC_8,AESTHETIC_9,AESTHETIC_10,...,AESTHETIC_101,AESTHETIC_102,AESTHETIC_103,AESTHETIC_104,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,AESTHETIC_108,AESTHETIC_109,video
0,0.416655,0.287846,0.787102,0.48152,0.678284,0.378925,42.204762,0.541843,0.013355,7.0,...,0.000234,0.038703,0.402853,0.27661,0.798512,0.0,1.127979,0.52154,0.412572,video8181.webm
1,0.323288,0.39172,0.416775,0.313573,0.353575,0.331604,42.410663,0.581111,0.02773,8.0,...,0.001197,0.07669,0.227644,0.39324,1.220702,0.0,0.218243,0.641691,0.649648,video1217.webm
2,0.2659,0.264796,0.368271,0.180579,0.328278,0.402712,48.976618,0.090741,0.015647,3.533333,...,0.000292,0.245427,0.369687,0.252546,1.907696,0.0,0.385917,0.402759,0.049053,video385.webm
3,0.341944,0.290298,0.55993,0.318769,0.480231,0.209707,37.2082,0.605263,0.021809,5.4,...,-0.001025,0.262531,0.262475,0.240423,1.221575,0.0,-0.995609,0.171773,1.960187,video9142.webm
4,0.265779,0.191051,0.511892,0.145751,0.469708,0.239208,47.662151,0.337732,0.011001,2.666667,...,0.000484,0.633522,0.32369,0.304035,1.265255,0.0,-0.002173,-0.206931,0.273556,video6804.webm


In [15]:
# Pairwise Pearson correlations between the feature columns.
# The string `video` column is excluded explicitly: older pandas dropped
# non-numeric columns silently, but pandas >= 2.0 raises on them instead.
dataframe.select_dtypes(include='number').corr()

Unnamed: 0,AESTHETIC_1,AESTHETIC_2,AESTHETIC_3,AESTHETIC_4,AESTHETIC_5,AESTHETIC_6,AESTHETIC_7,AESTHETIC_8,AESTHETIC_9,AESTHETIC_10,...,AESTHETIC_100,AESTHETIC_101,AESTHETIC_102,AESTHETIC_103,AESTHETIC_104,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,AESTHETIC_108,AESTHETIC_109
AESTHETIC_1,1.000000,-0.267587,0.077563,-0.196112,0.143062,-0.043329,0.075495,0.703273,0.373499,0.171035,...,0.004028,-0.016844,0.108299,0.327298,-0.122886,0.030067,,0.013330,-0.000452,-0.000165
AESTHETIC_2,-0.267587,1.000000,-0.244221,0.869801,-0.498959,0.288800,0.110964,-0.160969,-0.095609,-0.156132,...,0.020841,0.001147,-0.057833,-0.346861,0.268018,-0.281546,,-0.000126,0.009034,-0.008719
AESTHETIC_3,0.077563,-0.244221,1.000000,0.104091,0.954090,-0.420338,-0.255604,0.096375,0.096394,0.005596,...,-0.027804,-0.013448,0.218371,0.064755,0.110790,0.256991,,0.009254,0.002664,-0.011139
AESTHETIC_4,-0.196112,0.869801,0.104091,1.000000,-0.144319,0.243644,0.119067,-0.088758,-0.037952,-0.133445,...,0.010545,-0.000046,0.023929,-0.305395,0.313620,-0.047323,,0.009784,0.007626,-0.008277
AESTHETIC_5,0.143062,-0.498959,0.954090,-0.144319,1.000000,-0.429415,-0.254672,0.127137,0.118177,0.057499,...,-0.030844,-0.013543,0.208476,0.157478,0.043007,0.336480,,0.009641,0.000952,-0.007982
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
AESTHETIC_105,0.030067,-0.281546,0.256991,-0.047323,0.336480,-0.267032,-0.092786,0.065239,0.120927,0.134576,...,-0.019961,0.022832,0.023383,0.200845,0.180556,1.000000,,-0.004779,0.003634,-0.002649
AESTHETIC_106,,,,,,,,,,,...,,,,,,,,,,
AESTHETIC_107,0.013330,-0.000126,0.009254,0.009784,0.009641,0.011796,0.003734,0.004734,0.018516,0.006456,...,0.006857,-0.015156,0.014347,0.010834,0.007769,-0.004779,,1.000000,0.002867,0.001400
AESTHETIC_108,-0.000452,0.009034,0.002664,0.007626,0.000952,-0.009605,-0.016046,0.017834,0.011738,0.003247,...,-0.008146,-0.009495,-0.019172,0.003605,0.001684,0.003634,,0.002867,1.000000,-0.000188


**Groundtruth**

In [16]:
# Load the memorability annotations (path taken from the project config).
dev_ground_truth = pd.read_csv(config.DEV_GROUNDTRUTH) 

In [17]:
# NOTE(review): dead code. The merge in the next cell passes no `on=` key, so it
# presumably relies on `video` staying a regular column of the ground truth —
# confirm, then delete this cell rather than keeping it commented out.
# dev_ground_truth.set_index('video', inplace=True)

Merge the aesthetic features with the ground truth (joined on the shared `video` column):

In [18]:
# Inner-join the feature table with the ground truth. No key is passed, so
# pandas joins on the columns that both frames have in common.
dataframe = pd.merge(dataframe, dev_ground_truth)

In [19]:
# Check that the memorability columns were attached to each video row.
dataframe.head()

Unnamed: 0,AESTHETIC_1,AESTHETIC_2,AESTHETIC_3,AESTHETIC_4,AESTHETIC_5,AESTHETIC_6,AESTHETIC_7,AESTHETIC_8,AESTHETIC_9,AESTHETIC_10,...,AESTHETIC_105,AESTHETIC_106,AESTHETIC_107,AESTHETIC_108,AESTHETIC_109,video,short-term_memorability,nb_short-term_annotations,long-term_memorability,nb_long-term_annotations
0,0.416655,0.287846,0.787102,0.48152,0.678284,0.378925,42.204762,0.541843,0.013355,7.0,...,0.798512,0.0,1.127979,0.52154,0.412572,video8181.webm,0.537,33,0.4,10
1,0.323288,0.39172,0.416775,0.313573,0.353575,0.331604,42.410663,0.581111,0.02773,8.0,...,1.220702,0.0,0.218243,0.641691,0.649648,video1217.webm,0.905,38,0.5,10
2,0.2659,0.264796,0.368271,0.180579,0.328278,0.402712,48.976618,0.090741,0.015647,3.533333,...,1.907696,0.0,0.385917,0.402759,0.049053,video385.webm,0.839,34,0.889,9
3,0.341944,0.290298,0.55993,0.318769,0.480231,0.209707,37.2082,0.605263,0.021809,5.4,...,1.221575,0.0,-0.995609,0.171773,1.960187,video9142.webm,0.789,92,0.862,29
4,0.265779,0.191051,0.511892,0.145751,0.469708,0.239208,47.662151,0.337732,0.011001,2.666667,...,1.265255,0.0,-0.002173,-0.206931,0.273556,video6804.webm,0.801,33,0.5,14


In [20]:
# AESTHETIC_106 showed only NaN in the correlation matrix; inspect its values.
# A column with a single distinct value has zero variance, which leaves its
# correlation undefined.
dataframe['AESTHETIC_106'].value_counts()

0.0    8000
Name: AESTHETIC_106, dtype: int64

In [21]:
# Feature columns excluded from the correlation analysis (constant column).
cols_to_remove = [ 'AESTHETIC_106' ]

In [22]:
# Every column whose name contains 'AESTHETIC', minus the excluded ones,
# in the original column order.
AESTHETIC_COLS = [
    name
    for name in dataframe.filter(regex='AESTHETIC').columns
    if name not in cols_to_remove
]

In [23]:
from scipy.stats import spearmanr

In [24]:
# Spearman rank correlation of every retained aesthetic feature against both
# memorability scores. One record per feature, collected for tabulation later.
short_term_scores = dataframe['short-term_memorability']
long_term_scores = dataframe['long-term_memorability']

correlations = []
for col in AESTHETIC_COLS:
    feature_values = dataframe[col]
    short_corr, short_p_value = spearmanr(feature_values, short_term_scores)
    long_corr, long_p_value = spearmanr(feature_values, long_term_scores)
    record = dict([
        ('column', col),
        ('short-term correlation coefficient', short_corr),
        ('short-term p-value', short_p_value),
        ('long-term correlation coefficient', long_corr),
        ('long-term p-value', long_p_value),
    ])
    correlations.append(record)

  c /= stddev[:, None]
  c /= stddev[None, :]
  return (a < x) & (x < b)
  return (a < x) & (x < b)
  cond2 = cond0 & (x <= _a)


In [25]:
# Ten features with the largest Spearman coefficient for short-term memorability.
# The ranking uses the signed value, so strong negative correlations are not surfaced.
(
    pd.DataFrame(correlations)
    .sort_values(by='short-term correlation coefficient', ascending=False)
    .head(10)
    .loc[:, ['column', 'short-term correlation coefficient', 'short-term p-value']]
)

Unnamed: 0,column,short-term correlation coefficient,short-term p-value
95,AESTHETIC_96,0.104936,4.949561e-21
84,AESTHETIC_85,0.099663,4.070698e-19
79,AESTHETIC_80,0.099114,6.359945999999999e-19
62,AESTHETIC_63,0.096538,4.994126e-18
58,AESTHETIC_59,0.095923,8.101472e-18
37,AESTHETIC_38,0.094622,2.232873e-17
63,AESTHETIC_64,0.086726,7.819583e-15
103,AESTHETIC_104,0.081775,2.383451e-13
97,AESTHETIC_98,0.077733,3.35304e-12
61,AESTHETIC_62,0.074663,2.288489e-11


In [26]:
# Ten features with the largest Spearman coefficient for long-term memorability.
# The ranking uses the signed value, so strong negative correlations are not surfaced.
(
    pd.DataFrame(correlations)
    .sort_values(by='long-term correlation coefficient', ascending=False)
    .head(10)
    .loc[:, ['column', 'long-term correlation coefficient', 'long-term p-value']]
)

Unnamed: 0,column,long-term correlation coefficient,long-term p-value
37,AESTHETIC_38,0.07995,7.991171e-13
95,AESTHETIC_96,0.065648,4.173221e-09
36,AESTHETIC_37,0.060406,6.411144e-08
8,AESTHETIC_9,0.058661,1.517059e-07
35,AESTHETIC_36,0.057183,3.087826e-07
84,AESTHETIC_85,0.052448,2.685105e-06
79,AESTHETIC_80,0.049,1.161954e-05
21,AESTHETIC_22,0.047413,2.210781e-05
11,AESTHETIC_12,0.046128,3.66908e-05
20,AESTHETIC_21,0.04555,4.590492e-05
