# To plot the year for generated art. 

In [1]:
import pickle
import numpy as np
from scipy.io import loadmat
from sklearn.decomposition import PCA, FastICA
from sklearn.manifold import LocallyLinearEmbedding
import os
import re
import pandas as pd
from scipy.stats import pearsonr
from pathlib import Path
pd.set_option("display.max_rows", None, "display.max_columns", None)

In [2]:
mat_file = loadmat('./datasets/processed/groundtruth_pruned.mat')

In [3]:
years = []
re_string = '[0-9][0-9][0-9][0-9]\.'
for m in mat_file['groundtruth_pruned'][0][0][0]:
    text = re.findall(re_string, m[0][0])
    if text != []:
        year = int(text[0][:-1])
        if year < 1400 or year > 2010:
            years.append(np.nan)
        else:
            years.append(int(year))
    else:
        years.append(np.nan)
#         print("XXXX")

years = np.array(years)
indexes = ~np.isnan(years)

In [4]:
print('Average year for real art: ',years[indexes].mean())

Average year for real art:  1869.7717107241149


In [5]:
df_1k = pd.read_csv('./datasets/processed/combined_wofflins_real.csv')
df_5k = pd.read_csv('./datasets/processed/real_wofflin_scores_combined_normalised.csv')
combined = [df_1k, df_5k]
df = pd.concat(combined)

In [6]:
df['name'] = df['Input.image'].apply(lambda x: Path(x).name)

def get_year(row):
    try:
        y = int(row['name'].split('.')[0].split('-')[-1])
        if int(y) < 1400:
            y = None  
        if int(y) > 2000:
            y = None 
    except ValueError:
        y = None
        
    return y

In [7]:
df['year'] = df.apply(get_year, axis=1)

In [8]:
df[df['year'].isna()]

Unnamed: 0.1,Input.image,Linearly-vs-Painterly,Planar-vs-Recessional,closed-form-vs-open-form,multiplicity-vs-unity,absolute-clarity-vs-relative-clarity,Unnamed: 0,name,year
223,https://crw2020.s3-us-west-2.amazonaws.com/ran...,0.35,0.85,0.65,0.5,0.6,,161_not_detected_251590.jpg!Blog.jpg,
985,https://crw2020.s3-us-west-2.amazonaws.com/ran...,0.5,0.55,0.75,0.5,0.75,985.0,145_not_detected_251621.jpg!Blog.jpg,
1142,https://crw2020.s3-us-west-2.amazonaws.com/ran...,0.8,0.6,1.0,0.4,0.7,1142.0,160_not_detected_251609.jpg!Blog.jpg,


In [9]:
df.drop(index=[223,985,1142], inplace=True)

In [10]:
df['year'] = df['year'].astype(int)

mat_file = loadmat('./datasets/processed/groundtruth_pruned.mat')
files = [Path(f[0][0]).name for f in mat_file['groundtruth_pruned'][0][0][0]]
real_indexes = [files.index(f) for f in df.name.values]

In [11]:
from sklearn.neighbors import NearestNeighbors

In [12]:
model='vit'
real_features = './datasets/features/real_art_features/resnet50_pretrained_real.pkl'
real_features = './datasets/features/real_art_features/vit_real.pkl'
generated_features = ['./datasets/features/generated_art_features/vit_StyleGAN2.pkl',
                      './datasets/features/generated_art_features/vit_StyleCAN2.pkl',
                      './datasets/features/generated_art_features/vit_StyleCWAN1.pkl',
                      './datasets/features/generated_art_features/vit_StyleCWAN2.pkl',
                     ]

In [13]:
years = df['year'].values

In [14]:
with open(real_features, 'rb') as f:
    real_feats = pickle.load(f)

real_feats = real_feats[real_indexes]
neigh = NearestNeighbors(n_neighbors=5,n_jobs=8)
neigh.fit(real_feats)

NearestNeighbors(n_jobs=8)

In [15]:
from scipy import stats
import collections

for i in generated_features:
    print('File: ', i.split('/')[-1])
    with open(i, 'rb') as f:
        generated_feats = pickle.load(f)
    distances,indices = neigh.kneighbors(generated_feats, 5)
    average_year=[]
    for i in range(distances.shape[0]):
        year_1 = years[indices[i]]
        year_painting_avg = np.mean(year_1)
        average_year.append(year_painting_avg)

    np_average_year = np.array(average_year, dtype=np.int32)
    print(min(np_average_year),max(np_average_year))
    counter=collections.Counter(np_average_year)
    my_years=[1400,1450,1500,1550,1600,1650,1700,1750,1800,1850,1900,1950,2000]
    all_sum=0
    for j in range(len(my_years)-1):
        st = my_years[j]
        et = my_years[j+1]
        ck=0
        for i in sorted(counter.items()):
            if st <= i[0] < et:
                ck+=i[1]
            if i[0] >= et:
                break
        all_sum+=ck
        print(st,' - ',et,' : ',ck)
    print('Average Year: ',np.mean(np_average_year))    

File:  vit_StyleGAN2.pkl
1465 1977
1400  -  1450  :  0
1450  -  1500  :  7
1500  -  1550  :  10
1550  -  1600  :  8
1600  -  1650  :  17
1650  -  1700  :  8
1700  -  1750  :  11
1750  -  1800  :  20
1800  -  1850  :  24
1850  -  1900  :  143
1900  -  1950  :  119
1950  -  2000  :  33
Average Year:  1850.4
File:  vit_StyleCAN2.pkl
1439 1976
1400  -  1450  :  1
1450  -  1500  :  9
1500  -  1550  :  7
1550  -  1600  :  10
1600  -  1650  :  9
1650  -  1700  :  6
1700  -  1750  :  10
1750  -  1800  :  15
1800  -  1850  :  21
1850  -  1900  :  107
1900  -  1950  :  163
1950  -  2000  :  42
Average Year:  1863.6575
File:  vit_StyleCWAN1.pkl
1454 1980
1400  -  1450  :  0
1450  -  1500  :  7
1500  -  1550  :  7
1550  -  1600  :  8
1600  -  1650  :  5
1650  -  1700  :  8
1700  -  1750  :  3
1750  -  1800  :  15
1800  -  1850  :  27
1850  -  1900  :  134
1900  -  1950  :  146
1950  -  2000  :  40
Average Year:  1869.9625
File:  vit_StyleCWAN2.pkl
1439 1976
1400  -  1450  :  1
1450  -  1500  :  9


## The values have been plotted [here](https://public.flourish.studio/visualisation/9156959/)