In [None]:
import random
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
from PIL import Image
from sklearn.manifold import TSNE
from tqdm import tqdm
# Device every CLIP forward pass runs on. Use the first GPU when CUDA is
# available; otherwise fall back to the CPU so the notebook still executes.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [None]:
# Locations of the ground-truth CSVs and of the image folder.
GROUNDTRUTH_DIR = '/data/wikiart/wikiart_Painting100k/MultitaskPainting100k_Dataset_groundtruth'
IMAGES_DIR = '/data/wikiart/wikiart_Painting100k/images_256minside'

train_df = pd.read_csv(join(GROUNDTRUTH_DIR, 'groundtruth_multiloss_train_header.csv'))
valid_df = pd.read_csv(join(GROUNDTRUTH_DIR, 'groundtruth_multiloss_test_header.csv'))
print(len(train_df), len(valid_df))

# Full path of every image, built from the `filename` column.
train_df['img_path'] = train_df.filename.map(lambda name: join(IMAGES_DIR, name))
valid_df['img_path'] = valid_df.filename.map(lambda name: join(IMAGES_DIR, name))

# class index -> genre name; indices follow the sorted order of the genres
# that occur in the training split.
class_dict_genre = dict(enumerate(np.sort(train_df.genre.unique())))
# Inverse lookup, used to label both splits in one vectorised pass each.
# A genre that is absent from the training split maps to NaN.
genre_to_class = {genre: idx for idx, genre in class_dict_genre.items()}
train_df['class_genre'] = train_df.genre.map(genre_to_class)
valid_df['class_genre'] = valid_df.genre.map(genre_to_class)

# Side-by-side genre frequencies of the two splits.
# `axis` must be passed by keyword: pandas >= 2.0 rejects it positionally.
pd.concat(
    [
        train_df.genre.value_counts().to_frame('train_counts'),
        valid_df.genre.value_counts().to_frame('valid_counts'),
    ],
    axis=1,
)

In [None]:
# Show the class-index -> genre-name mapping.
class_dict_genre

#### CLIP model

In [None]:
from clip.model import build_model
import clip

In [None]:
# Load the pretrained ViT-B/32 CLIP checkpoint and its matching image
# preprocessing transform, placing the model on `device`.
clip_model, preprocess = clip.load("ViT-B/32", device=device)
# Cast the model's floating-point weights to float32; `.float()` returns the
# model itself, so the cell also displays it.
clip_model.float()

In [None]:
# Position of the next 'cloudscape' example to preview; the preview cell
# increments it on every run, and re-running this cell resets it to 0.
c = 0

In [None]:
# Preview the c-th 'cloudscape' painting of the training split, then advance
# the counter so that re-running this cell steps to the next example.
row = train_df[train_df.genre == 'cloudscape'].iloc[c]
c += 1
print(row.genre)
image = Image.open(row.img_path).convert('RGB')
img_tr = preprocess(image).unsqueeze(0)  # add a leading batch dimension
# Inference only: disable autograd so no graph is recorded for the forward pass.
with torch.no_grad():
    out = clip_model.visual(img_tr.to(device))
image

In [None]:
# Renumber the rows with a fresh default index and discard the old one.
train_df = train_df.reset_index(drop=True)

In [None]:
# Number of rows in the training frame.
len(train_df)

In [None]:
# Encode every training image with the CLIP visual encoder.
#   features: one NumPy array per image (the encoder output for a batch of one)
#   labels:   the matching `class_genre` value of each image
features, labels = [], []
# Inference only: without `no_grad` every forward pass would record an
# autograd graph that is never used.
with torch.no_grad():
    # `total` is required for a real progress bar / ETA, because the
    # iterable handed to tqdm has no length of its own.
    for img_path, class_id in tqdm(zip(train_df.img_path, train_df.class_genre),
                                   total=len(train_df)):
        image = Image.open(img_path).convert('RGB')
        img_tr = preprocess(image).unsqueeze(0)  # add a leading batch dimension
        out = clip_model.visual(img_tr.to(device))
        features.append(out.cpu().numpy())
        labels.append(class_id)

In [None]:
# Join the per-image feature arrays along the first axis into one matrix.
feat = np.concatenate(features)
X = np.asarray(feat, dtype='float32')
# Project the features to 2-D with t-SNE. The seed is fixed because t-SNE is
# stochastic (and starts from a random layout with init='random'); without it
# every run yields a different embedding and different downstream plots.
X_embedded = TSNE(n_components=2, init='random', random_state=0).fit_transform(X)

In [None]:
# Embedding table: columns 0 and 1 hold the two t-SNE coordinates and
# `labels` holds the genre name of each image.
df = pd.DataFrame(X_embedded)
class_ids = np.asarray(labels).ravel()
# Optional: restrict the table to a random subset of genres.
# k = random.choices(range(41), k=15)
# df = df[df.labels.isin(k)]
# Translate each class index into its genre name.
df['labels'] = [class_dict_genre[class_id] for class_id in class_ids]

In [None]:
# Preview the first rows of the embedding table (columns 0, 1 and `labels`).
df.head()

In [None]:
# Per-genre centroid of the 2-D embedding: the mean of columns 0 and 1
# within each label, with `labels` restored as a regular column.
by_genre = df.groupby('labels')
means = by_genre.mean().reset_index()

In [None]:
# Per-genre spread of the 2-D embedding: the standard deviation of columns
# 0 and 1 within each label, with `labels` restored as a regular column.
genre_groups = df.groupby('labels')
stds = genre_groups.std().reset_index()

In [None]:
from scipy.spatial import distance
# Quick check of scipy's Euclidean distance on two known 3-D points.
a, b = (1, 2, 3), (4, 5, 6)
dst = distance.euclidean(a, b)

In [None]:
# Show the distance between the two example points.
dst

In [None]:
# Centroid of the 'abstract' genre in the 2-D embedding, as an (x, y) tuple.
abstract_row = means.loc[means.labels == 'abstract', [0, 1]]
tuple(abstract_row.to_numpy()[0])

In [None]:
# Inspect the per-genre mean coordinates.
means

In [None]:
# Euclidean distance from every genre centroid to the centroid of the
# reference genre, stored in the column '<genre>_dst'.
target_genre = 'bird-and-flower painting'
# Look the reference centroid up once, not once per row inside the lambda.
target_xy = tuple(means.loc[means.labels == target_genre, [0, 1]].to_numpy()[0])
means[f'{target_genre}_dst'] = means.apply(
    lambda genre_row: distance.euclidean((genre_row[0], genre_row[1]), target_xy),
    axis=1,
)

In [None]:
# Scatter plot of the 14 genre centroids closest to 'bird-and-flower painting'
# (the reference genre itself included), one palette colour per genre.
by_distance = means.sort_values('bird-and-flower painting_dst')
tmp = by_distance.iloc[:14]  # alternative filter once tried: [(means[0]<0)&(means[1]<0)]

colors = [
    "#FF5733", "#DAF7A6", "#C0C0C0", "#FFC300", "#C70039",
    "#FF00FF", "#008000", "#3498DB", "#2471A3", "#7D3C98",
    "#00FFFF", "#7D6608", "#00FF00", "#000000",
]
pal = sns.color_palette(colors)

fig = plt.figure(figsize=(15, 11))
sns.scatterplot(
    data=tmp,
    x=tmp[0],
    y=tmp[1],
    hue="labels",
    palette=pal,
    s=300,
    alpha=1,
)

In [None]:
# Show the rows currently held in `tmp`.
tmp

In [None]:
#df

In [None]:
# 2-D density of the embedded images of two genres, one filled contour set per genre.
fig = plt.figure(figsize=(15, 11))
x_col, y_col = df.columns[0], df.columns[1]
selected = df.loc[df['labels'].isin(['bird-and-flower painting', 'animal painting'])]
sns.kdeplot(data=selected, x=x_col, y=y_col, hue="labels",
            thresh=.1, palette='rainbow', fill=True, alpha=.5)

In [None]:
# Scatter plot of every embedded image that belongs to one of the two genres.
genre_pair = ['bird-and-flower painting', 'animal painting']
tmp = df.loc[df['labels'].isin(genre_pair)]
fig = plt.figure(figsize=(15, 11))
sns.scatterplot(
    data=tmp,
    x=tmp[0],
    y=tmp[1],
    hue="labels",
    palette='rainbow',
    s=300,
    alpha=1,
)