In [1]:
import os
import numpy as np
import pandas as pd
import json
import h5py
import argparse

import torch
import torch.nn as nn
from collections import Counter, defaultdict

### generate hdf5 file for ArtPedia

In [3]:
# original json file of artpedia dataset 
file_path = "../Dataset/artpedia/artpedia.json"
with open(file_path, 'r') as f:
    artpedia = json.load(f)
    
roi_path = "../Dataset/artpedia/artpedia_features/"

In [4]:
idx_my2ap = np.load('../Dataset/artpedia/idx_my2ap.npy', allow_pickle=True).item()

In [42]:
my_idx = sorted([int(name[:-4]) for name in os.listdir(roi_path)])

In [6]:
def get_roi_caption_pair(idx):
    roi = np.load(roi_path + str(my_idx[idx])+'.npz')['x']
    fst_cap = artpedia[str(idx_my2ap[my_idx[idx]])]['visual_sentences'][0]
    return roi, fst_cap

In [23]:
ap_roi_cap = h5py.File("../Dataset/artpedia/roi_feats.hdf5", "w")

for i in range(len(my_idx)):
    if (i%200==0):
        print(i)
    roi, cap = get_roi_caption_pair(i)
    ap_roi_cap.create_dataset(f"{my_idx[i]}_cap", data=cap)
    ap_roi_cap.create_dataset(f"{my_idx[i]}_features", data=roi)

ap_roi_cap.close()

0
200
400
600
800
1000
1200
1400
1600
1800
2000
2200
2400
2600
2800


In [46]:
ap_roi_cap.close()

### img2captions for semart

In [3]:
sa_test = pd.read_csv("../Dataset/SemArt/prediction_csvs/semart_test_prediction.csv")
sa_test = sa_test[sa_test['predictioin']==0][['img_name', "caption"]]
np.unique(sa_test['img_name'].to_numpy())[:5]

array(["['00097-still_fr.jpg']", "['00123-marketse.jpg']",
       "['00195-5verospi.jpg']", "['00255-v_josefa.jpg']",
       "['00334-portra1.jpg']"], dtype=object)

In [41]:
img_names = sa_val['img_name'].to_numpy()
len(img_names)

1301

In [42]:
## img_names = sa_test['img_name'].to_numpy()
img_cap_map = defaultdict(list)
for it, img_name in enumerate(img_names):
    img_name = img_name[2:-6]
    img_cap_map[img_name].append(it)

In [43]:
with open('../Dataset/SemArt/val_img_caps_map.json', 'w') as fp:
    json.dump(img_cap_map, fp)

In [44]:
data = json.load(open("../Dataset/SemArt/val_img_caps_map.json"))

### generate hdf5 file for SemArt

In [32]:
sa_train = pd.read_csv("../Dataset/SemArt/prediction_csvs/semart_val_prediction.csv")
sa_train = sa_train[sa_train['predictioin']==0][['img_name', 'caption']]
sa_train.head(2)

Unnamed: 0,img_name,caption
16,['10998-307david.jpg'],Madame S�riziat is shown in an interior settin...
17,['10998-307david.jpg'],Her cheeks are ruddy and she carries a recentl...


In [33]:
test_img_names = np.unique(sa_train['img_name'].to_numpy())

In [34]:
len(test_img_names)

581

In [95]:
sa_roi_cap = h5py.File("../Dataset/SemArt/sa_test_roi.hdf5", "w")
for it, img_name in enumerate(test_img_names):
    if it % 200 == 0:
        print(it)
    img_name = img_name[2:-6]
    roi_path = "../Dataset/SemArt/roi/" + img_name + ".npz"
    roi = np.load(roi_path)['x']
    sa_roi_cap.create_dataset(img_name, data=roi)
sa_roi_cap.close()

0
200
400
600


In [112]:
list(sa_roi_cap.keys())[:10]

['00124-marketst',
 '00221-oratory',
 '00361-flight_e',
 '00418-4saints2',
 '00515-mother',
 '00606-4esther',
 '00640-port_lad',
 '00849-35_eucha',
 '00965-05humili',
 '00973-0fiesol1']

### generate hdf5 file for SemArt (grid feature)

In [2]:
class DataProcessor(nn.Module):
    def __init__(self):
        super(DataProcessor, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d((7, 7))

    def forward(self, x):
        x = self.pool(x)
        x = torch.squeeze(x)    # [1, d, h, w] => [d, h, w] 
        x = x.permute(1, 2, 0)  # [d, h, w] => [h, w, d]
        return x.view(-1, x.size(-1))   # [h*w, d]

In [48]:
sa_train = pd.read_csv("../Dataset/SemArt/prediction_csvs/semart_train_prediction.csv", delimiter="\t")
sa_train = sa_train[sa_train['prediction']==0][['img_name', 'caption']]
sa_train.head()

Unnamed: 0,img_name,caption
1,['19873-1darmst.jpg'],Standing in a scalloped niche with projecting ...
2,['19873-1darmst.jpg'],"The hooped crown, an allusion to the German im..."
7,['19873-1darmst.jpg'],"Before them kneels Anna, the only surviving ch..."
9,['19873-1darmst.jpg'],"In front of Jakob, in a Raphaelesque triangula..."
10,['19873-1darmst.jpg'],"The baby, with curly blonde hair and pudgy che..."


In [49]:
train_img_names = np.unique(sa_train['img_name'].to_numpy())

In [50]:
train_img_names.shape

(10860,)

In [51]:
sa_grid_cap.close()
sa_grid_cap = h5py.File("../Dataset/sa_train_grid.hdf5", "w")

processor = DataProcessor()  ';kkk'

for it, img_name in enumerate(train_img_names):
    if it % 500 == 0:
        print(it)
    img_name = img_name[2:-6]
    grid_path = "../Dataset/SemArt/semart_grid_feats/" + img_name + ".npy"
    grid_feat = np.load(grid_path)
    grid_feat = processor(torch.tensor(grid_feat))
    sa_grid_cap.create_dataset(img_name, data=grid_feat)
sa_grid_cap.close()

0
500
1000
1500
2000
2500
3000
3500
4000
4500
5000
5500
6000
6500
7000
7500
8000
8500
9000
9500
10000
10500


### generate hdf5 file for ArtPedia  (grid features)

In [2]:
class DataProcessor(nn.Module):
    def __init__(self):
        super(DataProcessor, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d((7, 7))

    def forward(self, x):
        x = self.pool(x)
        x = torch.squeeze(x)    # [1, d, h, w] => [d, h, w] 
        x = x.permute(1, 2, 0)  # [d, h, w] => [h, w, d]
        return x.view(-1, x.size(-1))   # [h*w, d]

In [3]:
# original json file of artpedia dataset 
file_path = "../Dataset/artpedia/artpedia.json"
with open(file_path, 'r') as f:
    artpedia = json.load(f)
    
grid_path = "../../hy-nas/Dataset/artpedia/artpedia_grid_feats/"

In [4]:
idx_my2ap = np.load('../Dataset/artpedia/idx_my2ap.npy', allow_pickle=True).item()

In [7]:
my_idx = sorted([int(name[:-4]) for name in os.listdir(grid_path)])

train_myidx = np.load('../Dataset/artpedia/train_myidx.npy')
val_myidx = np.load('../Dataset/artpedia/val_myidx.npy')
test_myidx = np.load('../Dataset/artpedia/test_myidx.npy')

In [9]:
def get_grid_caption_pair(idx):
    grid_feat = np.load(grid_path + str(my_idx[idx])+'.npy')
    fst_cap = artpedia[str(idx_my2ap[my_idx[idx]])]['visual_sentences']
    return grid_feat, fst_cap

In [10]:
print(get_grid_caption_pair(0)[1])
print("---")
print([n.encode("ascii", "ignore") for n in get_grid_caption_pair(0)[1]])

['It is also stylistically earlier to that work, being painted without pseudo-perspective, and having the angels around the Virgin simply placed one above the other, rather than being spatially arranged.', 'The throne is similar to the Maestà painted by Cimabue in the Basilica of San Francesco di Assisi (1288–1292).']
---
[b'It is also stylistically earlier to that work, being painted without pseudo-perspective, and having the angels around the Virgin simply placed one above the other, rather than being spatially arranged.', b'The throne is similar to the Maest painted by Cimabue in the Basilica of San Francesco di Assisi (12881292).']


In [11]:
processor = DataProcessor()  

feat = torch.tensor(get_grid_caption_pair(0)[0])
print(feat.shape)

pool = nn.AdaptiveAvgPool2d((7, 7))

processor(feat).shape

torch.Size([1, 2048, 19, 19])


torch.Size([49, 2048])

In [12]:
len(my_idx)

2804

In [20]:
# train_ap_grid_cap.close()
# val_ap_grid_cap.close()
# test_ap_grid_cap.close()

train_ap_grid_cap = h5py.File("..//Dataset/artpedia/ap_train_grid.hdf5", "w")
val_ap_grid_cap = h5py.File("../Dataset/artpedia/ap_val_grid.hdf5", "w")
test_ap_grid_cap = h5py.File("../Dataset/artpedia/ap_test_grid.hdf5", "w")

processor = DataProcessor()  

for i in range(len(my_idx)):
    if (i%300==0):
        print(i)
    grid_feat, cap = get_grid_caption_pair(i)
    grid_feat = processor(torch.tensor(grid_feat))
    
    myidx = my_idx[i]
    
    cap = [n.encode("ascii", "ignore") for n in cap]
    
    if myidx in train_myidx:
#         print(cap, len(cap))
        train_ap_grid_cap.create_dataset(f"{my_idx[i]}_cap", (len(cap),),  dtype=h5py.special_dtype(vlen=str), data=cap)
        train_ap_grid_cap.create_dataset(f"{my_idx[i]}_grids", data=grid_feat)
    elif myidx in val_myidx:
#         print(cap, len(cap))
        val_ap_grid_cap.create_dataset(f"{my_idx[i]}_cap", (len(cap),),  dtype=h5py.special_dtype(vlen=str), data=cap)
        val_ap_grid_cap.create_dataset(f"{my_idx[i]}_grids", data=grid_feat)
    elif myidx in test_myidx:
#         print(cap, len(cap))
        test_ap_grid_cap.create_dataset(f"{my_idx[i]}_cap", (len(cap),),  dtype=h5py.special_dtype(vlen=str), data=cap)
        test_ap_grid_cap.create_dataset(f"{my_idx[i]}_grids", data=grid_feat)


    

train_ap_grid_cap.close()
val_ap_grid_cap.close()
test_ap_grid_cap.close()

0
300
600
900
1200
1500
1800
2100
2400
2700


In [23]:
train_ap_grid_cap_r = h5py.File("../Dataset/artpedia/ap_train_grid.hdf5", "r")
val_ap_grid_cap_r = h5py.File("../Dataset/artpedia/ap_val_grid.hdf5", "r")
test_ap_grid_cap_r = h5py.File("../Dataset/artpedia/ap_test_grid.hdf5", "r")

print(list(train_ap_grid_cap_r.keys())[:5])
print(len(list(train_ap_grid_cap_r.keys())))

print(type(train_ap_grid_cap_r['0_cap'].value))
print(train_ap_grid_cap_r['0_cap'].value)

# train_ap_grid_cap_r.close()
# val_ap_grid_cap_r.close()
# test_ap_grid_cap_r.close()

['0_cap', '0_grids', '1000_cap', '1000_grids', '1001_cap']
4316
<class 'numpy.ndarray'>
['It is also stylistically earlier to that work, being painted without pseudo-perspective, and having the angels around the Virgin simply placed one above the other, rather than being spatially arranged.'
 'The throne is similar to the Maest painted by Cimabue in the Basilica of San Francesco di Assisi (12881292).']


In [24]:
train_ap_grid_cap_r['0_grids'].value.shape

(49, 2048)

In [25]:
print(list(val_ap_grid_cap_r.keys())[:5])
print(len(list(val_ap_grid_cap_r.keys())))

['1002_cap', '1002_grids', '1016_cap', '1016_grids', '1026_cap']
634


In [26]:
print(list(test_ap_grid_cap_r.keys())[:5])
print(len(list(test_ap_grid_cap_r.keys())))

['1004_cap', '1004_grids', '1006_cap', '1006_grids', '1023_cap']
658


In [27]:
train_ap_grid_cap_r.close()
val_ap_grid_cap_r.close()
test_ap_grid_cap_r.close()