In [1]:
from PIL import Image
import requests
import matplotlib.pyplot as plt
import numpy as np
import scipy
import pandas as pd
import os
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel
from transformers import logging
logging.set_verbosity_error()



In [2]:
# model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
# processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
# model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
# processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

In [3]:
from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig

config_text = CLIPTextConfig(max_position_embeddings=512)
config_vision = CLIPVisionConfig()

config = CLIPConfig.from_text_vision_configs(config_text, config_vision)
config.name_or_path = "openai/clip-vit-base-patch32"

model = CLIPModel(config)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

In [4]:
print(model.num_parameters()) # 427616513 params for large-patch14
                              # 151277313 params for base-patch32
                              # 149620737 params for base-patch16

151500033


In [5]:
# # boilerplate code from huggingface docs
# url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# image = Image.open(requests.get(url, stream=True).raw)
# plt.imshow(image)
# inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)

# outputs = model(**inputs)
# logits_per_image = outputs.logits_per_image # this is the image-text similarity score
# probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
# print(outputs.text_embeds.shape)

In [6]:
# Loading data
caption_path = "./summaries"
cartoon_path = "./cartoons"
ids = set()
cap_paths = []
img_paths = []
for file in os.listdir(caption_path):
    file_path = os.path.join(caption_path, file)
    if not os.path.isfile(file_path) or file[-3:]!='csv' or file[:3] in ids:
        continue
    ids.add(file[:3])
    cap_paths.append(file_path)
for file in os.listdir(cartoon_path):
    file_path = os.path.join(cartoon_path, file)
    if not os.path.isfile(file_path) or file[-3:]!='jpg':
        continue
    if file[:3] not in ids: 
        print("extra_img: ", file)
        continue
    img_paths.append(file_path)
id_list = list(ids)
id_list.sort()
cap_paths.sort()
img_paths.sort()
assert(len(img_paths) == len(cap_paths))

extra_img:  890.jpg


In [7]:
# pca = PCA(n_components=50)
# condensed_data = pca.fit_transform(img1_cap)
# print(condensed_data.shape, np.max(condensed_data), np.min(condensed_data))
# tsne = TSNE()
# tsne_embeds = tsne.fit_transform(condensed_data)
# norm_tsne_embeds = np.zeros(tsne_embeds.shape)
# mins = np.min(tsne_embeds, axis=0, keepdims=True)
# maxes = np.max(tsne_embeds, axis=0, keepdims=True)
# norm_tsne_embeds = (tsne_embeds-mins)/(maxes-mins)
# print(norm_tsne_embeds.shape, np.max(norm_tsne_embeds), np.min(norm_tsne_embeds))
# print(norm_tsne_embeds[:5])

# TSNE Processing Functions

In [8]:
def get_embeds(model, image, caption_list):
    inputs = processor(text=caption_list, images=image, return_tensors="pt", padding=True)
    outputs = model(**inputs)
    text_embeds = outputs.text_embeds.detach().numpy()
    img_embeds = outputs.image_embeds.detach().numpy()
    return text_embeds, img_embeds

In [9]:
pca = PCA(n_components=50)
tsne = TSNE()
def get_tsne_embeds(img_df):
    caps = img_df["cap_feat"]
    cap_data = np.vstack(caps)
    condensed_data = pca.fit_transform(cap_data)
    tsne_embeds = tsne.fit_transform(condensed_data)
    norm_tsne_embeds = np.zeros(tsne_embeds.shape)
    mins = np.min(tsne_embeds, axis=0, keepdims=True)
    maxes = np.max(tsne_embeds, axis=0, keepdims=True)
    norm_tsne_embeds = (tsne_embeds-mins)/(maxes-mins)
    norm_tsne_embeds_df = pd.DataFrame({'X': norm_tsne_embeds[:,0], 'Y': norm_tsne_embeds[:,1], "caption": caps})
    return norm_tsne_embeds_df

In [10]:
def filter_long_captions(cap_df, filter_len=50):
    cap_df['sentence_len'] = cap_df['caption'].apply(lambda x: len(x.split()))
    return cap_df[cap_df['sentence_len'] <= filter_len].reset_index()

In [11]:
batch_size = 32
def get_embeds_from_path(img_id, img_path, cap_path, batch_size=32, filter_len=100):
    df_list = []
    img = Image.open(img_path)
    inputs = processor(text=['test'], images=img, return_tensors="pt", padding=True)
    img_feat = model(**inputs).image_embeds.detach().numpy()
    cap_csv = pd.read_csv(cap_path)
    cap_csv = filter_long_captions(cap_csv, filter_len)
    cap_csv["img_id"] = img_id
    cap_csv["cap_feat"] = None
    cap_csv["img_feat"] = [img_feat]*len(cap_csv.index)
    caption_list = []
    for idx,row in cap_csv.iterrows():
        caption_list.append(row['caption'])
        if (idx == (len(cap_csv.index)-1)):
            text_embeds, _ = get_embeds(model, img, caption_list)
            for i,embed in enumerate(text_embeds):
                cap_csv.at[idx-len(text_embeds)+1+i,"cap_feat"] = text_embeds[i]
            caption_list = []
        if (len(caption_list) != batch_size):
            continue
        text_embeds, _ = get_embeds(model, img, caption_list)
        for i,embed in enumerate(text_embeds):
            cap_csv.at[idx-batch_size+1+i,"cap_feat"] = text_embeds[i]
        caption_list = []
    df_list.append(cap_csv)
    return pd.concat(df_list)

In [12]:
# relevant_columns = ["caption", "img_id", "mean","votes"]
# def process_dataset_tsne(id_list, img_paths, cap_paths):
#     tsne_list = []
#     cap_list = []
#     for img_id, img_path, cap_path in zip(id_list, img_paths, cap_paths):
#         # process for TSNE data

#         img_df = get_embeds_from_path(img_id, img_path, cap_path)
#         tsne_feats = get_tsne_embeds(img_df)
#         tsne_feats["img_id"] = img_id
#         tsne_list.append(tsne_feats)

#         # save complete dataset
#         cap_csv = pd.read_csv(cap_path)
#         cap_csv["img_id"] = img_id
#         # print(list(cap_csv.columns))
#         cap_list.append(cap_csv[relevant_columns])
#         # break

#     return pd.concat(tsne_list), pd.concat(cap_list)

# Create complete TSNE DataFrame

In [13]:
relevant_columns = ['rank', 'caption', 'mean', 'precision', 'votes', 'not_funny', 'somewhat_funny', 'funny', 'img_id', 'cap_feat', 'img_feat']
def process_dataset(id_list, img_paths, cap_paths):
    ################################################################
    # Processes all data to get the relevant columns and tsne data #
    ################################################################
    cap_folder = 'caption_data'  # Folder to save cap_df files
    tsne_folder = 'tsne_data'  # Folder to save tsne_df files
    
    # Ensure the folders exist
    os.makedirs(cap_folder, exist_ok=True)
    os.makedirs(tsne_folder, exist_ok=True)
    
    cap_failed_ids = []
    tsne_failed_ids = []
    
    for img_id, img_path, cap_path in tqdm(zip(id_list, img_paths, cap_paths), total=len(id_list)):
        # Define file paths
        cap_file_path = os.path.join(cap_folder, f'cap_df_{img_id}.pkl')
        tsne_file_path = os.path.join(tsne_folder, f'tsne_df_{img_id}.pkl')
        
        if (not os.path.exists(cap_file_path)) and (not os.path.exists(tsne_file_path)):
            print("Running for image id:", img_id)
            try:
                cap_df = get_embeds_from_path(img_id, img_path, cap_path)
                cap_df.to_pickle(cap_file_path)
                try:
                    tsne_df = get_tsne_embeds(cap_df)
                    tsne_df["img_id"] = img_id
                    tsne_df.to_pickle(tsne_file_path)
                    print(f'Saved: {cap_file_path} and {tsne_file_path}')
                except:
                    tsne_failed_ids.append(img_id)
                
            except:
                cap_failed_ids.append(img_id)
    print('cap_failed_ids:', cap_failed_ids)
    print('tsne_failed_ids:', tsne_failed_ids)
    print('Equal:', tsne_failed_ids == cap_failed_ids)
    return cap_failed_ids, tsne_failed_ids

In [14]:
process_dataset(id_list, img_paths, cap_paths)

  0%|          | 0/379 [00:00<?, ?it/s]

Running for image id: 661


 40%|███▉      | 151/379 [00:00<00:00, 784.86it/s]

Running for image id: 662


 40%|████      | 152/379 [01:16<02:42,  1.40it/s] 

Saved: caption_data\cap_df_662.pkl and tsne_data\tsne_df_662.pkl
Running for image id: 663


 40%|████      | 153/379 [02:19<05:47,  1.54s/it]

Saved: caption_data\cap_df_663.pkl and tsne_data\tsne_df_663.pkl
Running for image id: 664


 41%|████      | 154/379 [03:26<10:19,  2.75s/it]

Saved: caption_data\cap_df_664.pkl and tsne_data\tsne_df_664.pkl
Running for image id: 665


 41%|████      | 155/379 [04:45<17:36,  4.72s/it]

Saved: caption_data\cap_df_665.pkl and tsne_data\tsne_df_665.pkl
Running for image id: 666


 41%|████      | 156/379 [06:08<27:53,  7.50s/it]

Saved: caption_data\cap_df_666.pkl and tsne_data\tsne_df_666.pkl
Running for image id: 667


 41%|████▏     | 157/379 [07:27<40:33, 10.96s/it]

Saved: caption_data\cap_df_667.pkl and tsne_data\tsne_df_667.pkl
Running for image id: 668


 42%|████▏     | 159/379 [08:21<46:10, 12.59s/it]

Saved: caption_data\cap_df_668.pkl and tsne_data\tsne_df_668.pkl
Running for image id: 669
Running for image id: 670
Running for image id: 671
Running for image id: 672


 43%|████▎     | 162/379 [09:25<53:45, 14.86s/it]

Saved: caption_data\cap_df_672.pkl and tsne_data\tsne_df_672.pkl
Running for image id: 673


 43%|████▎     | 163/379 [10:02<1:02:41, 17.41s/it]

Saved: caption_data\cap_df_673.pkl and tsne_data\tsne_df_673.pkl
Running for image id: 674


 44%|████▎     | 165/379 [11:03<1:09:45, 19.56s/it]

Saved: caption_data\cap_df_674.pkl and tsne_data\tsne_df_674.pkl
Running for image id: 675
Running for image id: 676


 44%|████▍     | 166/379 [11:59<1:33:59, 26.48s/it]

Saved: caption_data\cap_df_676.pkl and tsne_data\tsne_df_676.pkl
Running for image id: 677


 44%|████▍     | 167/379 [12:43<1:46:29, 30.14s/it]

Saved: caption_data\cap_df_677.pkl and tsne_data\tsne_df_677.pkl
Running for image id: 678


 44%|████▍     | 168/379 [13:47<2:14:12, 38.16s/it]

Saved: caption_data\cap_df_678.pkl and tsne_data\tsne_df_678.pkl
Running for image id: 679


 45%|████▍     | 169/379 [14:43<2:29:01, 42.58s/it]

Saved: caption_data\cap_df_679.pkl and tsne_data\tsne_df_679.pkl
Running for image id: 680


 45%|████▍     | 170/379 [15:53<2:54:11, 50.01s/it]

Saved: caption_data\cap_df_680.pkl and tsne_data\tsne_df_680.pkl
Running for image id: 681


 45%|████▌     | 171/379 [17:16<3:23:52, 58.81s/it]

Saved: caption_data\cap_df_681.pkl and tsne_data\tsne_df_681.pkl
Running for image id: 682


 45%|████▌     | 172/379 [18:25<3:32:44, 61.67s/it]

Saved: caption_data\cap_df_682.pkl and tsne_data\tsne_df_682.pkl
Running for image id: 683


 46%|████▌     | 173/379 [19:13<3:18:24, 57.79s/it]

Saved: caption_data\cap_df_683.pkl and tsne_data\tsne_df_683.pkl
Running for image id: 684


 46%|████▌     | 174/379 [20:32<3:38:48, 64.04s/it]

Saved: caption_data\cap_df_684.pkl and tsne_data\tsne_df_684.pkl
Running for image id: 685


 46%|████▌     | 175/379 [21:29<3:30:47, 62.00s/it]

Saved: caption_data\cap_df_685.pkl and tsne_data\tsne_df_685.pkl
Running for image id: 686


 46%|████▋     | 176/379 [22:39<3:37:28, 64.28s/it]

Saved: caption_data\cap_df_686.pkl and tsne_data\tsne_df_686.pkl
Running for image id: 687


 47%|████▋     | 177/379 [23:42<3:35:08, 63.90s/it]

Saved: caption_data\cap_df_687.pkl and tsne_data\tsne_df_687.pkl
Running for image id: 688


 47%|████▋     | 178/379 [24:35<3:23:43, 60.81s/it]

Saved: caption_data\cap_df_688.pkl and tsne_data\tsne_df_688.pkl
Running for image id: 689


 47%|████▋     | 179/379 [25:28<3:14:45, 58.43s/it]

Saved: caption_data\cap_df_689.pkl and tsne_data\tsne_df_689.pkl
Running for image id: 690


 48%|████▊     | 181/379 [26:34<2:20:30, 42.58s/it]

Saved: caption_data\cap_df_690.pkl and tsne_data\tsne_df_690.pkl
Running for image id: 691
Running for image id: 692
Running for image id: 693
Running for image id: 694
Running for image id: 695
Running for image id: 696
Running for image id: 697


 50%|████▉     | 188/379 [27:34<51:56, 16.32s/it]  

Saved: caption_data\cap_df_697.pkl and tsne_data\tsne_df_697.pkl
Running for image id: 698
Running for image id: 699


 50%|████▉     | 189/379 [28:58<1:28:54, 28.08s/it]

Saved: caption_data\cap_df_699.pkl and tsne_data\tsne_df_699.pkl
Running for image id: 700


 50%|█████     | 190/379 [30:38<2:13:12, 42.29s/it]

Saved: caption_data\cap_df_700.pkl and tsne_data\tsne_df_700.pkl
Running for image id: 701


 51%|█████     | 192/379 [32:01<2:01:28, 38.98s/it]

Saved: caption_data\cap_df_701.pkl and tsne_data\tsne_df_701.pkl
Running for image id: 702
Running for image id: 703


 51%|█████     | 194/379 [33:10<1:45:08, 34.10s/it]

Saved: caption_data\cap_df_703.pkl and tsne_data\tsne_df_703.pkl
Running for image id: 704
Running for image id: 705


 51%|█████▏    | 195/379 [34:20<2:15:38, 44.23s/it]

Saved: caption_data\cap_df_705.pkl and tsne_data\tsne_df_705.pkl
Running for image id: 706


 52%|█████▏    | 197/379 [35:22<1:46:15, 35.03s/it]

Saved: caption_data\cap_df_706.pkl and tsne_data\tsne_df_706.pkl
Running for image id: 707
Running for image id: 708


 53%|█████▎    | 199/379 [37:29<2:11:01, 43.67s/it]

Saved: caption_data\cap_df_708.pkl and tsne_data\tsne_df_708.pkl
Running for image id: 709
Running for image id: 710


 53%|█████▎    | 201/379 [39:21<2:13:23, 44.97s/it]

Saved: caption_data\cap_df_710.pkl and tsne_data\tsne_df_710.pkl
Running for image id: 711
Running for image id: 712


 54%|█████▎    | 203/379 [39:22<1:11:33, 24.40s/it]

Running for image id: 713
Running for image id: 714


 54%|█████▍    | 205/379 [40:39<1:20:28, 27.75s/it]

Saved: caption_data\cap_df_714.pkl and tsne_data\tsne_df_714.pkl
Running for image id: 715
Running for image id: 716
Running for image id: 717


 55%|█████▍    | 207/379 [42:47<2:03:34, 43.11s/it]

Saved: caption_data\cap_df_717.pkl and tsne_data\tsne_df_717.pkl
Running for image id: 718


 55%|█████▍    | 208/379 [44:52<2:57:36, 62.32s/it]

Saved: caption_data\cap_df_718.pkl and tsne_data\tsne_df_718.pkl
Running for image id: 719


 55%|█████▌    | 209/379 [47:10<3:50:01, 81.18s/it]

Saved: caption_data\cap_df_719.pkl and tsne_data\tsne_df_719.pkl
Running for image id: 720


 56%|█████▌    | 211/379 [49:09<3:05:35, 66.29s/it]

Saved: caption_data\cap_df_720.pkl and tsne_data\tsne_df_720.pkl
Running for image id: 721
Running for image id: 722


 56%|█████▌    | 212/379 [51:15<3:51:30, 83.18s/it]

Saved: caption_data\cap_df_722.pkl and tsne_data\tsne_df_722.pkl
Running for image id: 723


 56%|█████▌    | 213/379 [52:47<3:56:55, 85.64s/it]

Saved: caption_data\cap_df_723.pkl and tsne_data\tsne_df_723.pkl
Running for image id: 724


 56%|█████▋    | 214/379 [54:15<3:57:20, 86.31s/it]

Saved: caption_data\cap_df_724.pkl and tsne_data\tsne_df_724.pkl
Running for image id: 725


 57%|█████▋    | 215/379 [55:59<4:09:51, 91.41s/it]

Saved: caption_data\cap_df_725.pkl and tsne_data\tsne_df_725.pkl
Running for image id: 726


 57%|█████▋    | 216/379 [57:39<4:15:37, 94.09s/it]

Saved: caption_data\cap_df_726.pkl and tsne_data\tsne_df_726.pkl
Running for image id: 727


 57%|█████▋    | 217/379 [59:51<4:44:20, 105.31s/it]

Saved: caption_data\cap_df_727.pkl and tsne_data\tsne_df_727.pkl
Running for image id: 728


 58%|█████▊    | 218/379 [1:01:10<4:21:38, 97.51s/it]

Saved: caption_data\cap_df_728.pkl and tsne_data\tsne_df_728.pkl
Running for image id: 729


 58%|█████▊    | 219/379 [1:02:25<4:01:52, 90.70s/it]

Saved: caption_data\cap_df_729.pkl and tsne_data\tsne_df_729.pkl
Running for image id: 730


 58%|█████▊    | 221/379 [1:04:25<3:03:40, 69.75s/it]

Saved: caption_data\cap_df_730.pkl and tsne_data\tsne_df_730.pkl
Running for image id: 731
Running for image id: 732


 59%|█████▊    | 222/379 [1:05:51<3:15:22, 74.67s/it]

Saved: caption_data\cap_df_732.pkl and tsne_data\tsne_df_732.pkl
Running for image id: 733


 59%|█████▉    | 223/379 [1:08:08<4:02:36, 93.31s/it]

Saved: caption_data\cap_df_733.pkl and tsne_data\tsne_df_733.pkl
Running for image id: 734


 59%|█████▉    | 224/379 [1:09:52<4:08:55, 96.36s/it]

Saved: caption_data\cap_df_734.pkl and tsne_data\tsne_df_734.pkl
Running for image id: 735


 59%|█████▉    | 225/379 [1:10:55<3:41:57, 86.48s/it]

Saved: caption_data\cap_df_735.pkl and tsne_data\tsne_df_735.pkl
Running for image id: 736


 60%|█████▉    | 226/379 [1:12:27<3:44:42, 88.12s/it]

Saved: caption_data\cap_df_736.pkl and tsne_data\tsne_df_736.pkl
Running for image id: 737


 60%|█████▉    | 227/379 [1:13:58<3:45:47, 89.13s/it]

Saved: caption_data\cap_df_737.pkl and tsne_data\tsne_df_737.pkl
Running for image id: 738


 60%|██████    | 228/379 [1:15:51<4:01:57, 96.14s/it]

Saved: caption_data\cap_df_738.pkl and tsne_data\tsne_df_738.pkl
Running for image id: 739


 60%|██████    | 229/379 [1:17:31<4:03:27, 97.38s/it]

Saved: caption_data\cap_df_739.pkl and tsne_data\tsne_df_739.pkl
Running for image id: 740


 61%|██████    | 230/379 [1:20:05<4:44:01, 114.37s/it]

Saved: caption_data\cap_df_740.pkl and tsne_data\tsne_df_740.pkl
Running for image id: 741


 61%|██████    | 231/379 [1:21:22<4:14:34, 103.20s/it]

Saved: caption_data\cap_df_741.pkl and tsne_data\tsne_df_741.pkl
Running for image id: 742


 61%|██████    | 232/379 [1:22:42<3:55:28, 96.11s/it] 

Saved: caption_data\cap_df_742.pkl and tsne_data\tsne_df_742.pkl
Running for image id: 743


 61%|██████▏   | 233/379 [1:24:02<3:42:23, 91.40s/it]

Saved: caption_data\cap_df_743.pkl and tsne_data\tsne_df_743.pkl
Running for image id: 744


 62%|██████▏   | 234/379 [1:26:35<4:25:01, 109.67s/it]

Saved: caption_data\cap_df_744.pkl and tsne_data\tsne_df_744.pkl
Running for image id: 745


 62%|██████▏   | 235/379 [1:28:47<4:39:22, 116.41s/it]

Saved: caption_data\cap_df_745.pkl and tsne_data\tsne_df_745.pkl
Running for image id: 746


 62%|██████▏   | 236/379 [1:30:53<4:44:37, 119.42s/it]

Saved: caption_data\cap_df_746.pkl and tsne_data\tsne_df_746.pkl
Running for image id: 747


 63%|██████▎   | 237/379 [1:33:00<4:48:10, 121.77s/it]

Saved: caption_data\cap_df_747.pkl and tsne_data\tsne_df_747.pkl
Running for image id: 748


 63%|██████▎   | 238/379 [1:33:01<3:20:26, 85.30s/it] 

Running for image id: 749


 63%|██████▎   | 239/379 [1:34:19<3:14:08, 83.20s/it]

Saved: caption_data\cap_df_749.pkl and tsne_data\tsne_df_749.pkl
Running for image id: 750


 63%|██████▎   | 240/379 [1:35:58<3:23:27, 87.82s/it]

Saved: caption_data\cap_df_750.pkl and tsne_data\tsne_df_750.pkl
Running for image id: 751


 64%|██████▍   | 242/379 [1:37:34<2:24:34, 63.32s/it]

Saved: caption_data\cap_df_751.pkl and tsne_data\tsne_df_751.pkl
Running for image id: 752
Running for image id: 753


 64%|██████▍   | 243/379 [1:40:12<3:27:40, 91.62s/it]

Saved: caption_data\cap_df_753.pkl and tsne_data\tsne_df_753.pkl
Running for image id: 754


 64%|██████▍   | 244/379 [1:41:35<3:20:18, 89.02s/it]

Saved: caption_data\cap_df_754.pkl and tsne_data\tsne_df_754.pkl
Running for image id: 755


 65%|██████▍   | 245/379 [1:43:45<3:46:24, 101.38s/it]

Saved: caption_data\cap_df_755.pkl and tsne_data\tsne_df_755.pkl
Running for image id: 756


 65%|██████▍   | 246/379 [1:45:55<4:03:59, 110.07s/it]

Saved: caption_data\cap_df_756.pkl and tsne_data\tsne_df_756.pkl
Running for image id: 757


 65%|██████▌   | 247/379 [1:47:24<3:47:43, 103.51s/it]

Saved: caption_data\cap_df_757.pkl and tsne_data\tsne_df_757.pkl
Running for image id: 758


 65%|██████▌   | 248/379 [1:48:26<3:19:00, 91.15s/it] 

Saved: caption_data\cap_df_758.pkl and tsne_data\tsne_df_758.pkl
Running for image id: 759


 66%|██████▌   | 250/379 [1:50:37<2:35:12, 72.19s/it] 

Saved: caption_data\cap_df_759.pkl and tsne_data\tsne_df_759.pkl
Running for image id: 760
Running for image id: 761


 66%|██████▌   | 251/379 [1:52:59<3:18:42, 93.15s/it]

Saved: caption_data\cap_df_761.pkl and tsne_data\tsne_df_761.pkl
Running for image id: 762


 66%|██████▋   | 252/379 [1:54:37<3:20:24, 94.68s/it]

Saved: caption_data\cap_df_762.pkl and tsne_data\tsne_df_762.pkl
Running for image id: 763


 67%|██████▋   | 253/379 [1:56:46<3:40:36, 105.05s/it]

Saved: caption_data\cap_df_763.pkl and tsne_data\tsne_df_763.pkl
Running for image id: 764


 67%|██████▋   | 254/379 [1:58:13<3:27:30, 99.60s/it] 

Saved: caption_data\cap_df_764.pkl and tsne_data\tsne_df_764.pkl
Running for image id: 765


 67%|██████▋   | 255/379 [1:59:42<3:18:50, 96.21s/it]

Saved: caption_data\cap_df_765.pkl and tsne_data\tsne_df_765.pkl
Running for image id: 766


 68%|██████▊   | 256/379 [2:00:58<3:05:18, 90.40s/it]

Saved: caption_data\cap_df_766.pkl and tsne_data\tsne_df_766.pkl
Running for image id: 767


 68%|██████▊   | 257/379 [2:02:44<3:13:16, 95.05s/it]

Saved: caption_data\cap_df_767.pkl and tsne_data\tsne_df_767.pkl
Running for image id: 768


 68%|██████▊   | 259/379 [2:04:13<2:10:37, 65.31s/it]

Saved: caption_data\cap_df_768.pkl and tsne_data\tsne_df_768.pkl
Running for image id: 769
Running for image id: 770


 69%|██████▉   | 261/379 [2:05:30<1:34:23, 47.99s/it]

Saved: caption_data\cap_df_770.pkl and tsne_data\tsne_df_770.pkl
Running for image id: 771
Running for image id: 772


 69%|██████▉   | 262/379 [2:05:30<1:05:34, 33.63s/it]

Running for image id: 773


 69%|██████▉   | 263/379 [2:06:59<1:37:03, 50.21s/it]

Saved: caption_data\cap_df_773.pkl and tsne_data\tsne_df_773.pkl
Running for image id: 774


 70%|██████▉   | 264/379 [2:08:09<1:47:56, 56.32s/it]

Saved: caption_data\cap_df_774.pkl and tsne_data\tsne_df_774.pkl
Running for image id: 775


 70%|██████▉   | 265/379 [2:10:01<2:18:55, 73.12s/it]

Saved: caption_data\cap_df_775.pkl and tsne_data\tsne_df_775.pkl
Running for image id: 776


 70%|███████   | 266/379 [2:11:44<2:34:29, 82.03s/it]

Saved: caption_data\cap_df_776.pkl and tsne_data\tsne_df_776.pkl
Running for image id: 777


 70%|███████   | 267/379 [2:13:17<2:39:20, 85.37s/it]

Saved: caption_data\cap_df_777.pkl and tsne_data\tsne_df_777.pkl
Running for image id: 778


 71%|███████   | 268/379 [2:14:54<2:43:55, 88.60s/it]

Saved: caption_data\cap_df_778.pkl and tsne_data\tsne_df_778.pkl
Running for image id: 779


 71%|███████   | 269/379 [2:15:57<2:28:22, 80.94s/it]

Saved: caption_data\cap_df_779.pkl and tsne_data\tsne_df_779.pkl
Running for image id: 780


 71%|███████   | 270/379 [2:17:20<2:28:29, 81.74s/it]

Saved: caption_data\cap_df_780.pkl and tsne_data\tsne_df_780.pkl
Running for image id: 781


 72%|███████▏  | 272/379 [2:18:39<1:40:47, 56.52s/it]

Saved: caption_data\cap_df_781.pkl and tsne_data\tsne_df_781.pkl
Running for image id: 782
Running for image id: 783


 72%|███████▏  | 273/379 [2:20:06<1:56:27, 65.92s/it]

Saved: caption_data\cap_df_783.pkl and tsne_data\tsne_df_783.pkl
Running for image id: 784


 72%|███████▏  | 274/379 [2:21:02<1:49:55, 62.81s/it]

Saved: caption_data\cap_df_784.pkl and tsne_data\tsne_df_784.pkl
Running for image id: 785


 73%|███████▎  | 275/379 [2:22:29<2:01:23, 70.04s/it]

Saved: caption_data\cap_df_785.pkl and tsne_data\tsne_df_785.pkl
Running for image id: 786


 73%|███████▎  | 276/379 [2:24:30<2:26:18, 85.22s/it]

Saved: caption_data\cap_df_786.pkl and tsne_data\tsne_df_786.pkl
Running for image id: 787


 73%|███████▎  | 277/379 [2:25:40<2:17:07, 80.66s/it]

Saved: caption_data\cap_df_787.pkl and tsne_data\tsne_df_787.pkl
Running for image id: 788


 73%|███████▎  | 278/379 [2:26:54<2:12:51, 78.93s/it]

Saved: caption_data\cap_df_788.pkl and tsne_data\tsne_df_788.pkl
Running for image id: 789


 74%|███████▎  | 279/379 [2:28:13<2:11:29, 78.90s/it]

Saved: caption_data\cap_df_789.pkl and tsne_data\tsne_df_789.pkl
Running for image id: 790


 74%|███████▍  | 280/379 [2:29:04<1:56:10, 70.41s/it]

Saved: caption_data\cap_df_790.pkl and tsne_data\tsne_df_790.pkl
Running for image id: 791


 74%|███████▍  | 281/379 [2:30:39<2:07:05, 77.82s/it]

Saved: caption_data\cap_df_791.pkl and tsne_data\tsne_df_791.pkl
Running for image id: 792


 75%|███████▍  | 283/379 [2:31:50<1:24:56, 53.09s/it]

Saved: caption_data\cap_df_792.pkl and tsne_data\tsne_df_792.pkl
Running for image id: 793
Running for image id: 794


 75%|███████▍  | 284/379 [2:33:03<1:33:38, 59.14s/it]

Saved: caption_data\cap_df_794.pkl and tsne_data\tsne_df_794.pkl
Running for image id: 795


 75%|███████▌  | 285/379 [2:34:18<1:39:46, 63.69s/it]

Saved: caption_data\cap_df_795.pkl and tsne_data\tsne_df_795.pkl
Running for image id: 796


 75%|███████▌  | 286/379 [2:35:33<1:44:19, 67.30s/it]

Saved: caption_data\cap_df_796.pkl and tsne_data\tsne_df_796.pkl
Running for image id: 797


 76%|███████▌  | 287/379 [2:36:51<1:47:44, 70.27s/it]

Saved: caption_data\cap_df_797.pkl and tsne_data\tsne_df_797.pkl
Running for image id: 798


 76%|███████▌  | 288/379 [2:37:52<1:42:37, 67.66s/it]

Saved: caption_data\cap_df_798.pkl and tsne_data\tsne_df_798.pkl
Running for image id: 799


 76%|███████▋  | 289/379 [2:39:28<1:54:06, 76.07s/it]

Saved: caption_data\cap_df_799.pkl and tsne_data\tsne_df_799.pkl
Running for image id: 800


 77%|███████▋  | 290/379 [2:41:10<2:04:34, 83.99s/it]

Saved: caption_data\cap_df_800.pkl and tsne_data\tsne_df_800.pkl
Running for image id: 801


 77%|███████▋  | 291/379 [2:42:08<1:51:38, 76.12s/it]

Saved: caption_data\cap_df_801.pkl and tsne_data\tsne_df_801.pkl
Running for image id: 802


 77%|███████▋  | 292/379 [2:43:22<1:49:14, 75.34s/it]

Saved: caption_data\cap_df_802.pkl and tsne_data\tsne_df_802.pkl
Running for image id: 803


 77%|███████▋  | 293/379 [2:45:05<2:00:05, 83.78s/it]

Saved: caption_data\cap_df_803.pkl and tsne_data\tsne_df_803.pkl
Running for image id: 804


 78%|███████▊  | 294/379 [2:46:18<1:54:00, 80.47s/it]

Saved: caption_data\cap_df_804.pkl and tsne_data\tsne_df_804.pkl
Running for image id: 805


 78%|███████▊  | 295/379 [2:48:02<2:02:47, 87.70s/it]

Saved: caption_data\cap_df_805.pkl and tsne_data\tsne_df_805.pkl
Running for image id: 806


 78%|███████▊  | 296/379 [2:49:09<1:52:22, 81.23s/it]

Saved: caption_data\cap_df_806.pkl and tsne_data\tsne_df_806.pkl
Running for image id: 807


 78%|███████▊  | 297/379 [2:50:16<1:45:33, 77.23s/it]

Saved: caption_data\cap_df_807.pkl and tsne_data\tsne_df_807.pkl
Running for image id: 808


 79%|███████▊  | 298/379 [2:51:31<1:43:00, 76.30s/it]

Saved: caption_data\cap_df_808.pkl and tsne_data\tsne_df_808.pkl
Running for image id: 809


 79%|███████▉  | 299/379 [2:52:57<1:45:58, 79.49s/it]

Saved: caption_data\cap_df_809.pkl and tsne_data\tsne_df_809.pkl
Running for image id: 810


 79%|███████▉  | 300/379 [2:54:49<1:57:13, 89.03s/it]

Saved: caption_data\cap_df_810.pkl and tsne_data\tsne_df_810.pkl
Running for image id: 811


 79%|███████▉  | 301/379 [2:55:50<1:44:57, 80.73s/it]

Saved: caption_data\cap_df_811.pkl and tsne_data\tsne_df_811.pkl
Running for image id: 812


 80%|███████▉  | 302/379 [2:57:09<1:43:01, 80.28s/it]

Saved: caption_data\cap_df_812.pkl and tsne_data\tsne_df_812.pkl
Running for image id: 813


 80%|████████  | 304/379 [2:58:10<1:05:07, 52.09s/it]

Saved: caption_data\cap_df_813.pkl and tsne_data\tsne_df_813.pkl
Running for image id: 814


 80%|████████  | 305/379 [2:58:10<45:01, 36.51s/it]  

Running for image id: 815
Running for image id: 816


 81%|████████  | 306/379 [2:59:33<1:01:17, 50.38s/it]

Saved: caption_data\cap_df_816.pkl and tsne_data\tsne_df_816.pkl
Running for image id: 817


 81%|████████▏ | 308/379 [3:00:33<44:04, 37.24s/it]  

Saved: caption_data\cap_df_817.pkl and tsne_data\tsne_df_817.pkl
Running for image id: 818
Running for image id: 819


 82%|████████▏ | 309/379 [3:00:33<30:28, 26.12s/it]

Running for image id: 820


 82%|████████▏ | 310/379 [3:01:54<49:11, 42.77s/it]

Saved: caption_data\cap_df_820.pkl and tsne_data\tsne_df_820.pkl
Running for image id: 821


 82%|████████▏ | 311/379 [3:03:02<56:58, 50.27s/it]

Saved: caption_data\cap_df_821.pkl and tsne_data\tsne_df_821.pkl
Running for image id: 822


 82%|████████▏ | 312/379 [3:04:10<1:01:55, 55.45s/it]

Saved: caption_data\cap_df_822.pkl and tsne_data\tsne_df_822.pkl
Running for image id: 823


 83%|████████▎ | 313/379 [3:05:13<1:03:40, 57.89s/it]

Saved: caption_data\cap_df_823.pkl and tsne_data\tsne_df_823.pkl
Running for image id: 824


 83%|████████▎ | 314/379 [3:06:19<1:05:12, 60.19s/it]

Saved: caption_data\cap_df_824.pkl and tsne_data\tsne_df_824.pkl
Running for image id: 825


 83%|████████▎ | 316/379 [3:08:16<56:43, 54.02s/it]  

Saved: caption_data\cap_df_825.pkl and tsne_data\tsne_df_825.pkl
Running for image id: 826
Running for image id: 827


 84%|████████▍ | 318/379 [3:09:36<44:02, 43.33s/it]  

Saved: caption_data\cap_df_827.pkl and tsne_data\tsne_df_827.pkl
Running for image id: 828
Running for image id: 829


 84%|████████▍ | 319/379 [3:10:43<50:31, 50.53s/it]

Saved: caption_data\cap_df_829.pkl and tsne_data\tsne_df_829.pkl
Running for image id: 830


 84%|████████▍ | 320/379 [3:11:53<55:22, 56.31s/it]

Saved: caption_data\cap_df_830.pkl and tsne_data\tsne_df_830.pkl
Running for image id: 831


 85%|████████▍ | 321/379 [3:12:55<56:01, 57.96s/it]

Saved: caption_data\cap_df_831.pkl and tsne_data\tsne_df_831.pkl
Running for image id: 832


 85%|████████▍ | 322/379 [3:13:49<53:58, 56.81s/it]

Saved: caption_data\cap_df_832.pkl and tsne_data\tsne_df_832.pkl
Running for image id: 833


 85%|████████▌ | 323/379 [3:15:19<1:02:18, 66.76s/it]

Saved: caption_data\cap_df_833.pkl and tsne_data\tsne_df_833.pkl
Running for image id: 834


 86%|████████▌ | 325/379 [3:15:39<33:11, 36.88s/it]  

Saved: caption_data\cap_df_834.pkl and tsne_data\tsne_df_834.pkl
Running for image id: 835
Running for image id: 836


 86%|████████▌ | 326/379 [3:16:11<31:19, 35.47s/it]

Saved: caption_data\cap_df_836.pkl and tsne_data\tsne_df_836.pkl
Running for image id: 837


 86%|████████▋ | 327/379 [3:17:29<41:48, 48.23s/it]

Saved: caption_data\cap_df_837.pkl and tsne_data\tsne_df_837.pkl
Running for image id: 838


 87%|████████▋ | 328/379 [3:18:55<50:38, 59.59s/it]

Saved: caption_data\cap_df_838.pkl and tsne_data\tsne_df_838.pkl
Running for image id: 839


 87%|████████▋ | 329/379 [3:19:50<48:37, 58.35s/it]

Saved: caption_data\cap_df_839.pkl and tsne_data\tsne_df_839.pkl
Running for image id: 840


 87%|████████▋ | 330/379 [3:21:03<51:11, 62.68s/it]

Saved: caption_data\cap_df_840.pkl and tsne_data\tsne_df_840.pkl
Running for image id: 841


 87%|████████▋ | 331/379 [3:22:09<50:49, 63.53s/it]

Saved: caption_data\cap_df_841.pkl and tsne_data\tsne_df_841.pkl
Running for image id: 842


 88%|████████▊ | 332/379 [3:23:05<48:03, 61.35s/it]

Saved: caption_data\cap_df_842.pkl and tsne_data\tsne_df_842.pkl
Running for image id: 843


 88%|████████▊ | 333/379 [3:23:54<44:12, 57.67s/it]

Saved: caption_data\cap_df_843.pkl and tsne_data\tsne_df_843.pkl
Running for image id: 844


 88%|████████▊ | 334/379 [3:25:03<45:53, 61.19s/it]

Saved: caption_data\cap_df_844.pkl and tsne_data\tsne_df_844.pkl
Running for image id: 845


 88%|████████▊ | 335/379 [3:26:10<46:04, 62.83s/it]

Saved: caption_data\cap_df_845.pkl and tsne_data\tsne_df_845.pkl
Running for image id: 846


 89%|████████▊ | 336/379 [3:27:07<43:47, 61.10s/it]

Saved: caption_data\cap_df_846.pkl and tsne_data\tsne_df_846.pkl
Running for image id: 847


 89%|████████▉ | 337/379 [3:28:26<46:26, 66.35s/it]

Saved: caption_data\cap_df_847.pkl and tsne_data\tsne_df_847.pkl
Running for image id: 848


 89%|████████▉ | 339/379 [3:29:28<30:26, 45.66s/it]

Saved: caption_data\cap_df_848.pkl and tsne_data\tsne_df_848.pkl
Running for image id: 849
Running for image id: 850


 90%|████████▉ | 340/379 [3:30:36<33:55, 52.20s/it]

Saved: caption_data\cap_df_850.pkl and tsne_data\tsne_df_850.pkl
Running for image id: 851


 90%|████████▉ | 341/379 [3:31:53<37:47, 59.67s/it]

Saved: caption_data\cap_df_851.pkl and tsne_data\tsne_df_851.pkl
Running for image id: 852


 90%|█████████ | 342/379 [3:32:54<36:59, 59.99s/it]

Saved: caption_data\cap_df_852.pkl and tsne_data\tsne_df_852.pkl
Running for image id: 853


 91%|█████████ | 343/379 [3:33:55<36:18, 60.51s/it]

Saved: caption_data\cap_df_853.pkl and tsne_data\tsne_df_853.pkl
Running for image id: 854


 91%|█████████ | 344/379 [3:35:24<40:15, 69.00s/it]

Saved: caption_data\cap_df_854.pkl and tsne_data\tsne_df_854.pkl
Running for image id: 855


 91%|█████████ | 345/379 [3:36:24<37:32, 66.24s/it]

Saved: caption_data\cap_df_855.pkl and tsne_data\tsne_df_855.pkl
Running for image id: 856


 91%|█████████▏| 346/379 [3:37:50<39:43, 72.23s/it]

Saved: caption_data\cap_df_856.pkl and tsne_data\tsne_df_856.pkl
Running for image id: 857


 92%|█████████▏| 347/379 [3:39:41<44:38, 83.69s/it]

Saved: caption_data\cap_df_857.pkl and tsne_data\tsne_df_857.pkl
Running for image id: 858


 92%|█████████▏| 348/379 [3:40:33<38:28, 74.47s/it]

Saved: caption_data\cap_df_858.pkl and tsne_data\tsne_df_858.pkl
Running for image id: 859


 92%|█████████▏| 349/379 [3:42:04<39:40, 79.34s/it]

Saved: caption_data\cap_df_859.pkl and tsne_data\tsne_df_859.pkl
Running for image id: 860


 92%|█████████▏| 350/379 [3:43:04<35:28, 73.40s/it]

Saved: caption_data\cap_df_860.pkl and tsne_data\tsne_df_860.pkl
Running for image id: 861


 93%|█████████▎| 351/379 [3:44:09<33:03, 70.83s/it]

Saved: caption_data\cap_df_861.pkl and tsne_data\tsne_df_861.pkl
Running for image id: 862


 93%|█████████▎| 353/379 [3:45:09<20:30, 47.33s/it]

Saved: caption_data\cap_df_862.pkl and tsne_data\tsne_df_862.pkl
Running for image id: 863
Running for image id: 864


 93%|█████████▎| 354/379 [3:46:46<25:59, 62.37s/it]

Saved: caption_data\cap_df_864.pkl and tsne_data\tsne_df_864.pkl
Running for image id: 865


 94%|█████████▎| 355/379 [3:48:00<26:18, 65.79s/it]

Saved: caption_data\cap_df_865.pkl and tsne_data\tsne_df_865.pkl
Running for image id: 866


 94%|█████████▍| 356/379 [3:49:12<25:58, 67.78s/it]

Saved: caption_data\cap_df_866.pkl and tsne_data\tsne_df_866.pkl
Running for image id: 867


 94%|█████████▍| 357/379 [3:50:37<26:41, 72.81s/it]

Saved: caption_data\cap_df_867.pkl and tsne_data\tsne_df_867.pkl
Running for image id: 868


 94%|█████████▍| 358/379 [3:52:32<29:54, 85.46s/it]

Saved: caption_data\cap_df_868.pkl and tsne_data\tsne_df_868.pkl
Running for image id: 869


 95%|█████████▍| 359/379 [3:53:59<28:42, 86.13s/it]

Saved: caption_data\cap_df_869.pkl and tsne_data\tsne_df_869.pkl
Running for image id: 870


 95%|█████████▍| 360/379 [3:54:53<24:10, 76.34s/it]

Saved: caption_data\cap_df_870.pkl and tsne_data\tsne_df_870.pkl
Running for image id: 871


 95%|█████████▌| 361/379 [3:54:53<16:02, 53.50s/it]

Running for image id: 872


 96%|█████████▌| 362/379 [3:56:25<18:23, 64.92s/it]

Saved: caption_data\cap_df_872.pkl and tsne_data\tsne_df_872.pkl
Running for image id: 873


 96%|█████████▌| 364/379 [3:57:45<12:10, 48.72s/it]

Saved: caption_data\cap_df_873.pkl and tsne_data\tsne_df_873.pkl
Running for image id: 874
Running for image id: 875


 96%|█████████▋| 365/379 [3:58:57<12:59, 55.67s/it]

Saved: caption_data\cap_df_875.pkl and tsne_data\tsne_df_875.pkl
Running for image id: 876


 97%|█████████▋| 366/379 [3:59:57<12:19, 56.90s/it]

Saved: caption_data\cap_df_876.pkl and tsne_data\tsne_df_876.pkl
Running for image id: 877


 97%|█████████▋| 368/379 [4:00:46<07:00, 38.26s/it]

Saved: caption_data\cap_df_877.pkl and tsne_data\tsne_df_877.pkl
Running for image id: 878


 97%|█████████▋| 369/379 [4:00:46<04:28, 26.83s/it]

Running for image id: 879
Running for image id: 880


 98%|█████████▊| 370/379 [4:02:21<07:04, 47.18s/it]

Saved: caption_data\cap_df_880.pkl and tsne_data\tsne_df_880.pkl
Running for image id: 881


 98%|█████████▊| 371/379 [4:04:08<08:40, 65.05s/it]

Saved: caption_data\cap_df_881.pkl and tsne_data\tsne_df_881.pkl
Running for image id: 882


 98%|█████████▊| 372/379 [4:05:27<08:04, 69.22s/it]

Saved: caption_data\cap_df_882.pkl and tsne_data\tsne_df_882.pkl
Running for image id: 883


 98%|█████████▊| 373/379 [4:06:57<07:33, 75.57s/it]

Saved: caption_data\cap_df_883.pkl and tsne_data\tsne_df_883.pkl
Running for image id: 884


 99%|█████████▊| 374/379 [4:08:21<06:29, 77.95s/it]

Saved: caption_data\cap_df_884.pkl and tsne_data\tsne_df_884.pkl
Running for image id: 885


 99%|█████████▉| 375/379 [4:09:40<05:13, 78.27s/it]

Saved: caption_data\cap_df_885.pkl and tsne_data\tsne_df_885.pkl
Running for image id: 886


 99%|█████████▉| 376/379 [4:10:41<03:39, 73.17s/it]

Saved: caption_data\cap_df_886.pkl and tsne_data\tsne_df_886.pkl
Running for image id: 887


 99%|█████████▉| 377/379 [4:11:58<02:28, 74.23s/it]

Saved: caption_data\cap_df_887.pkl and tsne_data\tsne_df_887.pkl
Running for image id: 888


100%|█████████▉| 378/379 [4:13:11<01:14, 74.06s/it]

Saved: caption_data\cap_df_888.pkl and tsne_data\tsne_df_888.pkl
Running for image id: 889


100%|██████████| 379/379 [4:14:54<00:00, 40.35s/it]

Saved: caption_data\cap_df_889.pkl and tsne_data\tsne_df_889.pkl
cap_failed_ids: ['661', '669', '670', '671', '675', '691', '692', '693', '694', '695', '696', '698', '702', '704', '707', '709', '711', '712', '713', '715', '716', '721', '731', '748', '752', '760', '769', '771', '772', '782', '793', '814', '815', '818', '819', '826', '828', '835', '849', '863', '871', '874', '878', '879']
tsne_failed_ids: []
Equal: False





(['661',
  '669',
  '670',
  '671',
  '675',
  '691',
  '692',
  '693',
  '694',
  '695',
  '696',
  '698',
  '702',
  '704',
  '707',
  '709',
  '711',
  '712',
  '713',
  '715',
  '716',
  '721',
  '731',
  '748',
  '752',
  '760',
  '769',
  '771',
  '772',
  '782',
  '793',
  '814',
  '815',
  '818',
  '819',
  '826',
  '828',
  '835',
  '849',
  '863',
  '871',
  '874',
  '878',
  '879'],
 [])

In [15]:
# tsne_df, condensed_cap_df = process_dataset_tsne(id_list, img_paths, cap_paths)
# tsne_df.head(5)

In terms of preprocessing, we will likely want to save the original dataset dataframes without the precison or score breakdown columns. The TSNE dataframe above can be saved as is since it should be possible to filter for the correct img_id when necessary. 

In [16]:
# condensed_cap_df.head(5)

# Feed Forward Network