In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
from datasets import load_dataset

# Identifier passed to load_dataset and the local folder the copy is written to.
hub_dataset_id = "pranked03/flowers-blip-captions"
local_dataset_dir = "flowers-blip-captions"

# Load the Flowers BLIP Captions dataset.
dataset = load_dataset(hub_dataset_id)

# Write a copy to disk so that later cells can reload it with load_from_disk.
dataset.save_to_disk(local_dataset_dir)

In [None]:
from datasets import load_from_disk


# Reload the copy saved on disk and show the first training example as a sanity check.
dataset = load_from_disk("flowers-blip-captions")
first_example = dataset['train'][0]
print(first_example)

In [None]:
import os
from tqdm import tqdm
from datasets import load_from_disk


# Reload the dataset copy that was saved to disk earlier.
dataset = load_from_disk("flowers-blip-captions")

# Output folders: one for the extracted images, one for the matching caption files.
image_output_dir = 'extracted_images_5'
text_output_dir = 'extracted_text_5'

# exist_ok=True keeps this cell re-runnable when the folders are already there.
os.makedirs(image_output_dir, exist_ok=True)
os.makedirs(text_output_dir, exist_ok=True)

def save_images_and_text():
    """Write every training example to disk as an image file plus a caption file.

    For the example at position ``index`` of ``dataset['train']`` the image is
    saved as ``image_<index>.png`` inside ``image_output_dir`` and the caption
    as ``image_<index>.txt`` inside ``text_output_dir``, so both files share
    the same stem.

    Reads the module-level ``dataset``, ``image_output_dir`` and
    ``text_output_dir``. Returns nothing; its only effect is the files written.
    """
    # Resolve the split once instead of looking it up on every iteration.
    train_split = dataset['train']
    progress = tqdm(
        enumerate(train_split),
        total=len(train_split),  # enumerate() has no length, so give tqdm the total explicitly
        desc="Extracting Images and Text",
    )

    for index, example in progress:
        # The 'image' field is written through its own .save() method
        # (presumably a PIL image -- confirm against the dataset schema).
        example['image'].save(os.path.join(image_output_dir, f'image_{index}.png'))

        # The caption goes into a text file with the same stem as the image.
        with open(os.path.join(text_output_dir, f'image_{index}.txt'), 'w') as file:
            file.write(example['text'])

# Extract every training example to an image file and a caption file.
save_images_and_text()

# Report the two output folders that were written to.
print(f"Images saved to {image_output_dir} and text files saved to {text_output_dir}.")


In [None]:
import os
from tqdm import tqdm  
from transformers import BertTokenizer, BertModel
import torch

# Folder holding the caption .txt files.
text_output_dir = 'extracted_text_5'

# The tokenizer and the encoder are loaded from the same pretrained checkpoint.
bert_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)
model = BertModel.from_pretrained(bert_checkpoint)

# Filled by create_text_embeddings(): caption file name without '.txt' -> embedding tensor.
embeddings_dict = dict()

def create_text_embeddings():
    """Embed every caption file with BERT and store the result in ``embeddings_dict``.

    Every ``.txt`` file in ``text_output_dir`` is read, stripped, tokenized and
    passed through ``model``. The hidden state at sequence position 0 of the
    last layer is kept as the caption embedding and stored under the file name
    with ``.txt`` removed (e.g. ``image_12.txt`` -> ``image_12``).

    Reads the module-level ``text_output_dir``, ``tokenizer`` and ``model`` and
    mutates the module-level ``embeddings_dict``. Returns nothing.
    """
    text_files = [f for f in os.listdir(text_output_dir) if f.endswith('.txt')]

    for text_filename in tqdm(text_files, desc="Extracting Text Embeddings"):
        text_path = os.path.join(text_output_dir, text_filename)
        with open(text_path, 'r') as file:
            caption = file.read().strip()

        # One caption per call, returned as PyTorch tensors; truncation is enabled for long captions.
        inputs = tokenizer(caption, return_tensors='pt', padding=True, truncation=True)

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            outputs = model(**inputs)
            # Keep position 0 of the last hidden state
            # (presumably the [CLS] token for this tokenizer -- confirm).
            embedding = outputs.last_hidden_state[:, 0, :]

        # Key by the file name without its extension so it can be matched to the image of the same name.
        image_index = text_filename.replace('.txt', '')

        embeddings_dict[image_index] = embedding
# Fill embeddings_dict with one embedding per caption file.
create_text_embeddings()

# Confirmation message once every caption has been processed.
print("Text embeddings processed and stored.")

In [None]:
# for extracting 64*64 images
import torchvision.transforms as transforms
import random
from PIL import Image
# Folder holding the extracted PNG images.
image_output_dir = 'extracted_images_5'

# Shuffled listing of the PNG files in that folder.
# NOTE(review): convert_images_to_tensors() below lists the directory again on its own,
# so this shuffled list does not determine the order in which it processes files.
image_files = [name for name in os.listdir(image_output_dir) if name.endswith('.png')]
random.shuffle(image_files)

# Preprocessing pipeline: resize to 64x64, convert the PIL image to a tensor, then
# normalize each of the 3 channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
# These values are not the ImageNet statistics.
preprocessing_steps = [
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3),
]
transform = transforms.Compose(preprocessing_steps)

# Filled by convert_images_to_tensors(): image file stem -> image tensor.
image_tensor_dict = dict()
def convert_images_to_tensors():
    """Load every PNG in ``image_output_dir`` and store its tensor in ``image_tensor_dict``.

    Each image is opened, converted to RGB, passed through the module-level
    ``transform`` and stored under its file name without the extension
    (e.g. ``image_12.png`` -> ``image_12``).

    Reads the module-level ``image_output_dir`` and ``transform`` and mutates
    the module-level ``image_tensor_dict``. Returns nothing.
    """
    image_files = [f for f in os.listdir(image_output_dir) if f.endswith('.png')]  # Get all image files

    for image_file in tqdm(image_files, desc="Converting Images to Tensors"):
        image_path = os.path.join(image_output_dir, image_file)  # Full path to the image

        # The context manager closes the underlying file once the pixels have been read.
        with Image.open(image_path) as image:
            # The transform normalizes with three per-channel means/stds, so force three
            # channels; grayscale, palette or RGBA PNGs would otherwise fail in Normalize.
            image_tensor = transform(image.convert('RGB'))

        # Key by the file name without its extension so it matches the caption key for the same image.
        image_index = os.path.splitext(image_file)[0]

        image_tensor_dict[image_index] = image_tensor
        
# Fill image_tensor_dict with one tensor per extracted PNG.
convert_images_to_tensors()

In [None]:
# Number of image tensors currently held.
print(len(image_tensor_dict))

# Re-key the text embeddings in the iteration order of image_tensor_dict,
# keeping only the keys that have an embedding.
ordered_embeddings_dict = {
    image_key: embeddings_dict[image_key]
    for image_key in image_tensor_dict
    if image_key in embeddings_dict
}

In [None]:
# For extracting 256x256 images
import torchvision.transforms as transforms
import random
from PIL import Image
# Same folder the extraction cell writes the PNGs to and the 64x64 cell reads from.
# NOTE(review): assumes the 256x256 tensors should come from the same extracted images -- confirm.
image_output_dir = 'extracted_images_5'


# Shuffled listing of the PNG files in that folder.
# NOTE(review): convert_images_to_tensors() below lists the directory again on its own,
# so this shuffled list does not determine the order in which it processes files.
image_files = [f for f in os.listdir(image_output_dir) if f.endswith('.png')]

random.shuffle(image_files)

# Preprocessing pipeline: resize to 256x256, convert the PIL image to a tensor, then
# normalize each of the 3 channels with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5.
# This rebinds the `transform` defined for the 64x64 images.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

# Start from an empty dict; this rebinds the name that held the 64x64 tensors.
image_tensor_dict = {}
def convert_images_to_tensors():
    """Load every PNG in ``image_output_dir`` and store its tensor in ``image_tensor_dict``.

    Each image is opened, converted to RGB, passed through the module-level
    ``transform`` and stored under its file name without the extension
    (e.g. ``image_12.png`` -> ``image_12``).

    Reads the module-level ``image_output_dir`` and ``transform`` and mutates
    the module-level ``image_tensor_dict``. Returns nothing.
    """
    image_files = [f for f in os.listdir(image_output_dir) if f.endswith('.png')]

    for image_file in tqdm(image_files, desc="Converting Images to Tensors"):
        image_path = os.path.join(image_output_dir, image_file)

        # The context manager closes the underlying file once the pixels have been read.
        with Image.open(image_path) as image:
            # The transform normalizes with three per-channel means/stds, so force three
            # channels; grayscale, palette or RGBA PNGs would otherwise fail in Normalize.
            image_tensor = transform(image.convert('RGB'))

        # Key by the file name without its extension so it matches the caption key for the same image.
        image_index = os.path.splitext(image_file)[0]

        image_tensor_dict[image_index] = image_tensor
        
# Fill image_tensor_dict with one 256x256 tensor per PNG in image_output_dir.
convert_images_to_tensors()

# Arrange this dictionary in the same order as the embedding dictionary