In [2]:
import clip
import torch
from PIL import Image
import numpy as np  
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import h5py
import pandas as pd
from tqdm import tqdm
import os

### Load the model: CLIP

In [3]:

# Run on the GPU whenever PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

# clip.load hands back the model together with the image preprocessing
# callable that is applied to every image before encoding.
model, preprocess = clip.load('ViT-B/32', device=device)

Using cuda device


In [4]:
# Load the image index: one row per image, holding its 'path' plus the
# categorical columns that are one-hot encoded below.
data_frame = pd.read_csv('/home/guimcc/OneDrive/General/Projectes/HackUPC2024/index/images_resized_sorted.csv')

# Fit the encoder on the whole index so that every category value present in
# the CSV gets its own one-hot column.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the new keyword first and fall back for
# older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoder.fit(data_frame[['season','category','type']])

# Width of one stored row: the image-embedding part (512 values) followed by
# one column per fitted category value.
clip_embedding_size = 512
one_hot_size = sum(len(categories) for categories in encoder.categories_)
max_combined_size = clip_embedding_size + one_hot_size

print(f"max_combined_size: {max_combined_size}")

max_combined_size: 523


In [5]:
# Directory holding the resized images; the 'path' value of each CSV row is
# joined onto it when the image is opened.
# NOTE(review): absolute, machine-specific location — adjust before running
# the notebook on another machine.
base_image_path = '/home/guimcc/OneDrive/General/Projectes/HackUPC2024/images_resized'
# HDF5 file the combined embeddings are written to and later read back from.
h5pt_file_path = '../ckp/images_resized_embd.h5'

In [6]:
# Define the processing of every image
def process_and_combine_data(data_row, device, model, preprocess, encoder, base_path=None):
    """Build the combined feature row (image embedding + one-hot categories) for one image.

    Args:
        data_row: Row of the image index. It is indexed with 'path' and with
            the list ['season', 'category', 'type'].
        device: Torch device the preprocessed image is moved to.
        model: Object exposing ``encode_image``.
        preprocess: Callable turning the opened image into a tensor.
        encoder: Fitted encoder whose ``transform`` one-hot encodes
            ``[[season, category, type]]``.
        base_path: Directory that ``data_row['path']`` is joined onto. When
            omitted, the module-level ``base_image_path`` is used.

    Returns:
        The image features and the one-hot features concatenated along
        axis 1, or ``None`` when anything goes wrong while processing the
        image (the failure is printed, not raised).
    """
    if base_path is None:
        base_path = base_image_path

    relative_image_path = data_row['path']  # presumably a path/filename relative to base_path
    full_image_path = os.path.join(base_path, relative_image_path)

    # Bound up front so the error report below can always print it, even when
    # the failure happens before the categorical values have been extracted.
    categorical_data = None
    try:
        # The context manager releases the file handle as soon as the image
        # has been turned into a tensor.
        with Image.open(full_image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)  # add a leading batch axis
        with torch.no_grad():
            image_features = model.encode_image(image).cpu().numpy()  # image embedding

        # Wrapped in an outer list: the encoder receives a single-row table.
        categorical_data = [data_row[['season', 'category', 'type']].values.tolist()]
        one_hot_features = encoder.transform(categorical_data)

        # Image embedding first, one-hot block second.
        return np.concatenate((image_features, one_hot_features), axis=1)

    except Exception as e:
        # Best effort: report the failing row and let the caller skip it.
        print(f"Failed to process image {full_image_path}: {str(e)}")
        print(data_row)
        print(categorical_data)
        return None

In [7]:
with h5py.File(h5pt_file_path, 'w') as h5_out:

    # Start with zero rows; the first axis is unbounded so the dataset can be
    # extended by one row for every image that is processed successfully.
    embeddings_ds = h5_out.create_dataset(
        "image_embeddings",
        shape=(0, max_combined_size),
        maxshape=(None, max_combined_size),
        dtype='float32',
    )

    progress = tqdm(data_frame.iterrows(), total=len(data_frame), desc="Processing images")
    for _, csv_row in progress:
        features = process_and_combine_data(csv_row, device, model, preprocess, encoder)
        if features is None:
            # Rows whose image could not be processed get no entry in the file.
            continue
        embeddings_ds.resize(embeddings_ds.shape[0] + 1, axis=0)
        embeddings_ds[-1] = features

Processing images: 100%|██████████| 1249/1249 [00:57<00:00, 21.67it/s]


In [8]:
# Read the stored embeddings back into memory. Slicing with [:] copies the
# dataset into a NumPy array; the bare dataset handle would stop being usable
# as soon as the `with` block closes the file.
with h5py.File(h5pt_file_path, 'r') as file:
    combined_features = file['image_embeddings'][:]
    


### Outline
Outline of the overall method for retrieving images and obtaining their embeddings.

In [None]:
# NOTE(review): `image_paths` is not defined anywhere in the visible notebook —
# it has to be provided before this cell can run.
# Use the device selected when the model was loaded instead of assuming CUDA.
images = [preprocess(Image.open(image_path)).unsqueeze(0).to(device) for image_path in image_paths]

with torch.no_grad():
    image_features_l = [model.encode_image(image) for image in images]

# Every entry already carries the leading batch axis added by unsqueeze(0), so
# concatenating along that axis gives one row per image. (stack + squeeze()
# would also drop the image axis when only a single image is supplied.)
image_features = torch.cat(image_features_l, dim=0)

# Toy categorical table: three categorical attributes per row, one row for
# each entry that is later concatenated with the image features.
_toy_rows = [
    ('m', 's', 'v'),
    ('m', 's', 'v'),
    ('m', 'd', 'v'),
    ('m', 's', 'v'),
]
tabular_data = np.array(_toy_rows)

# Factor the one-hot block is multiplied by before it is concatenated with
# the image features.
weight = 10.0


# NOTE(review): this rebinds `encoder`, replacing the encoder fitted on the
# CSV earlier in the notebook — re-run that cell before encoding real rows.
encoder = OneHotEncoder()
encoded_categorical = encoder.fit_transform(tabular_data).toarray()
# Build the float32 tensor directly on the device chosen when the model was
# loaded, rather than hard-coding 'cuda' and converting from float64 afterwards.
encoded_categorical = torch.tensor(encoded_categorical, dtype=torch.float32, device=device)

# Image features first, weighted one-hot block second.
combined_features = torch.cat((image_features, weight*encoded_categorical), dim=1)

In [68]:
# Toy categorical table: three categorical attributes per row, one row for
# each entry that is later concatenated with the image features.
_toy_rows = [
    ('m', 's', 'v'),
    ('m', 's', 'v'),
    ('m', 'd', 'v'),
    ('m', 's', 'v'),
]
tabular_data = np.array(_toy_rows)

# Factor the one-hot block is multiplied by before it is concatenated with
# the image features.
weight = 10.0

In [74]:
# NOTE(review): this rebinds `encoder`, replacing the encoder fitted on the
# CSV earlier in the notebook — re-run that cell before encoding real rows.
encoder = OneHotEncoder()
encoded_categorical = encoder.fit_transform(tabular_data).toarray()
# Build the float32 tensor directly on the device chosen when the model was
# loaded, rather than hard-coding 'cuda' and converting from float64 afterwards.
encoded_categorical = torch.tensor(encoded_categorical, dtype=torch.float32, device=device)

In [70]:
# Append the weighted one-hot columns to the right of the image features.
combined_features = torch.cat([image_features, encoded_categorical * weight], dim=1)

In [None]:
# Save embeddings: move the tensor to host memory and store it as a NumPy
# array in a single dataset.
with h5py.File('combined_embeddings.hdf5', 'w') as h5_out:
    embeddings_array = combined_features.cpu().detach().numpy()
    h5_out.create_dataset('embeddings', data=embeddings_array)

# Load embeddings: [:] copies the whole dataset into memory, so the result
# stays usable after the file has been closed.
with h5py.File('combined_embeddings.hdf5', 'r') as h5_in:
    loaded_embeddings = h5_in['embeddings'][:]