In [1]:
import clip
import torch
from PIL import Image
import numpy as np  
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
import h5py
import pandas as pd
from tqdm import tqdm
import os
import matplotlib.pyplot as plt

### Load the model: CLIP

In [2]:

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

print(f"Using {device} device")

# Load CLIP: the call yields the model plus the matching image preprocessing callable.
model, preprocess = clip.load('ViT-B/32', device=device)

Using cuda device


In [10]:
# Load the CSV index that describes the training images

data_frame = pd.read_csv('/home/guimcc/OneDrive/General/Projectes/HackUPC2024/index/images_2.csv')

# Fit the encoder up front so the width of the one-hot block is known before
# any embedding is written.
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so try the new spelling first and fall back for older releases.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoder.fit(data_frame[['season','category','type']])

# 512 is the assumed width of the image embedding that the one-hot features are
# appended to (it has to match what model.encode_image returns — TODO confirm
# if the CLIP variant is ever changed).
max_combined_size = 512 + sum(len(categories) for categories in encoder.categories_)

print(f"max_combined_size: {max_combined_size}")

max_combined_size: 517


In [11]:
# Directory that the relative 'path' values of the training CSV are joined onto
base_image_path = '/home/guimcc/OneDrive/General/Projectes/HackUPC2024/images_2'
# HDF5 file the combined training embeddings are written to and later read back from
h5pt_file_path = '../ckp/images_2.h5'

In [5]:
# Define the processing of every image
def process_and_combine_data(data_row, device, model, preprocess, encoder):
    """Build the combined feature row (image embedding + one-hot categories) for one image.

    Args:
        data_row: Row of the index DataFrame; must provide 'path', 'season',
            'category' and 'type'.
        device: Torch device the image tensor is moved to before encoding.
        model: Model exposing `encode_image`.
        preprocess: Callable that turns a PIL image into a tensor for `model`.
        encoder: Fitted encoder whose `transform` encodes [season, category, type].

    Returns:
        NumPy array holding the image features followed by the one-hot
        features (concatenated along axis 1), or None when the row could not
        be processed.
    """
    relative_image_path = data_row['path']  # relative path or filename under base_image_path
    full_image_path = os.path.join(base_image_path, relative_image_path)

    # Bound before the try block so the error report below can always print it,
    # even when the failure happens while opening or encoding the image.
    categorical_data = None
    try:
        # The context manager releases the file handle once the tensor is built.
        with Image.open(full_image_path) as pil_image:
            image = preprocess(pil_image).unsqueeze(0).to(device)  # add batch axis, move to device
        with torch.no_grad():
            image_features = model.encode_image(image).cpu().numpy()  # image embedding

        # Wrap the three categorical values in a list: transform expects a 2-D input.
        categorical_data = [data_row[['season', 'category', 'type']].values.tolist()]
        one_hot_features = encoder.transform(categorical_data)

        # Combine image features with one-hot encoded features
        combined_features = np.concatenate((image_features, one_hot_features), axis=1)
        return combined_features

    except Exception as e:
        # Best effort: report the failing row and let the caller skip it.
        print(f"Failed to process image {full_image_path}: {str(e)}")
        print(data_row)
        print(categorical_data)
        return None

In [12]:
# Stream the combined embeddings into the HDF5 file, one dataset row per processed image.
with h5py.File(h5pt_file_path, 'w') as h5f:

    # Start empty; only the first axis may grow, the row width stays fixed.
    dset = h5f.create_dataset(
        "image_embeddings",
        shape=(0, max_combined_size),
        maxshape=(None, max_combined_size),
        dtype='float32',
    )

    for index, row in tqdm(
        data_frame.iterrows(),
        total=len(data_frame),
        desc="Processing images",
    ):
        combined_row = process_and_combine_data(row, device, model, preprocess, encoder)
        if combined_row is None:
            # NOTE: failed rows are skipped, so dataset row i only lines up with
            # DataFrame row i as long as no image failed.
            continue
        dset.resize(dset.shape[0] + 1, axis=0)
        dset[-1] = combined_row

Processing images: 100%|██████████| 252/252 [00:10<00:00, 24.42it/s] 


In [27]:
# Read every stored embedding into memory. The slice copies the data, so the
# file can be closed as soon as the block ends; a lingering open handle could
# otherwise get in the way of reopening the file for writing.
with h5py.File(h5pt_file_path, 'r') as h5f:
    train_embeddings = h5f['image_embeddings'][:]

### Change the model's weights

### Find closest data points

In [15]:
from scipy.spatial.distance import cosine

In [18]:
# Directory that the relative paths of the test images are joined onto
test_dir = '/home/guimcc/OneDrive/General/Projectes/HackUPC2024/images/images_test'

In [24]:
def generate_combined_embedding(relative_image_path, categorical_data, model, preprocess, encoder, device, image_dir=None):
    """Return the 1-D combined embedding (image features + one-hot categories) of one image.

    Args:
        relative_image_path: Path or filename of the image, relative to `image_dir`.
        categorical_data: Flat list of categorical values, e.g. [season, category, type].
        model: Model exposing `encode_image`.
        preprocess: Callable that turns a PIL image into a tensor for `model`.
        encoder: Fitted encoder whose `transform` encodes `categorical_data`.
        device: Torch device the image tensor is moved to before encoding.
        image_dir: Directory containing the image. Defaults to the module-level
            `test_dir`, which keeps existing call sites unchanged.

    Returns:
        1-D NumPy array: the image embedding followed by the one-hot features.
    """
    if image_dir is None:
        image_dir = test_dir

    # Process the image; the context manager releases the file handle once the
    # tensor has been built.
    full_image_path = os.path.join(image_dir, relative_image_path)
    with Image.open(full_image_path) as pil_image:
        image = preprocess(pil_image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_embedding = model.encode_image(image).cpu().numpy()

    # One-hot encode the categorical data; transform expects a 2-D input of the
    # form [[cat1, cat2, ..., catN]], hence the extra list.
    one_hot_features = encoder.transform([categorical_data])

    # Drop the leading batch axis of both parts and join them into one vector.
    combined_embedding = np.concatenate((image_embedding.squeeze(0), one_hot_features.squeeze(0)), axis=0)

    return combined_embedding

In [22]:
def find_closest_embedding_cosine(new_embedding, embeddings):
    """Locate the stored embedding nearest to `new_embedding`.

    The comparison uses SciPy's cosine *distance*, so smaller values mean a
    closer match.

    Returns:
        Tuple (position of the nearest entry in `embeddings`, its cosine distance).
    """
    distances = []
    for candidate in embeddings:
        distances.append(cosine(new_embedding, candidate))
    distances = np.array(distances)

    best_position = np.argmin(distances)
    return best_position, distances[best_position]

In [None]:
def display_images(img_path1, img_path2, title1="Test Image", title2="Closest Match"):
    """Show two images next to each other, each under its own title and without axes."""
    panels = [
        (Image.open(img_path1), title1),
        (Image.open(img_path2), title2),
    ]

    plt.figure(figsize=(10, 5))

    # One subplot per image in a single 1x2 row.
    for position, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.imshow(picture)
        plt.title(caption)
        plt.axis('off')

    plt.show()

In [37]:
# Index of the test images
data_frame_test = pd.read_csv('/home/guimcc/OneDrive/General/Projectes/HackUPC2024/index/images_resized_test.csv')

# For every test image: build its combined embedding, then report the nearest
# stored training embedding.
for index, row in tqdm(
    data_frame_test.iterrows(),
    total=len(data_frame_test),
    desc="Processing test images",
):
    new_image_path = row['path']  # Adjust the column name as necessary
    categorical_data = row[['season', 'category', 'type']].tolist()

    new_embedding = generate_combined_embedding(
        new_image_path, categorical_data, model, preprocess, encoder, device
    )

    # NOTE: despite its name, `similarity` is the cosine distance returned by
    # find_closest_embedding_cosine (smaller means closer).
    closest_index, similarity = find_closest_embedding_cosine(new_embedding, train_embeddings)

    print(f" {index} w path {new_image_path} : Closest index: {closest_index} w similarity: {similarity} path: {data_frame.loc[closest_index, 'path']}")


Processing test images:  14%|█▎        | 3/22 [00:00<00:01, 10.89it/s]

 0 w path 2007_0_2024_W_0_1.jpeg : Closest index: 689 w similarity: 0.05755240964892805 path: 246_1_2024_V_0_1
 1 w path 2000_1_2024_V_0_2.jpeg : Closest index: 432 w similarity: 0.051806041358309574 path: 154_0_2024_V_0_2
 2 w path 2006_2_2024_V_0_2.jpeg : Closest index: 242 w similarity: 0.10488691111555581 path: 86_2_2023_I_0_2


Processing test images:  32%|███▏      | 7/22 [00:00<00:01, 11.81it/s]

 3 w path 2007_1_2024_W_0_1.jpeg : Closest index: 1097 w similarity: 0.059268646421439586 path: 400_1_2023_I_0_1
 4 w path 2003_2_2024_V_0_3.jpeg : Closest index: 458 w similarity: 0.10829283342717944 path: 162_2_2024_V_0_3
 5 w path 2001_2_2023_I_0_1.jpeg : Closest index: 825 w similarity: 0.05718575111979762 path: 302_0_2023_I_0_2
 6 w path 2001_1_2024_V_0_1.jpeg : Closest index: 947 w similarity: 0.14309539668047377 path: 346_1_2023_I_0_1


Processing test images:  41%|████      | 9/22 [00:00<00:01, 11.49it/s]

 7 w path 2004_2_2024_V_0_3.jpeg : Closest index: 458 w similarity: 0.1121741889375012 path: 162_2_2024_V_0_3
 8 w path 2000_2_2024_V_0_2.jpeg : Closest index: 434 w similarity: 0.05674107029754971 path: 154_2_2024_V_0_2


Processing test images:  50%|█████     | 11/22 [00:01<00:01,  8.39it/s]

 9 w path 2006_0_2024_V_0_2.jpeg : Closest index: 991 w similarity: 0.0706089920893892 path: 363_0_2024_W_0_2
 10 w path 2005_1_2024_V_0_1.jpeg : Closest index: 307 w similarity: 0.12008069176767178 path: 109_1_2024_V_0_1


Processing test images:  59%|█████▉    | 13/22 [00:01<00:00,  9.37it/s]

 11 w path 2003_0_2024_V_0_3.jpeg : Closest index: 456 w similarity: 0.12889154116746404 path: 162_0_2024_V_0_3
 12 w path 2002_1_2024_V_1_1.jpeg : Closest index: 268 w similarity: 0.19635429099440682 path: 95_1_2024_V_1_1


Processing test images:  68%|██████▊   | 15/22 [00:01<00:00,  7.83it/s]

 13 w path 2000_0_2024_V_0_2.jpeg : Closest index: 432 w similarity: 0.04960910499488069 path: 154_0_2024_V_0_2
 14 w path 2004_0_2024_V_0_3.jpeg : Closest index: 629 w similarity: 0.0943853930426084 path: 225_1_2024_V_0_3


Processing test images:  82%|████████▏ | 18/22 [00:02<00:00,  8.37it/s]

 15 w path 2005_2_2024_V_0_1.jpeg : Closest index: 360 w similarity: 0.02206120380359944 path: 127_2_2024_V_0_1
 16 w path 2004_1_2024_V_0_3.jpeg : Closest index: 456 w similarity: 0.17966709639554923 path: 162_0_2024_V_0_3
 17 w path 2001_0_2024_V_0_1.jpeg : Closest index: 947 w similarity: 0.13122329758273898 path: 346_1_2023_I_0_1


Processing test images: 100%|██████████| 22/22 [00:02<00:00,  9.84it/s]

 18 w path 2005_0_2024_V_0_1.jpeg : Closest index: 307 w similarity: 0.14234904679785587 path: 109_1_2024_V_0_1
 19 w path 2003_1_2024_V_0_3.jpeg : Closest index: 728 w similarity: 0.1140681842794623 path: 260_0_2024_V_0_3
 20 w path 2006_1_2024_V_0_2.jpeg : Closest index: 724 w similarity: 0.0575025952358591 path: 258_1_2024_V_0_2
 21 w path 2002_0_2024_V_1_1.jpeg : Closest index: 1187 w similarity: 0.12435205110949943 path: 440_0_2024_V_1_2





: 

In [30]:
# Debug check: show whatever match index was assigned most recently (presumably left over from the test loop)
print(closest_index)

1187


### Outline
Outline of the overall method for retrieving images and obtaining their embeddings.

In [None]:
# NOTE(review): `image_paths` is not defined in the visible part of the notebook;
# it has to be provided before this cell can run.
# Preprocess each image into a batch-of-one tensor on the device selected at the top.
images = [preprocess(Image.open(image_path)).unsqueeze(0).to(device) for image_path in image_paths]

with torch.no_grad():
    image_features_l = [model.encode_image(image) for image in images]

# Join the per-image results along the batch axis. Unlike stack(...).squeeze(),
# this keeps the result 2-D even when there is only a single image, which the
# dim=1 concatenation below requires.
image_features = torch.cat(image_features_l, dim=0)

# Toy categorical table; it needs one row per entry of image_paths for the
# concatenation below to line up.
tabular_data = np.array([
    ['m', 's', 'v'],
    ['m', 's', 'v'],
    ['m', 'd', 'v'],
    ['m', 's', 'v'],
])

# Scale applied to the one-hot block before it is appended to the image features.
weight = 10.0


# NOTE: this rebinds the notebook-wide name `encoder` to an encoder fitted on the toy table.
encoder = OneHotEncoder()
encoded_categorical = encoder.fit_transform(tabular_data).toarray()
encoded_categorical = torch.tensor(encoded_categorical, device=device).float()

combined_features = torch.cat((image_features, weight*encoded_categorical), dim=1)

In [68]:
# Toy categorical table with three categorical columns (presumably one row per image — TODO confirm)
tabular_data = np.array([
    ['m', 's', 'v'],
    ['m', 's', 'v'],
    ['m', 'd', 'v'],
    ['m', 's', 'v'],
])

# Scale applied to the one-hot block before it is concatenated with the image features
weight = 10.0

In [74]:
# One-hot encode the toy table and place it on the device selected at the top
# of the notebook, so the cell also runs on CPU-only machines.
# NOTE: this rebinds the notebook-wide name `encoder` to an encoder fitted on the toy table.
encoder = OneHotEncoder()
encoded_categorical = encoder.fit_transform(tabular_data).toarray()
encoded_categorical = torch.tensor(encoded_categorical, device=device).float()

In [70]:
# Append the weighted one-hot block to the image features along the feature axis.
combined_features = torch.cat([image_features, encoded_categorical * weight], dim=1)

In [None]:
# Save embeddings: write the combined features as one HDF5 dataset
with h5py.File('combined_embeddings.hdf5', 'w') as h5_out:
    h5_out.create_dataset(
        'embeddings',
        data=combined_features.detach().cpu().numpy(),
    )

# Load embeddings: read the whole dataset back into memory
with h5py.File('combined_embeddings.hdf5', 'r') as h5_in:
    loaded_embeddings = h5_in['embeddings'][:]