In [1]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np



In [2]:
# 1. Load a pre-trained ResNet50 model (as an example) to use as a feature extractor.
# weights='imagenet' requests the ImageNet-pretrained weights; include_top=False
# omits the final fully-connected classification layer, and pooling='avg' applies
# global average pooling so the model outputs one flat feature vector per image.
# `model` is a module-level global that extract_features_batch reads.
model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

def center_crop(img, desired_size):
    """Center crop an image to exactly the desired size.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, top, right, bottom))`` method (e.g. a PIL image).
        desired_size: ``(crop_width, crop_height)`` of the output, in pixels.

    Returns:
        Whatever ``img.crop`` returns for the centered box.

    Note:
        If the image is smaller than ``desired_size`` the box extends beyond
        the image bounds; how that is handled is left to ``img.crop``.
    """
    width, height = img.size
    crop_width, crop_height = int(desired_size[0]), int(desired_size[1])

    # Use integer pixel offsets. A float box such as (49.5, ..., 250.5) leaves
    # the rounding to crop(), and rounding each edge independently can yield a
    # result that is one pixel smaller or larger than requested.
    left = (width - crop_width) // 2
    top = (height - crop_height) // 2

    # Derive right/bottom from left/top so the box is always exactly the
    # requested size.
    return img.crop((left, top, left + crop_width, top + crop_height))

def extract_features_batch(image_paths, crop_size=(200, 200), target_size=(224, 224)):
    """Extract features from a batch of images using the pre-trained model.

    Each image is loaded, center-cropped to ``crop_size``, resized to
    ``target_size`` and converted to an array; the stacked batch is passed
    through ``preprocess_input`` and then the module-level ``model``.

    Args:
        image_paths: Iterable of image file paths; must contain at least one.
        crop_size: ``(width, height)`` handed to ``center_crop``.
        target_size: ``(width, height)`` the crop is resized to before
            inference.

    Returns:
        The array returned by ``model.predict``: one row of features per
        input path, in input order.

    Raises:
        ValueError: If ``image_paths`` is empty.
    """
    image_paths = list(image_paths)
    if not image_paths:
        # np.stack would also raise ValueError here, but with a message that
        # says nothing about the actual cause.
        raise ValueError("image_paths must contain at least one image path")

    # Load -> center crop -> resize -> numpy array, one entry per path.
    batch_images = [
        img_to_array(center_crop(load_img(img_path), crop_size).resize(target_size))
        for img_path in image_paths
    ]

    # Convert the list of image arrays to a single batch array (batch axis first).
    batch_array = np.stack(batch_images, axis=0)

    # Apply the ResNet50-specific input preprocessing to the whole batch.
    preprocessed_batch = preprocess_input(batch_array)

    # verbose=0: this function is called once per CSV row, and the default
    # progress bar would otherwise be printed on every call.
    return model.predict(preprocessed_batch, verbose=0)



In [3]:
import os
import pandas as pd


# Load the candidate table: a 'left' image name followed by the names of the
# right-image candidates for that row (the original notebook assumes 20).
data = pd.read_csv('test_candidates.csv')
# Directory paths
left_dir = os.path.join('test', 'left')
right_dir = os.path.join('test', 'right')

# NOTE: tf.keras.losses.CosineSimilarity is a *loss*: it returns the NEGATIVE
# cosine similarity (values closer to -1 mean more similar). Using it as a
# score inverts the ranking, so the scores below are computed with numpy.
# The object is kept only so any other cell that references it still works.
cosine_similarity_obj = tf.keras.losses.CosineSimilarity()

# Collect one output row per input row and build the DataFrame once at the
# end; growing a DataFrame with .loc inside the loop is slow.
result_rows = []

# Process each row in the dataframe
for index, row in data.iterrows():
    # Extract the left image name and all of its right-image candidates
    left_image_name = row['left']
    right_image_names = row.iloc[1:].tolist()  # every column after 'left'

    # Create a list of paths for the current batch (1 left + N right)
    image_paths = [os.path.join(left_dir, left_image_name + '.jpg')]
    image_paths.extend(os.path.join(right_dir, name + '.jpg') for name in right_image_names)

    # Extract features for the entire batch in a single forward pass
    features_batch = np.asarray(extract_features_batch(image_paths), dtype=np.float64)

    # Row 0 is the left image; the remaining rows are the right candidates
    left_features = features_batch[0]
    right_features = features_batch[1:]

    # True cosine similarity (higher = more similar), computed for all
    # candidates at once. A zero-norm vector gets similarity 0 rather than NaN.
    norms = np.linalg.norm(right_features, axis=1) * np.linalg.norm(left_features)
    similarities = np.divide(
        right_features @ left_features,
        norms,
        out=np.zeros_like(norms),
        where=norms > 0,
    )

    # Output row: the left image name followed by one score per candidate
    result_rows.append([left_image_name, *similarities.tolist()])

# Same column layout and index as the input table, with scores in place of names
similarity_results = pd.DataFrame(result_rows, columns=data.columns, index=data.index)

# Save the similarity results to a new CSV file
similarity_results.to_csv('similarity_results.csv', index=False)




In [4]:
import pandas as pd

# Load the similarity scores written by the previous step
df = pd.read_csv('similarity_results.csv')

# Every column except the 'left' identifier holds scores; rescale all of them
# in one vectorised assignment, mapping each score x to (x + 1) / 2
score_columns = [name for name in df.columns if name != 'left']
df[score_columns] = (df[score_columns] + 1) / 2

# Write the rescaled table out as the submission file
df.to_csv('submission.csv', index=False)
