<a href="https://colab.research.google.com/github/itu-itis23-majidov23/Prediction-of-Coordinates-based-on-Similarity/blob/main/VGG16-Cosine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing import image
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

# Pre-trained VGG16 model
from keras.applications.vgg16 import VGG16

# Build the network without its classifier head (include_top=False); with
# pooling='max' the model output is max-pooled into one vector per image.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='max',
    input_shape=(224, 224, 3),
)

# The model is only used for prediction here, so mark every layer as frozen.
for model_layer in vgg16.layers:
    model_layer.trainable = False

def load_image(image_path):
    """Load an image file as a 224x224 RGB PIL image.

    Args:
        image_path: Path to an image file that Pillow can open.

    Returns:
        A new PIL image in RGB mode, resized to 224x224 pixels.
    """
    # Image.open is lazy and keeps the file open; the context manager closes
    # it deterministically once convert() has copied the pixel data out,
    # instead of leaving the handle for the garbage collector.
    with Image.open(image_path) as source_image:
        rgb_image = source_image.convert('RGB')
    return rgb_image.resize((224, 224))

def get_image_embeddings(object_image):
    """Generate an embedding for one image using the pre-trained VGG16 model.

    Args:
        object_image: A PIL image already resized to the model's input size
            (see ``load_image``).

    Returns:
        The array returned by ``vgg16.predict`` for a batch containing this
        single image (first axis has length 1).
    """
    # Local import: the file already depends on keras.applications.vgg16, and
    # keeping the import here makes this function self-contained.
    from keras.applications.vgg16 import preprocess_input

    image_batch = np.expand_dims(image.img_to_array(object_image), axis=0)
    # The ImageNet weights were trained on inputs transformed by
    # preprocess_input; feeding raw 0-255 RGB pixels gives the network a
    # different input distribution than it was trained on.
    image_batch = preprocess_input(image_batch)
    return vgg16.predict(image_batch)

def _weighted_coordinates(similarities, latitudes, longitudes, threshold):
    """Return the similarity-weighted mean (latitude, longitude) of training
    rows whose similarity is at least ``threshold``, or (NaN, NaN) if none."""
    matched = similarities >= threshold
    matched_similarities = similarities[matched]
    # No usable neighbour (or weights that sum to zero): there is nothing to
    # average, so report missing coordinates.
    if matched_similarities.size == 0 or matched_similarities.sum() == 0:
        return np.nan, np.nan
    # np.average normalises the weights itself, so the raw similarities can
    # be passed directly.
    predicted_latitude = np.average(latitudes[matched], weights=matched_similarities)
    predicted_longitude = np.average(longitudes[matched], weights=matched_similarities)
    return predicted_latitude, predicted_longitude


def calculate_lat_lon_for_test_images(train_csv, train_folder, test_folder, output_csv, threshold=0.8):
    """Predict coordinates for test images from visually similar training images.

    Every training image listed in ``train_csv`` is embedded with
    ``get_image_embeddings``. Each test image is embedded the same way and
    compared to all training embeddings by cosine similarity; its predicted
    latitude/longitude is the similarity-weighted mean over the training rows
    whose similarity is at least ``threshold``.

    Args:
        train_csv: Path to a ';'-delimited CSV with ``filename``,
            ``latitude`` and ``longitude`` columns.
        train_folder: Folder containing the training images named in the CSV.
        test_folder: Folder containing the test images; files whose names end
            in jpeg/jpg/png (case-insensitive) are processed.
        output_csv: Path of the CSV file to write.
        threshold: Minimum cosine similarity for a training image to
            contribute to a prediction.

    Returns:
        None. Writes ``output_csv`` with ``filename``, ``latitude`` and
        ``longitude`` columns (NaN coordinates when no training image reaches
        the threshold) and prints a confirmation message.
    """
    train_data = pd.read_csv(train_csv, delimiter=';')

    # Pull the coordinate columns out once instead of re-slicing the
    # DataFrame for every test image; this also fails fast on a missing
    # column, before any embeddings are computed.
    train_latitudes = train_data['latitude'].to_numpy()
    train_longitudes = train_data['longitude'].to_numpy()

    # Compute embeddings for all training images, one row per image.
    train_embeddings = np.array([
        get_image_embeddings(load_image(os.path.join(train_folder, filename))).flatten()
        for filename in train_data['filename']
    ])

    # Match extensions case-insensitively so e.g. "IMG_1.JPG" is not skipped,
    # and sort because os.listdir order is arbitrary and would otherwise make
    # the output row order irreproducible.
    test_filenames = sorted(
        name for name in os.listdir(test_folder)
        if name.lower().endswith(('jpeg', 'jpg', 'png'))
    )

    results = []
    for test_filename in test_filenames:
        test_image = load_image(os.path.join(test_folder, test_filename))
        test_embedding = get_image_embeddings(test_image).flatten()

        # Similarity of this test image to every training image.
        similarities = cosine_similarity([test_embedding], train_embeddings)[0]

        predicted_latitude, predicted_longitude = _weighted_coordinates(
            similarities, train_latitudes, train_longitudes, threshold
        )
        results.append({
            'filename': test_filename,
            'latitude': predicted_latitude,
            'longitude': predicted_longitude
        })

    # Explicit columns keep the header present even when no test image was found.
    results_df = pd.DataFrame(results, columns=['filename', 'latitude', 'longitude'])
    results_df.to_csv(output_csv, index=False)
    print(f"Results saved to {output_csv}")

# Example usage
# CSV file containing filename, latitude, longitude
train_csv = '/content/drive/MyDrive/Folder/Extracted/train.csv'
# Folder containing training images
train_folder = '/content/drive/MyDrive/Folder/Extracted/train'
# Folder containing test images
test_folder = '/content/drive/MyDrive/Folder/Extracted/test'
# Output CSV file
output_csv = '/content/submission_123.csv'

calculate_lat_lon_for_test_images(
    train_csv=train_csv,
    train_folder=train_folder,
    test_folder=test_folder,
    output_csv=output_csv,
    threshold=0.8,
)

Processing test images:   0%|          | 2/800 [1:39:20<660:37:12, 2980.24s/image]
Processing test images:   0%|          | 0/800 [38:46<?, ?image/s]
Processing test images:   0%|          | 0/800 [35:04<?, ?image/s]
Processing test images:   0%|          | 0/800 [1:47:50<?, ?image/s]


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s