<a href="https://colab.research.google.com/github/itu-itis23-majidov23/Prediction-of-Coordinates-based-on-Similarity/blob/main/Weighted_Averaging_of_VGG16andCosine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# following cells can read the dataset through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import zipfile
import os

# Archive holding the competition data
zip_path = '/content/drive/MyDrive/DatathonAI/datathon-ai-24.zip'

# Destination folder for the unpacked files
extract_dir = '/content/drive/My Drive/Folder/Extracted'

# Make sure the destination exists; an already existing folder is not an error
os.makedirs(extract_dir, exist_ok=True)

# Unpack every member of the archive into the destination folder
with zipfile.ZipFile(zip_path) as zip_ref:
    zip_ref.extractall(path=extract_dir)

print(f"Extracted to {extract_dir}")


Extracted to /content/drive/My Drive/Folder/Extracted


In [None]:
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing import image
from sklearn.metrics.pairwise import cosine_similarity
from PIL import Image

# Pre-trained VGG16 model
from keras.applications.vgg16 import VGG16, preprocess_input

# Convolutional base only (no classifier head) with ImageNet weights; global
# max pooling reduces the final feature maps to one vector per image.
vgg16 = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='max',
    input_shape=(224, 224, 3),
)

# Freeze every layer: the network is only used as a fixed feature extractor
for model_layer in vgg16.layers:
    model_layer.trainable = False

def load_image(image_path):
    """Load an image from disk as a 224x224 RGB PIL image.

    Args:
        image_path: Path of the image file to open.

    Returns:
        A PIL image in RGB mode resized to 224x224 pixels, matching the
        ``input_shape=(224, 224, 3)`` the VGG16 model is built with.
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been read, so looping over many files does not accumulate handles.
    with Image.open(image_path) as input_image:
        # Force three channels: grayscale, palette or RGBA files (common for
        # PNG) would otherwise produce arrays the model cannot accept.
        rgb_image = input_image.convert('RGB')
        return rgb_image.resize((224, 224))

def get_image_embeddings(object_image):
    """Compute the VGG16 embedding of a single image.

    Args:
        object_image: Image accepted by ``image.img_to_array``; it is expected
            to already be 224x224 RGB so it matches the model input shape.

    Returns:
        The array returned by ``vgg16.predict`` for a batch that contains only
        this image.
    """
    # Add a leading batch axis: the model consumes batches, not single images.
    image_array = np.expand_dims(image.img_to_array(object_image), axis=0)
    # The ImageNet weights expect inputs normalised by ``preprocess_input``;
    # feeding raw 0-255 RGB pixels degrades the extracted features.
    image_array = preprocess_input(image_array)
    # verbose=0 silences the per-call progress bar, which otherwise floods the
    # notebook output because this function runs once per image.
    image_embedding = vgg16.predict(image_array, verbose=0)
    return image_embedding

def calculate_lat_lon_for_test_images(train_csv, train_folder, test_folder, output_csv, top_k=4):
    """Predict coordinates for test images from their most similar training images.

    Every training image is embedded with ``get_image_embeddings``. For each
    test image, the ``top_k`` training images with the highest cosine
    similarity are selected and their latitudes and longitudes are averaged.
    The predictions are written to ``output_csv``.

    Args:
        train_csv: Path of a ';'-delimited CSV with the columns ``filename``,
            ``latitude`` and ``longitude``.
        train_folder: Folder containing the images listed in ``train_csv``.
        test_folder: Folder scanned for ``.jpeg``, ``.jpg`` and ``.png`` files
            (extension matched case-insensitively).
        output_csv: Path of the CSV file to write, with the columns
            ``filename``, ``latitude`` and ``longitude``.
        top_k: Number of nearest training images to average over. Defaults to
            4, the value that was previously hard-coded.

    Raises:
        ValueError: If ``top_k`` is smaller than 1 or ``train_csv`` lists no
            images.
    """
    # A slice of ``[-0:]`` would silently select every training row.
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k}")

    # Load training dataset
    train_data = pd.read_csv(train_csv, delimiter=';')

    # Compute embeddings for all training images
    train_embeddings = []
    for filename in train_data['filename']:
        train_image = load_image(os.path.join(train_folder, filename))
        train_embeddings.append(get_image_embeddings(train_image).flatten())
    train_embeddings = np.array(train_embeddings)

    # Fail early with a clear message instead of an obscure shape error from
    # the similarity computation.
    if train_embeddings.size == 0:
        raise ValueError(f"No training images listed in {train_csv}")

    # Collect test images. The extension check ignores case so files such as
    # 'IMG_1.JPG' are not skipped, and sorting makes the output row order
    # reproducible (os.listdir returns entries in arbitrary order).
    image_extensions = {'.jpeg', '.jpg', '.png'}
    test_filenames = sorted(
        name for name in os.listdir(test_folder)
        if os.path.splitext(name)[1].lower() in image_extensions
    )

    results = []
    for test_filename in test_filenames:
        test_image = load_image(os.path.join(test_folder, test_filename))
        test_embedding = get_image_embeddings(test_image).flatten()

        # Similarity of this test image with every training image
        similarities = cosine_similarity([test_embedding], train_embeddings)[0]

        # Indices of the top_k most similar training images, best match first
        top_indices = similarities.argsort()[-top_k:][::-1]
        nearest_rows = train_data.iloc[top_indices]

        # Unweighted mean of the neighbours' coordinates
        results.append({
            'filename': test_filename,
            'latitude': nearest_rows['latitude'].mean(),
            'longitude': nearest_rows['longitude'].mean(),
        })

    # Explicit columns keep the header row even when no test image was found.
    results_df = pd.DataFrame(results, columns=['filename', 'latitude', 'longitude'])
    results_df.to_csv(output_csv, index=False)
    print(f"Results saved to {output_csv}")

# Example usage: dataset locations and the destination of the submission file
train_csv = '/content/drive/MyDrive/Folder/Extracted/train.csv'  # CSV with filename, latitude, longitude
train_folder = '/content/drive/MyDrive/Folder/Extracted/train'  # Training images
test_folder = '/content/drive/MyDrive/Folder/Extracted/test'  # Test images
output_csv = '/content/submission_123.csv'  # Predictions are written here

# Run the full pipeline: embed, match and write the predictions
calculate_lat_lon_for_test_images(
    train_csv=train_csv,
    train_folder=train_folder,
    test_folder=test_folder,
    output_csv=output_csv,
)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step