In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.applications import VGG16, ResNet50, InceptionV3
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.metrics.pairwise import cosine_similarity
from pathlib import Path
import matplotlib.pyplot as plt
import csv

# Minimum cosine similarity a query must reach against its best reference
# image before it is reported as a match (used by identify_image).
COSINE_SIMILARITY_THRESHOLD = 0.55

# Locations of the reference images, the query images, and the CSV report
reference_image_directory = "C:/Users/sc23krj/Desktop/Karan MSC Thesis/ChemicalImage"
query_image_directory = "C:/Users/sc23krj/Desktop/Karan MSC Thesis/Extracted Hypercubes Core Images/"
output_csv_path = "C:/Users/sc23krj/Desktop/Karan MSC Thesis/hypercube_identification.csv"

# ImageNet-pretrained VGG16 with its top (classification) layers left out
base_model = VGG16(include_top=False, weights='imagenet')

# Function to load and preprocess TIFF images
def load_and_preprocess_image(image_path):
    """Load an image file and turn it into a VGG16-ready input batch.

    The image is read with OpenCV, brought to 3-channel RGB in the 0-255
    value range, resized to 224x224, given a leading batch axis and passed
    through the VGG16 ``preprocess_input``.

    Args:
        image_path: Path (str or path-like) of the image to load.

    Returns:
        Array of shape (1, 224, 224, 3) as returned by ``preprocess_input``.

    Raises:
        ValueError: If the file cannot be read or has an unsupported number
            of channels.

    Note:
        Similarity scores depend on this preprocessing; re-validate
        COSINE_SIMILARITY_THRESHOLD if it was tuned against features that
        were produced with a different channel order or value range.
    """
    img = cv2.imread(os.fspath(image_path), cv2.IMREAD_UNCHANGED)  # Load TIFF image
    if img is None:
        raise ValueError(f"Unable to load image at {image_path}")

    # preprocess_input expects pixel values in 0-255; rescale 16-bit data
    # instead of feeding values up to 65535 into the network.
    if img.dtype == np.uint16:
        img = img.astype(np.float32) * (255.0 / 65535.0)

    # OpenCV delivers BGR(A); the VGG16 preprocess_input expects RGB and does
    # its own RGB->BGR swap, so convert explicitly to RGB here.
    if img.ndim == 2 or img.shape[2] == 1:  # Grayscale / single channel
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    elif img.shape[2] == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    elif img.shape[2] == 4:  # Drop the alpha channel
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    else:
        raise ValueError(
            f"Unsupported number of channels ({img.shape[2]}) in image at {image_path}"
        )

    img = cv2.resize(img, (224, 224))  # Resize to match the input size for VGG16
    img = np.expand_dims(img, axis=0)
    img = preprocess_input(img)
    return img

# Function to extract features from an image using VGG16
def extract_features(image_path, model):
    """Return the model's prediction for one image as a flat 1-D vector.

    Args:
        image_path: Path of the image; loaded via load_and_preprocess_image.
        model: Object exposing ``predict`` (the notebook passes base_model).

    Returns:
        The flattened output of ``model.predict`` for the preprocessed image.
    """
    batch = load_and_preprocess_image(image_path)
    return model.predict(batch).flatten()

# Extract features for all reference images in the directory.
# os.listdir returns entries in arbitrary order, so the listing is sorted to
# keep reference_image_paths (and the matching rows of reference_features)
# identical from run to run. The extension check ignores case so that files
# named e.g. "X.TIF" are not silently skipped.
reference_image_paths = sorted(
    os.path.join(reference_image_directory, fname)
    for fname in os.listdir(reference_image_directory)
    if fname.lower().endswith('.tif') and not fname.startswith('._')
)

# Fail early with a clear message instead of a shape error during matching.
if not reference_image_paths:
    raise FileNotFoundError(
        f"No .tif reference images found in {reference_image_directory}"
    )

# Row i of reference_features belongs to reference_image_paths[i]
reference_features = np.array(
    [extract_features(img_path, base_model) for img_path in reference_image_paths]
)

# Function to identify the closest matching reference image for a query image
def identify_image(query_img_path, model, reference_features, reference_image_paths, threshold=None):
    """Find the reference image whose features are closest to a query image.

    Args:
        query_img_path: Path of the query image.
        model: Feature-extraction model handed to extract_features.
        reference_features: One feature row per reference image, in the same
            order as ``reference_image_paths``.
        reference_image_paths: Paths of the reference images.
        threshold: Minimum cosine similarity required to accept the best
            candidate. Defaults to COSINE_SIMILARITY_THRESHOLD when None.

    Returns:
        The path of the best-matching reference image, or None when there are
        no reference images or the best similarity is below the threshold.
    """
    # Nothing to compare against: report "no match" rather than letting
    # cosine_similarity fail on an empty reference matrix.
    if len(reference_image_paths) == 0:
        return None

    if threshold is None:
        threshold = COSINE_SIMILARITY_THRESHOLD

    query_feature = extract_features(query_img_path, model)
    # Single query row -> take row 0 to get one similarity per reference image
    similarities = cosine_similarity([query_feature], reference_features)[0]
    best_index = int(np.argmax(similarities))

    # Accept the best candidate only if it is similar enough
    if similarities[best_index] >= threshold:
        return reference_image_paths[best_index]
    return None  # Indicate no close match found

# Helper: load an image as an RGB array suitable for matplotlib
def _read_image_for_display(image_path):
    """Read an image and return it as a 3-channel RGB array for ``imshow``.

    Raises:
        ValueError: If the file cannot be read.
    """
    img = cv2.imread(os.fspath(image_path), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError(f"Unable to load image at {image_path}")

    # imshow clips integer RGB data to 0-255, so stretch 16-bit images to
    # 8-bit first (display only; this does not affect feature extraction).
    if img.dtype == np.uint16:
        low, high = float(img.min()), float(img.max())
        scale = 255.0 / (high - low) if high > low else 0.0
        img = ((img.astype(np.float32) - low) * scale).astype(np.uint8)

    # OpenCV loads colour images as BGR(A); matplotlib expects RGB.
    if img.ndim == 2 or img.shape[2] == 1:
        return cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    if img.shape[2] == 4:
        return cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)


# Function to display the query image and the identified reference image side by side
def display_images(query_image_path, reference_image_path):
    """Show the query image, next to its matched reference image if any.

    Args:
        query_image_path: Path of the query image.
        reference_image_path: Path of the identified reference image, or a
            falsy value (e.g. None) when no match was found; in that case
            only the query image is shown.

    Raises:
        ValueError: If one of the images cannot be read.
    """
    query_rgb = _read_image_for_display(query_image_path)

    if not reference_image_path:
        # Display only the query image if no match found
        fig, ax = plt.subplots(figsize=(5, 5))
        ax.imshow(query_rgb)
        ax.set_title("Query Image (No Match Found)")
        ax.axis('off')
        plt.show()
        plt.close(fig)
        return

    reference_rgb = _read_image_for_display(reference_image_path)

    # Display the images side by side, resized to a common size
    fig, axes = plt.subplots(1, 2, figsize=(10, 5))
    panels = (
        (axes[0], query_rgb, "Query Image"),
        (axes[1], reference_rgb, "Identified Reference Image"),
    )
    for ax, image, title in panels:
        ax.imshow(cv2.resize(image, (224, 224)))
        ax.set_title(title)
        ax.axis('off')

    plt.show()
    plt.close(fig)  # release the figure; this function is called once per query

# Function to log the results into a list for CSV writing
def log_result(query_img_name, identified_img_name):
    """Build one CSV row: the query name followed by the identified name."""
    row = [query_img_name]
    row.append(identified_img_name)
    return row

# Process all images in the query directory and store results for CSV.
# The listing is sorted because os.listdir order is arbitrary: the CSV row
# order must be reproducible, since a later step keeps only the first row
# per identified reference name. The extension check ignores case.
results = []
query_image_paths = sorted(
    os.path.join(query_image_directory, fname)
    for fname in os.listdir(query_image_directory)
    if fname.lower().endswith('.tiff') and not fname.startswith('._')
)

for query_img_path in query_image_paths:
    identified_image = identify_image(query_img_path, base_model, reference_features, reference_image_paths)
    query_img_name = Path(query_img_path).stem
    # identify_image returns None when nothing reaches the similarity threshold
    identified_img_name = Path(identified_image).stem if identified_image else "No Match"

    # Log the result
    results.append(log_result(query_img_name, identified_img_name))

    # Display the images
    display_images(query_img_path, identified_image)
    print(f'The image {query_img_name} is identified as: {identified_img_name}')

# Write results to CSV file. UTF-8 is set explicitly so the file does not
# depend on the platform's locale encoding (pandas reads CSVs as UTF-8 by default).
with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(["Original Hypercube Name", "Identified Reference Name"])
    writer.writerows(results)

print(f"Results saved to {output_csv_path}")

In [None]:
import pandas as pd

# Input: the identification report; output: one row per identified reference
input_csv_path = 'C:/Users/sc23krj/Desktop/Karan MSC Thesis/hypercube_identification.csv'
output_csv_path = 'C:/Users/sc23krj/Desktop/Karan MSC Thesis/filtered_hypercube_identification.csv'

# Load the CSV file into a pandas DataFrame.
# Names are read as plain strings: without dtype=str / keep_default_na=False
# pandas would turn names like "001" into the integer 1 and names like "NA"
# or "None" into NaN, and the rewritten CSV would no longer match the files.
df = pd.read_csv(input_csv_path, dtype=str, keep_default_na=False)
print("Original Dataframe ",df)

# Rows labelled "No Match" carry no reference name; drop them so that a
# surviving "No Match" row is not mistaken for a real identification.
df_matched = df[df["Identified Reference Name"] != "No Match"]

# Identify duplicates but keep the first occurrence
df_filtered = df_matched.drop_duplicates(subset="Identified Reference Name", keep='first')
print("Filtered Dataframe",df_filtered)

# Save the filtered DataFrame to a new CSV file
df_filtered.to_csv(output_csv_path, index=False)

print(f"Filtered results saved to {output_csv_path}")

In [None]:
import os
import pandas as pd
from pathlib import Path
import shutil

# Paths to input and output directories
input_csv_path = 'C:/Users/sc23krj/Desktop/Karan MSC Thesis/filtered_hypercube_identification.csv'
hypercube_directory = r"C:\Users\sc23krj\Desktop\Karan MSC Thesis\Extracted Hypercubes"
output_directory = r"C:\Users\sc23krj\Desktop\Karan MSC Thesis\Renamed Hypercubes"

# Ensure the output directory exists
Path(output_directory).mkdir(parents=True, exist_ok=True)

# Load the CSV file into a pandas DataFrame.
# Read every cell as a plain string so that numeric-looking names keep their
# exact spelling (e.g. leading zeros) and names such as "NA" are not
# converted to NaN; the string operations below rely on this.
df = pd.read_csv(input_csv_path, dtype=str, keep_default_na=False)

# Iterate over each row in the DataFrame to copy and rename the hypercubes
for hypercube_name, reference_name in zip(df['Original Hypercube Name'], df['Identified Reference Name']):
    # "No Match" (or an empty cell) is not a reference name; copying would
    # create a bogus "No Match.h5" file.
    if not reference_name or reference_name == "No Match":
        print(f"Skipping {hypercube_name}: no identified reference")
        continue

    # Remove '_image' from the hypercube name in the CSV to match the original file name.
    # NOTE(review): str.replace removes every '_image' occurrence, not only a
    # trailing one - confirm hypercube names never contain it elsewhere.
    original_hypercube_name = hypercube_name.replace('_image', '') + '.h5'
    identified_image_name = reference_name + '.h5'  # Append .h5 to match the file format

    # Define the full paths
    original_hypercube_path = Path(hypercube_directory) / original_hypercube_name
    new_hypercube_path = Path(output_directory) / identified_image_name

    # Check if the original hypercube file exists
    if original_hypercube_path.exists():
        # Copy and rename the hypercube file
        shutil.copy2(str(original_hypercube_path), str(new_hypercube_path))
        print(f"Copied and renamed {original_hypercube_name} to {identified_image_name}")
    else:
        print(f"File not found: {original_hypercube_path}")

print(f"Copying and renaming completed. Files saved to {output_directory}")