In [20]:
import albumentations as A
import cv2
import numpy as np
import os
from PIL import Image
import imagehash
from imagehash import ImageHash
from helpers import resize_image_greyscale_save
from helpers import resize_image_save

import pandas as pd

In [21]:

def iterate_images_and_save_to_csv(folder_path, output_csv_path):
    """Hash every image under a folder and write the hashes to a CSV file.

    Walks ``folder_path`` recursively. Each image file is opened, converted to
    greyscale ('L' mode) and hashed with ``imagehash.phash`` at hash sizes
    16, 32, 64, 128 and 256. One row per image is written to
    ``output_csv_path`` with the columns ``Filename``, ``Phash16``,
    ``Phash32``, ``Phash64``, ``Phash128`` and ``Phash256``. Hashes are stored
    as their hex string form.

    Args:
        folder_path: Root directory that is searched for images.
        output_csv_path: Path of the CSV file to write.

    Returns:
        None. The result is the CSV file; progress is printed per image.
    """
    # Compare real extensions (with the dot, case-insensitively) so that
    # 'CARD.PNG' is included and a file merely *ending* in 'png' is not.
    image_extensions = {'.png', '.jpg', '.jpeg', '.bmp', '.gif'}
    hash_sizes = (16, 32, 64, 128, 256)
    data = []

    # Iterate through each folder within the main folder
    for subdir, _, files in os.walk(folder_path):
        for file in files:
            extension = os.path.splitext(file)[1].lower()
            if extension not in image_extensions:
                continue

            image_path = os.path.join(subdir, file)
            # convert() returns a fully loaded copy, so the underlying file can
            # be closed right away instead of leaking one handle per image.
            with Image.open(image_path) as opened_image:
                image = opened_image.convert('L')

            # str(ImageHash) is the hex form that ends up in the CSV.
            hashes = [str(imagehash.phash(image, hash_size=size)) for size in hash_sizes]
            # `file` from os.walk is already the bare file name.
            data.append([file, *hashes])

            print(f'Computed phash for {file}')

    # Create a pandas DataFrame
    columns = ['Filename'] + [f'Phash{size}' for size in hash_sizes]
    df = pd.DataFrame(data, columns=columns)

    # Save DataFrame to CSV
    df.to_csv(output_csv_path, index=False)

In [22]:
# CSV file holding the precomputed hashes, and the image tree it is built from.
output_csv_path = 'pokemon-tcg-image-data.csv'
folder_path = 'pokemon-tcg-images'

# Uncomment to (re)build the CSV by hashing every image under folder_path.
# iterate_images_and_save_to_csv(folder_path, output_csv_path)

In [36]:
# Hash size passed to imagehash.phash for the query image; the lookup cell
# uses the same value to pick the `Phash{hash_size}` column of the CSV.
hash_size = 16

# Images from the 'pokemon-tcg-images' tree: keep the opened image and a
# greyscale ('L') copy side by side.
charizard_image = Image.open('pokemon-tcg-images/base1-4/base1-4.png')
charizard_image_grey = charizard_image.convert('L')
# The Alakazam image is only kept in its greyscale form.
alakazam_image = Image.open('pokemon-tcg-images/base1-1/base1-1.png').convert('L')

# Images from the 'pokemon-tcg-test-images' folder.
charizard_image_real = Image.open('pokemon-tcg-test-images/basesetcharizard.png')
charizard_image_real_grey = charizard_image_real.convert('L')
charizardxy_image_real = Image.open('pokemon-tcg-test-images/xycharizard_resized.png')
charizardxy_image_real_grey = charizardxy_image_real.convert('L')
groudon_image_real = (
    Image.open('pokemon-tcg-test-images/groudonparadoxrift.png')
    .resize((240, 330))
    .convert('L')
)

# resize_image_save('pokemon-tcg-test-images/metagross.png', 'metagross')
# resize_image_greyscale_save('pokemon-tcg-test-images/metagross.png', 'metagross')

# Query image: resized to 240x330 and converted to greyscale before hashing.
query_image = (
    Image.open('pokemon-tcg-test-images/rayquazavmax.png')
    .resize((240, 330))
    .convert('L')
)
phash = imagehash.phash(query_image, hash_size=hash_size)


In [37]:
# Load the precomputed hash table. dtype=str keeps every hex hash as text so
# imagehash.hex_to_hash always receives a string.
df = pd.read_csv(output_csv_path, dtype=str)

filenames = df['Filename'].tolist()
phashes = df[f'Phash{hash_size}'].tolist()

# Subtracting two ImageHash objects gives their distance; pair each stored
# file name with its distance to the query hash `phash`.
distances = [
    (filename, phash - imagehash.hex_to_hash(stored_hex))
    for filename, stored_hex in zip(filenames, phashes)
]

# Smallest distance first; ties keep their CSV order (stable sort).
distances.sort(key=lambda x: x[1])

print('Top 5 closest images:')
for (filename, distance) in distances[:5]:
    print(f'{filename} with distance {distance}')
    # Images are looked up as <folder_path>/<file stem>/<file name>. splitext
    # strips whatever extension the file has, not only '.png', since the
    # indexing function also accepts jpg/jpeg/bmp/gif files.
    card_folder = os.path.splitext(filename)[0]
    image_path = os.path.join(folder_path, card_folder, filename)
    read_image = cv2.imread(image_path)

#     if read_image is not None:
#         cv2.imshow("file", read_image)

# cv2.waitKey(0)
# cv2.destroyAllWindows()

Top 5 closest images:
swsh7-218.png with distance 86
ecard3-24.png with distance 92
swsh6-113.png with distance 96
sm10-120.png with distance 98
swsh12tg-TG19.png with distance 98
