# Libraries

In [81]:
from collections import Counter
import json
import os

import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from keras.layers import Input, GlobalAveragePooling2D, Dense
from keras.preprocessing import image
import numpy as np
import pandas as pd

import openai

# Functions

In [82]:
# Load ResNet50 with its ImageNet-pretrained weights. This module-level
# `model` is the one classify_cat_species() runs predictions with.
# No `include_top` argument is passed, so the Keras default applies
# (presumably the stock ImageNet classification head -- TODO confirm).
model = ResNet50(weights='imagenet')
# Disabled alternative, kept for reference: a headless ResNet50
# (include_top=False) with global average pooling, a 1024-unit ReLU layer
# and a custom 5-class softmax output, compiled with Adam and categorical
# cross-entropy. None of the lines below are executed.
# input_shape = (224, 224, 3)
# num_classes = 5
# model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
# x = model.output
# x = GlobalAveragePooling2D()(x)
# x = Dense(1024, activation='relu')(x)
# predictions = Dense(num_classes, activation='softmax', name='custom_output')(x)
# model = Model(inputs=model.input, outputs=predictions)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

def classify_cat_species(img_path):
    """Classify one image with the module-level ResNet50 ``model``.

    The image is loaded at 224x224, turned into a single-item batch, passed
    through ``preprocess_input`` and ``model.predict``, and the top three
    predictions are decoded.

    No cat-specific filtering is applied (the filter that once restricted
    results to cat labels is disabled), so the returned label can be any
    class the model predicts.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        A two-element list ``[label, score]`` taken from the first decoded
        prediction.
    """
    pil_img = image.load_img(img_path, target_size=(224, 224))

    # Build a batch of one: array -> add leading batch axis -> preprocess.
    pixels = image.img_to_array(pil_img)
    batch = preprocess_input(np.expand_dims(pixels, axis=0))

    # decode_predictions returns one list per batch item; take the only one.
    top_predictions = decode_predictions(model.predict(batch), top=3)[0]

    # Disabled cat-only filter, kept for reference:
    # [p for p in top_predictions
    #  if 'cat' in p[1] or 'Egyptian' in p[1] or 'Persian' in p[1]]

    # Each decoded entry is indexed as [1] = label, [2] = score.
    best = top_predictions[0]
    return [best[1], best[2]]


# client = openai.OpenAI()
# def is_same_species(cat1, cat2):
#     response = client.responses.create(
#         model="gpt-4o-mini",
#         input=f"Looking past syntax differences, are these two cat species the same? '{cat1}' and '{cat2}'. Respond with 'yes' or 'no'"
#     )
#     return response.output_text
# Load a pre-trained model (this requires downloading a model such as GoogleNews-vectors)
# similarity = similarity_model.similarity("house", "home")
# print("Cosine similarity using Gensim:", similarity)

# print(is_same_species("Siamese cat", "Bengal cat"))
# print(is_same_species("egypt mau", "Egyptian_mau"))

# Analyze

In [83]:
# get samples
# Collect up to NUM_SAMPLES '.jpg' paths per species directory under IMG_PATH.
NUM_SAMPLES = 10
IMG_PATH = "./postprocessed_data/labeled_imgs"

# Maps a display name (directory name with underscores replaced by spaces)
# to a list of image paths for that species.
samples = {}
# os.listdir() returns entries in arbitrary order; sorting makes both the
# species order and the chosen sample images reproducible between runs.
for entry in sorted(os.listdir(IMG_PATH)):
    species_path = os.path.join(IMG_PATH, entry)
    # Only directories are species. Skipping everything else keeps stray
    # files in IMG_PATH from showing up as species with no images.
    if not os.path.isdir(species_path):
        continue
    jpg_paths = [
        os.path.join(species_path, file_name)
        for file_name in sorted(os.listdir(species_path))
        if file_name.endswith('.jpg')
    ]
    # Cap the number of images per species.
    samples[entry.replace("_", " ")] = jpg_paths[:NUM_SAMPLES]

# print(json.dumps(samples, indent=4))

# Run the classifier over every sampled image and tally the predicted labels.
# Paths for the GloVe embeddings / converted word2vec file; only referenced by
# the disabled similarity code below.
SIMILARITY_EMBEDDINGS = "raw_data/glove.6B.300d.txt"
WORD2VEC_OUTPUT_FILE = "raw_data/glove.6B.300d.word2vec.txt"
# glove2word2vec(SIMILARITY_EMBEDDINGS, WORD2VEC_OUTPUT_FILE)
# similarity_model = KeyedVectors.load_word2vec_format(WORD2VEC_OUTPUT_FILE, binary=False)

# Flat list of the predicted label for every image, across all species.
all_results = []
for species, img_list in samples.items():
    print(f"Species: {species}")
    # classify_cat_species returns [label, score]; keep only the label.
    results = [prediction[0] for prediction in map(classify_cat_species, img_list)]
    print(results)
    # print([similarity_model.similarity(species, x) for x in results])

    all_results += results

# Predicted labels ordered from most to least frequent.
print(Counter(all_results).most_common())
# classify_cat_species(img_path)

Species: American Curl
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 857ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
['tabby', 'Egyptian_cat', 'three-toed_sloth', 'koala', 'tabby', 'tiger_cat', 'tabby', 'tub', 'borzoi', 'tabby']
Species: Chausie
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m