<a href="https://colab.research.google.com/github/avkaz/DeepLearningPetIdentification/blob/fixes/finetuned.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Downloading utility file and importing need dependesies

In [36]:

import requests
# Correct raw URL for the utility.py file
url = "https://raw.githubusercontent.com/avkaz/DeepLearningPetIdentification/main/utility.py"

# Fetch and save the file locally
response = requests.get(url)
with open("utility.py", "wb") as f:
    f.write(response.content)



In [None]:
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import io
import requests
import json
import pandas as pd
import faiss
import random
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from google.colab import files
import utility
from itertools import islice


## Getting Data

In [76]:
google_drive_url = "https://drive.google.com/file/d/1VR5GWGrVjEtJHEzTPIB-EHDQMG3UnmZ9/view?usp=sharing"
utility.download_file_from_google_drive(google_drive_url, "./data/pets_pair.json")

In [2]:
data_in = utility.load_json_and_transform_lists_to_tensors('updated_data.json')

Data successfully loaded and transformed.


In [77]:
data = dict(list(data_in.items())[:200])

## Defining base model

In [78]:
model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg')

In [79]:
data= filter_pets_by_images(data_in)

In [80]:
print(dict(list(data_in.items())[:1]))

{'tanyny-chomutov-2024-12-21': {'Jméno': 'Tanyny', 'Pohlaví': 'Samec', 'Kraj': 'Ústecký', 'Okres': 'Chomutov', 'Plemeno': 'Kříženec', 'Věk': '5 let', 'Barva': 'Černá', 'Velikost': 'Střední - 10-17kg', 'url': 'https://www.psidetektiv.cz/zvire/tanyny-chomutov-2024-12-21', 'images': <tf.Tensor: shape=(5, 224, 224, 3), dtype=float32, numpy=
array([[[[0.53333336, 0.4117647 , 0.3019608 ],
         [0.5294118 , 0.40784314, 0.29411766],
         [0.54901963, 0.42745098, 0.3137255 ],
         ...,
         [0.28235295, 0.22352941, 0.14901961],
         [0.2784314 , 0.21960784, 0.14509805],
         [0.27450982, 0.21568628, 0.13333334]],

        [[0.5411765 , 0.41960785, 0.30980393],
         [0.56078434, 0.4392157 , 0.3254902 ],
         [0.5568628 , 0.43529412, 0.3137255 ],
         ...,
         [0.30588236, 0.25490198, 0.18039216],
         [0.29803923, 0.24705882, 0.17254902],
         [0.29803923, 0.24705882, 0.17254902]],

        [[0.5176471 , 0.40392157, 0.2784314 ],
         [0.541176

## Generatin embeding using base model to simplify and speed up learning for future model

In [81]:
# create a mapping of pet identifiers to integer indices
pet_ids = list(data.keys())
pet_to_idx = {pet_id: idx for idx, pet_id in enumerate(pet_ids)}

# generate embeddings for all pets and store them
embeddings_dict = {
    pet_key: generate_embeddings(model, pet_info['images'])
    for pet_key, pet_info in data.items()
}




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 895ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 111ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6

## Generating triplets for fine tuning

In [82]:
def generate_triplet(pet_key, pet_info, embeddings_dict, data):
    embeddings = embeddings_dict[pet_key]  # use pre-generated embeddings

    # create triplets: anchor, positive, and negative
    for i in range(len(embeddings)):
        anchor = embeddings[i]

        # pick a random positive example from the same pet's observation, excluding the anchor image
        positive_idx = np.random.choice([idx for idx in range(len(embeddings)) if idx != i ])
        positive = embeddings[positive_idx]

        # pick a random negative example (different pet)
        negative_pet_key = np.random.choice([key for key in data if key != pet_key])
        negative_embedding = embeddings_dict[negative_pet_key][0]
        #print(i)
        #print(positive_idx)
        #print(negative_pet_key)
        yield [anchor, positive, negative_embedding], pet_to_idx[pet_key]
    #print(embeddings_dict[pet_key])



In [83]:

# triplet training
triplets = []
labels = []

for pet_key, pet_info in data.items():
    for triplet, label in generate_triplet(pet_key, pet_info, embeddings_dict, data):
        triplets.append(triplet)
        labels.append(label)

# Convert triplets to numpy array
triplets = np.array(triplets)

# Flatten the embeddings for FAISS
flattened_triplets = np.array([
    np.concatenate([anchor.flatten(), positive.flatten(), negative.flatten()])
    for anchor, positive, negative in triplets
])


In [None]:
## Fine tuning the model

In [84]:


# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(flattened_triplets, labels, test_size=0.2, random_state=42)

# Initialize FAISS index for similarity search
embedding_dim = X_train.shape[1]  # The size of the flattened embeddings
index = faiss.IndexFlatL2(embedding_dim)  # L2 distance metric for similarity search

# Add training data embeddings to the FAISS index
index.add(X_train)  # No need to reshape, as X_train is already flat


input_dim = 3840

# Create the model
model = create_model(input_dim)

# Compile and train the model
model.compile(optimizer='adam', loss=lambda y_true, y_pred: triplet_loss(y_true, y_pred, margin=1.0))
model.fit(X_train, np.array(y_train), epochs=60, batch_size=32)

Epoch 1/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.0000  
Epoch 2/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0000
Epoch 3/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0000
Epoch 4/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0000 
Epoch 5/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.0000 
Epoch 6/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0000 
Epoch 7/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0000 
Epoch 8/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0000 
Epoch 9/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 1.0000
Epoch 10/60
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 1.0000 
E

<keras.src.callbacks.history.History at 0x333b04430>

## Evaluation

In [85]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Make predictions on the test set
y_pred = model.predict(X_test)

# Convert continuous predictions to discrete labels (if it's multiclass classification)
y_pred_labels = np.argmax(y_pred, axis=1)  # For multiclass, you need to take the class with the highest probability

# Calculate precision, recall, and F1-score with multiclass average
precision = precision_score(y_test, y_pred_labels, average='weighted') 
recall = recall_score(y_test, y_pred_labels, average='weighted')
f1 = f1_score(y_test, y_pred_labels, average='weighted')

# Print the metrics
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-score: {f1:.4f}')


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Precision: 0.0006
Recall: 0.0247
F1-score: 0.0012


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
