<a href="https://colab.research.google.com/github/avkaz/DeepLearningPetIdentification/blob/streamlit-ui/DatasetCreation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initial notebook

### Getting data

In [1]:
## 1st - Download the utility.py file from the GitHub repository
## 2nd - Import the downloaded `utility` module

import requests

# Raw-content URL of utility.py on the main branch
url = "https://raw.githubusercontent.com/avkaz/DeepLearningPetIdentification/main/utility.py"

# Fetch the file; the timeout keeps the cell from hanging forever on a stalled connection
response = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx so an HTML error page is never saved (and imported) as utility.py
response.raise_for_status()

# Save the downloaded bytes next to the notebook so `import utility` can find them
with open("utility.py", "wb") as f:
    f.write(response.content)


import utility
print("utility.py downloaded successfully.")



utility.py downloaded successfully.


In [2]:
# Load the pet records through the project helper. Judging by the preview printed by the
# following cell, the result is a dict keyed by pet slug — TODO confirm in utility.get_data.
data = utility.get_data()

In [3]:

# Preview: the first three (pet key, pet record) pairs of the loaded data.
first_3_pets = [*data.items()][:3]
first_3_pets

[('tanyny-chomutov-2024-12-21',
  {'Jméno': 'Tanyny',
   'Pohlaví': 'Samec',
   'Kraj': 'Ústecký',
   'Okres': 'Chomutov',
   'Plemeno': 'Kříženec',
   'Věk': '5 let',
   'Barva': 'Černá',
   'Velikost': 'Střední - 10-17kg',
   'url': 'https://www.psidetektiv.cz/zvire/tanyny-chomutov-2024-12-21',
   'images': ['https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190390.jpg',
    'https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190391.jpg',
    'https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190392.jpg',
    'https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190393.jpg',
    'https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190394.jpg']}),
 ('haily-tachov-2024-12-21',
  {'Jméno': 'Haily',
   'Pohlaví': 'Samice',
   'Kraj': 'Plzeňský',
   'Okres': 'Tachov',
   'Plemeno': 'Jezevčík',
   'Věk': '5 let',
   'Barva': 'Tmavý divočák',
   'Velikost': 'Střední - 10-17kg',
   'url': 'https://www.psidetektiv.cz/zvire/haily-tachov-2024-12-21',
   'images

In [10]:
# NOTE(review): leftover scratch cell — it only prints an empty line, and its execution
# count (10) is out of sequence with the surrounding cells; likely safe to delete.
print()




In [4]:
import requests
import json
import utility  # assuming the utility module is correct and available

def replace_urls_with_vectors(data, limit=None):
    """
    Build a copy of the pet data in which image URLs are replaced by image vectors.

    Every entry of a pet's ``'images'`` list that is a string starting with
    ``'http'`` is passed to ``utility.download_and_preprocess_image``; the
    non-``None`` results replace the URL list. Pets for which no image could be
    processed keep their original ``'images'`` value. Neither ``data`` nor its
    per-pet dicts are modified, so the function can be re-run on the same input.

    Args:
        data (dict): Maps a pet key to a dict of pet information that holds an
            ``'images'`` list of URLs.
        limit (int, optional): The maximum number of pets to process. If None,
            processes all pets; ``0`` processes none.

    Returns:
        dict: A new dict with the processed pets (at most ``limit`` of them), in
        the iteration order of ``data``.
    """
    updated_data = {}
    pet_count = 0
    # Compare against None explicitly: a plain truthiness test would treat
    # limit=0 as "no limit" and process the whole dataset.
    has_limit = limit is not None

    print(f"Starting to replace URLs with vectors... Limit: {limit if has_limit else 'No limit'}")

    # Iterate through the pets data
    for pet_key, pet_info in data.items():
        if has_limit and pet_count >= limit:
            print(f"Reached the limit of {limit} pets. Stopping processing.")
            break  # Stop if the limit is reached

        print(f"Processing pet {pet_count + 1}: {pet_key}...")

        # Check each URL in the pet's images; a missing or None 'images' value
        # is treated as "no images" instead of raising.
        updated_images = []
        for url in pet_info.get('images') or []:
            if not isinstance(url, str) or not url.startswith('http'):
                print(f"Skipping invalid URL: {url}")
                continue

            print(f"Downloading and processing image from: {url}")

            # Best effort per image: one failing download must not abort the pet.
            try:
                image_vector = utility.download_and_preprocess_image(url, target_size=(224, 224))
            except Exception as e:
                print(f"Error processing image for {pet_key}: {e}")
                continue

            if image_vector is not None:
                updated_images.append(image_vector)
            else:
                print(f"Failed to process image from {url}. Skipping.")

        # Work on a shallow copy so the caller's record keeps its URL list.
        updated_info = dict(pet_info)

        # Only update the images if there are valid processed vectors
        if updated_images:
            updated_info['images'] = updated_images
            print(f"Processed {len(updated_images)} images for {pet_key}.")
        else:
            print(f"No valid images processed for {pet_key}.")

        # Add updated pet info to the result
        updated_data[pet_key] = updated_info
        pet_count += 1

    print(f"Finished processing {pet_count} pets.")

    return updated_data


In [6]:
# Replace the image URLs with vectors in the pets data.
# The second positional argument is `limit`: only the first 200 pets are processed.
updated_pets_data = replace_urls_with_vectors(data, 200)


Starting to replace URLs with vectors... Limit: 200
Processing pet 1: tanyny-chomutov-2024-12-21...
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190390.jpg
Uploading model...
Model loaded successfully.
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190391.jpg
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190392.jpg
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190393.jpg
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/22/img190394.jpg
Processed 5 images for tanyny-chomutov-2024-12-21.
Processing pet 2: haily-tachov-2024-12-21...
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/21/img190383.jpg
Downloading and processing image from: https://www.psidetektiv.cz/data/catalog/big/2024/12/21/img190384.jpg
Pr

In [7]:
import json
import numpy as np
import tensorflow as tf

def tensor_to_list(obj):
    """
    Recursively converts tensors and NumPy values into JSON-serializable objects.

    Handles ``tf.Tensor`` objects, NumPy arrays and NumPy scalars, and descends
    into dicts, lists and tuples so that nested values are converted as well.

    Args:
        obj: The object to be converted.

    Returns:
        A nested list for a Tensor or NumPy array, a plain Python scalar for a
        NumPy scalar, a rebuilt container for dict/list/tuple, otherwise the
        object unchanged.
    """
    if isinstance(obj, tf.Tensor):
        return obj.numpy().tolist()  # Convert Tensor to numpy array and then to a list
    if isinstance(obj, np.ndarray):
        return obj.tolist()  # json cannot serialize ndarrays directly
    if isinstance(obj, np.generic):
        return obj.item()  # NumPy scalar (e.g. np.float32) -> Python scalar
    if isinstance(obj, dict):
        return {key: tensor_to_list(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [tensor_to_list(item) for item in obj]
    if isinstance(obj, tuple):
        # Keep the tuple type but convert its members; json writes tuples as arrays.
        return tuple(tensor_to_list(item) for item in obj)
    return obj

def save_dict_to_json(data_dict, file_name):
    """
    Saves a dictionary to a JSON file, handling Tensor objects.

    The data is first written to ``<file_name>.tmp`` and only moved over the
    target once serialization has succeeded, so a failure part-way through
    never truncates or corrupts an already existing output file. Errors are
    reported with a printed message rather than raised.

    Args:
        data_dict (dict): The dictionary to be saved.
        file_name (str): The name of the output JSON file.

    Returns:
        None
    """
    import os  # local import: keeps the function self-contained within its cell

    temp_path = None
    try:
        target_path = os.fspath(file_name)
        temp_path = f"{target_path}.tmp"

        # Convert any Tensor objects in the dictionary to lists
        data_dict = tensor_to_list(data_dict)

        with open(temp_path, 'w', encoding='utf-8') as json_file:
            # Use json.dump to write the dictionary to the file with indentation for readability
            json.dump(data_dict, json_file, ensure_ascii=False, indent=4)

        # Replace the target in one step only after the dump completed.
        os.replace(temp_path, target_path)
        print(f"Data successfully saved to {file_name}.")
    except Exception as e:
        print(f"Error saving data to JSON: {e}")
        # Remove the partial temp file, if one was created.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass  # nothing to clean up




In [8]:
# Persist the processed pets (image vectors included) to a JSON file in the working directory.
save_dict_to_json(updated_pets_data, 'updated_data.json')


Data successfully saved to updated_data.json.
