In [20]:
import numpy as np
import pandas as pd
import sys
import os
import json
import tensorflow as tf

### Load Data

In [39]:
# Root of the project: the working directory at the time this cell runs.
HOME = os.getcwd()

# Raw images live under Data/Weather_Dataset; serialized tensors are written
# to the sibling directory Data/Image_Tensors.
IMAGE_DATA_PATH = "/".join((HOME, "Data", "Weather_Dataset"))
IMAGE_TENSOR_PATH = "/".join((HOME, "Data", "Image_Tensors"))

def load_jpg_to_tensor(image_filepath):
    """
    Reads an image file and decodes it to a TensorFlow tensor.

    Args:
        image_filepath (str): Path to the JPG image file, relative to IMAGE_DATA_PATH

    Returns:
        A TensorFlow tensor representing the image with 3 color channels, or
        None if the file contents could not be decoded as an image. Callers
        must handle the None case.
    """
    # Read the raw bytes of the image file
    absolute_image_filepath = os.path.join(IMAGE_DATA_PATH, image_filepath)
    image_string = tf.io.read_file(absolute_image_filepath)

    # Decode the image bytes to a tensor. decode_image picks the decoder from
    # the file contents, so a mislabelled file (e.g. a GIF saved as .jpg) may
    # yield a 4-D tensor of frames instead of a single 3-D image.
    try:
        image_tensor = tf.image.decode_image(image_string, channels=3)
    except tf.errors.InvalidArgumentError:
        print(f"Invalid image file: {absolute_image_filepath}")
        return None

    return image_tensor

def _serialized_tensor_filepath(image_filepath, frame_number=None):
    """
    Builds the path of the binary file holding an image's serialized tensor.

    Shared by serialize_image_tensor and deserialize_image_tensor so that both
    always agree on where a given image's tensor is stored.

    Args:
        image_filepath (str): Path to the image file, relative to IMAGE_DATA_PATH
        frame_number (int, optional): 1-based frame number; when given, a
            "_<frame_number>" suffix is appended to the file name

    Returns:
        str: Path of the ".bin" file under IMAGE_TENSOR_PATH
    """
    # splitext strips only the final extension, so dots elsewhere in the path
    # (e.g. "rain/img.v2.jpg") no longer truncate the name or cause collisions.
    image_filepath_no_ext, _ = os.path.splitext(image_filepath)
    if frame_number is not None:
        image_filepath_no_ext = f"{image_filepath_no_ext}_{frame_number}"
    return os.path.join(IMAGE_TENSOR_PATH, f"{image_filepath_no_ext}.bin")


def serialize_image_tensor(image_filepath):
    """
    Converts image to tensor, serializes tensor, and saves it to a binary file.

    A 4-D tensor (multiple frames, e.g. from a GIF) is split along its first
    axis and each frame is saved to its own file with a "_<n>" suffix
    (1-based). Images that fail to decode are skipped.

    Args:
        image_filepath (str): Path to the JPG image file, relative to IMAGE_DATA_PATH
    """
    image_tensor = load_jpg_to_tensor(image_filepath)
    if image_tensor is None:
        # load_jpg_to_tensor already reported the invalid file; nothing to save.
        return None

    if image_tensor.ndim == 4:
        # get list of per-frame tensors from GIF
        frames = list(image_tensor)
    else:
        frames = [image_tensor]

    has_multiple_frames = len(frames) > 1
    for frame_number, frame in enumerate(frames, start=1):
        serialized_tensor_filepath = _serialized_tensor_filepath(
            image_filepath, frame_number if has_multiple_frames else None
        )
        # Make sure the class sub-directory exists so this function also works
        # when called on its own, outside serialize_dataset.
        os.makedirs(os.path.dirname(serialized_tensor_filepath), exist_ok=True)

        serialized_tensor = tf.io.serialize_tensor(frame)
        with open(serialized_tensor_filepath, "wb") as f:
            f.write(serialized_tensor.numpy())

    return None

def deserialize_image_tensor(image_filepath, frame_number=None):
    """
    Reads serialized tensor binary file associated with image,
    deserializes the tensor, and returns it.

    Args:
        image_filepath (str): Path to the JPG image file, relative to IMAGE_DATA_PATH
        frame_number (int, optional): 1-based frame number to read for images
            that were saved as several frames ("_<n>" suffixed files). Leave
            as None for ordinary single-frame images.

    Returns:
        tf.Tensor: Original Image Tensor (dtype uint8)
    """
    serialized_tensor_filepath = _serialized_tensor_filepath(image_filepath, frame_number)

    with open(serialized_tensor_filepath, "rb") as f:
        serialized_tensor = f.read()

    return tf.io.parse_tensor(serialized_tensor, out_type=tf.uint8)

In [42]:
def serialize_dataset():
    """
    Serializes all images in the dataset and saves them to binary files.

    Walks every sub-directory of IMAGE_DATA_PATH once, mirrors it under
    IMAGE_TENSOR_PATH, and serializes each ".jpg" file found there. Files
    placed directly in IMAGE_DATA_PATH (outside any class directory) are
    ignored.
    """
    os.makedirs(IMAGE_TENSOR_PATH, exist_ok=True)

    for dirpath, _dirnames, filenames in os.walk(IMAGE_DATA_PATH):
        # Path of this directory relative to the dataset root, e.g. "rain".
        relative_dirpath = os.path.relpath(dirpath, IMAGE_DATA_PATH)
        if relative_dirpath == os.curdir:
            # The dataset root itself is not a class directory.
            continue

        # Create the matching OUTPUT directory. (Creating the source directory
        # here instead would raise FileExistsError, since it already exists.)
        class_tensor_path = os.path.join(IMAGE_TENSOR_PATH, relative_dirpath)
        os.makedirs(class_tensor_path, exist_ok=True)

        for filename in filenames:
            if filename.endswith('.jpg'):
                # Keep the full relative path so images in nested directories
                # resolve correctly against IMAGE_DATA_PATH.
                jpg_file_path = os.path.join(relative_dirpath, filename)
                serialize_image_tensor(jpg_file_path)
    return None
                    
# Run the serialization over the whole dataset. This reads every .jpg under
# IMAGE_DATA_PATH and writes .bin files under IMAGE_TENSOR_PATH.
serialize_dataset()



Deleted 0 images.
