# Model to recognise hand-written text

pip install tensorflow

pip install stow

pip install mltu==0.1.5

pip install --upgrade protobuf

In [6]:
# Libraries
import tensorflow as tf
import stow
import tarfile
from tqdm import tqdm
from urllib.request import urlopen
from io import BytesIO
from zipfile import ZipFile
import os

# Data collection

In [8]:
dataset_path = r'C:\Users\ljant\Downloads\IAM_Words\IAM_Words'

# Accumulators filled while parsing words.txt:
#   dataset - [image_path, label] pairs for every sample whose image exists
#   vocab   - every distinct character seen in a kept label
#   max_len - length of the longest kept label
dataset = []
vocab = set()
max_len = 0

# Path to the words.txt file
words_file_path = os.path.join(dataset_path, "words.txt")

# Reading lines from words.txt
with open(words_file_path, "r") as file:
    words = file.readlines()

for line in tqdm(words):
    # Skip header comments and blank lines
    if line.startswith("#") or not line.strip():
        continue

    line_split = line.split(" ")

    # Skip malformed lines and samples flagged "err" in the second field
    if len(line_split) < 2 or line_split[1] == "err":
        continue

    sample_id = line_split[0]

    # Images are stored as words/<folder1>/<folder2>/<sample_id>.png.
    # folder2 is the first two dash-separated fields of the id; a fixed
    # 8-character slice only works for ids such as "a01-000u-00-00" and
    # yields "a01-003-" for ids such as "a01-003-00-00", which made the
    # existence check below silently drop those samples.
    folder1 = sample_id[:3]
    folder2 = "-".join(sample_id.split("-")[:2])
    file_name = sample_id + ".png"
    label = line_split[-1].rstrip('\n')

    # Constructing the path to the image
    rel_path = os.path.join(dataset_path, "words", folder1, folder2, file_name)

    # Check if the image file exists
    if not os.path.exists(rel_path):
        continue

    # Append the image path and label to the dataset list
    dataset.append([rel_path, label])

    # Update the vocabulary set with characters from the label
    vocab.update(label)

    # Update the maximum label length
    max_len = max(max_len, len(label))

# Now, `dataset` is a list of [image_path, label] and `vocab` contains all unique characters in the labels.


100%|██████████| 115338/115338 [00:16<00:00, 7042.14it/s] 


In [11]:
%%writefile configs.py
import os
from datetime import datetime

from mltu.configs import BaseModelConfigs

class ModelConfigs(BaseModelConfigs):
    """Settings for one training run, stored as plain instance attributes."""

    def __init__(self):
        super().__init__()

        # Output folder: the project directory plus a minute-resolution
        # timestamp taken when this object is created.
        run_stamp = datetime.now().strftime("%Y%m%d%H%M")
        self.model_path = os.path.join(
            r"C:\Users\ljant\Desktop\Ironhack\Projects\Final-Project-Ironhack-2024",
            run_stamp,
        )

        # Placeholders; both start empty/zero here.
        self.vocab = ""
        # Input image size in pixels.
        self.height = 32
        self.width = 128
        self.max_text_length = 0

        # Training hyper-parameters.
        self.batch_size = 16
        self.learning_rate = 0.0005
        self.train_epochs = 1000
        self.train_workers = 20

Overwriting configs.py


In [12]:
from configs import ModelConfigs

In [13]:
configs = ModelConfigs()

# Sort the characters so the vocabulary string is the same on every run.
# Joining the set directly follows set iteration order, which for strings
# can differ between interpreter sessions and would change which index each
# character maps to.
configs.vocab = "".join(sorted(vocab))
configs.max_text_length = max_len
configs.save()

# Create a data provider for the dataset

In [18]:
%%writefile augmentors.py
import cv2
import typing
import numpy as np
import logging

from . import Image

""" Implemented augmentors:
- RandomBrightness
- RandomRotate
- RandomErodeDilate
- RandomSharpen
- RandomGaussianBlur
- RandomSaltAndPepper
"""


def randomness_decorator(func):
    """ Decorator that type-checks the image and applies ``func`` only some of the time

    The decorated callable must be a method of an object exposing
    ``self._random_chance`` (float) and ``self.logger``.

    Args:
        func (callable): Method with signature ``(self, image, annotation)``

    Returns:
        callable: Wrapper with the same signature that either returns the
            inputs untouched or the result of ``func``. The wrapper keeps the
            name and docstring of ``func``.
    """
    # Local import so this function does not rely on a module-level import
    import functools

    @functools.wraps(func)
    def wrapper(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        # Anything that is not an Image object is passed through unchanged
        # after logging an error
        if not isinstance(image, Image):
            self.logger.error(f"image must be Image object, not {type(image)}, skipping augmentor")
            return image, annotation

        # Skip the augmentation when the random draw exceeds the configured chance
        if np.random.rand() > self._random_chance:
            return image, annotation

        # return result of function
        return func(self, image, annotation)

    return wrapper


class Augmentor:
    """ Object that should be inherited by all augmentors

    Subclasses override ``__call__`` (decorated with ``randomness_decorator``)
    to implement the actual augmentation; this base implementation returns its
    inputs unchanged.

    Args:
        random_chance (float, optional): Chance of applying the augmentor. Where 0.0 is never and 1.0 is always. Defaults to 0.5.
        log_level (int, optional): Log level for the augmentor. Defaults to logging.INFO.
    """
    def __init__(self, random_chance: float=0.5, log_level: int = logging.INFO) -> None:
        self._random_chance = random_chance
        self._log_level = log_level

        # One logger per concrete augmentor class, honouring the requested
        # level (previously the level was always forced to INFO).
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(log_level)

        assert 0 <= self._random_chance <= 1.0, "random chance must be between 0.0 and 1.0"

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Return the image and annotation unchanged

        Args:
            image (Image): Image to be adjusted
            annotation (typing.Any): Annotation to be adjusted

        Returns:
            image (Image): The same image
            annotation (typing.Any): The same annotation
        """
        # do the augmentation here
        return image, annotation


class RandomBrightness(Augmentor):
    """ Scale the HSV saturation and value channels of an image by a random factor """
    def __init__(
        self, 
        random_chance: float = 0.5,
        delta: int = 100,
        log_level: int = logging.INFO,
        ) -> None:
        """ Configure the brightness augmentor

        Args:
            random_chance (float, optional): Chance of applying the augmentor. Where 0.0 is never and 1.0 is always. Defaults to 0.5.
            delta (int, optional): Upper bound of the random offset; the scale factor is 1 + offset / 255 with offset drawn from [-delta, delta). Defaults to 100.
            log_level (int, optional): Log level for the augmentor. Defaults to logging.INFO.
        """
        super().__init__(random_chance, log_level)

        assert 0 <= delta <= 255.0, "Delta must be between 0.0 and 255.0"
        self._delta = delta

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Apply one random brightness change

        Args:
            image (Image): Image to be adjusted
            annotation (typing.Any): Annotation, returned untouched

        Returns:
            image (Image): The same image object, updated with the adjusted pixels
            annotation (typing.Any): The annotation that was passed in
        """
        scale = 1 + np.random.uniform(-self._delta, self._delta) / 255

        # Work in float so the multiplication cannot wrap around
        pixels = np.array(image.HSV(), dtype = np.float32)
        for channel in (1, 2):
            pixels[:, :, channel] = pixels[:, :, channel] * scale

        # Clamp to the 8-bit range before converting back
        clipped = np.clip(pixels, 0, 255)
        adjusted = cv2.cvtColor(np.uint8(clipped), cv2.COLOR_HSV2BGR)

        image.update(adjusted)
        return image, annotation


class RandomRotate(Augmentor):
    """ Rotate an image by a random angle, resizing the canvas to the rotated bounding box """
    def __init__(
        self, 
        random_chance: float = 0.5,
        angle: typing.Union[int, typing.List]=30, 
        borderValue: typing.Tuple[int, int, int]=None,
        log_level: int = logging.INFO,
        ) -> None:
        """ Configure the rotation augmentor

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            angle (int, list): A single value (angle is drawn uniformly from [-angle, angle)) or a list of angles to pick from
            borderValue (tuple): Tuple of 3 integers used to fill the border; a random colour is drawn when None
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
        """
        super().__init__(random_chance, log_level)

        self._angle, self._borderValue = angle, borderValue

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Apply one random rotation

        Args:
            image (Image): Image to be rotated
            annotation (typing.Any): Annotation, returned untouched

        Returns:
            image (Image): The same image object, updated with the rotated pixels
            annotation (typing.Any): The annotation that was passed in
        """
        # Draw the angle: pick from the list, or sample the symmetric range
        if isinstance(self._angle, list):
            drawn = np.random.choice(self._angle)
        else:
            drawn = np.random.uniform(-self._angle, self._angle)
        angle = float(drawn)

        # Border colour: random unless one was configured
        if self._borderValue is None:
            border_source = np.random.randint(0, 255, 3)
        else:
            border_source = self._borderValue
        borderValue = [int(component) for component in border_source]

        center_x, center_y = image.center

        # Rotation about the image centre at scale 1.0; the absolute values of
        # the first-row entries are the cosine and sine used for the new size
        M = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)
        cos, sin = np.abs(M[0, 0]), np.abs(M[0, 1])

        # Size of the bounding box that contains the rotated image
        nW = int((image.height * sin) + (image.width * cos))
        nH = int((image.height * cos) + (image.width * sin))

        # Shift the transform so the rotated image is centred in the new canvas
        M[0, 2] += (nW / 2) - center_x
        M[1, 2] += (nH / 2) - center_y

        rotated = cv2.warpAffine(image.numpy(), M, (nW, nH), borderValue=borderValue)
        image.update(rotated)

        return image, annotation


class RandomErodeDilate(Augmentor):
    """ Apply either an erosion or a dilation, chosen at random """
    def __init__(
        self, 
        random_chance: float = 0.5,
        kernel_size: typing.Tuple[int, int]=(1, 1), 
        log_level: int = logging.INFO,
        ) -> None:
        """ Configure the erode/dilate augmentor

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            kernel_size (tuple): Tuple of 2 integers, shape of the all-ones kernel used for both operations
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
        """
        super().__init__(random_chance, log_level)
        self._kernel_size = kernel_size

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Erode or dilate the image once

        Args:
            image (Image): Image to be eroded or dilated
            annotation (typing.Any): Annotation, returned untouched

        Returns:
            image (Image): The same image object, updated with the result
            annotation (typing.Any): The annotation that was passed in
        """
        kernel = np.ones(self._kernel_size, np.uint8)

        # A draw of at most 0.5 selects erosion, anything above selects dilation
        morph = cv2.erode if np.random.rand() <= 0.5 else cv2.dilate
        image.update(morph(image.numpy(), kernel, iterations=1))

        return image, annotation


class RandomSharpen(Augmentor):
    """ Randomly sharpen image"""
    def __init__(
        self, 
        random_chance: float = 0.5,
        alpha: float = 0.25,
        lightness_range: typing.Tuple = (0.75, 2.0),
        kernel: np.ndarray = None,
        kernel_anchor: np.ndarray = None,
        log_level: int = logging.INFO,
        ) -> None:
        """ Randomly sharpen image
        
        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            alpha (float): Float between 0.0 and 1.0; lower bound of the blend weight, which is drawn from [alpha, 1.0) on every call
            lightness_range (tuple): Tuple of 2 floats, setting bounds for random lightness change
            kernel (np.ndarray): Numpy array of kernel for image convolution
            kernel_anchor (np.ndarray): Numpy array of kernel anchor for image convolution
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
        """
        super(RandomSharpen, self).__init__(random_chance, log_level)

        assert 0 <= alpha <= 1.0, "Alpha must be between 0.0 and 1.0"

        self._alpha_range = (alpha, 1.0)
        # NOTE: attribute name keeps its historical spelling
        self._ligtness_range = lightness_range
        self._lightness_anchor = 8

        self._kernel = np.array([[-1, -1, -1], [-1,  1, -1], [-1, -1, -1]], dtype=np.float32) if kernel is None else kernel
        self._kernel_anchor = np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32) if kernel_anchor is None else kernel_anchor

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Randomly sharpen image

        Args:
            image (Image): Image to be sharpened
            annotation (typing.Any): Annotation to be adjusted

        Returns:
            image (Image): Sharpened image
            annotation (typing.Any): Adjusted annotation if necessary
        """
        lightness = np.random.uniform(*self._ligtness_range)
        alpha = np.random.uniform(*self._alpha_range)

        # Build the sharpening kernel, then blend it with the anchor kernel
        # using the drawn weight.
        kernel = self._kernel_anchor  * (self._lightness_anchor + lightness) + self._kernel
        kernel -= self._kernel_anchor
        kernel = (1 - alpha) * self._kernel_anchor + alpha * kernel

        # filter2D applies the same kernel to every channel independently, so
        # no manual split/merge is needed and images that are not exactly
        # 3-channel (e.g. grayscale) no longer fail on unpacking.
        image.update(cv2.filter2D(image.numpy(), -1, kernel))

        return image, annotation
    

class RandomGaussianBlur(Augmentor):
    """ Randomly blur image with a Gaussian filter"""
    def __init__(
        self, 
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        sigma: typing.Union[int, float] = 0.5,
        ) -> None:
        """ Configure the Gaussian blur augmentor

        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            sigma (int, float): standard deviation of the Gaussian kernel
        """
        super().__init__(random_chance, log_level)
        self.sigma = sigma

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Blur the image with a Gaussian filter

        Args:
            image (Image): Image to be blurred
            annotation (typing.Any): Annotation, returned untouched

        Returns:
            image (Image): The same image object, updated with the blurred pixels
            annotation (typing.Any): The annotation that was passed in
        """
        # The (0, 0) kernel size is passed through as-is alongside sigma
        image.update(cv2.GaussianBlur(image.numpy(), (0, 0), self.sigma))

        return image, annotation
    

class RandomSaltAndPepper(Augmentor):
    """ Randomly add Salt and Pepper noise to image"""
    def __init__(
        self, 
        random_chance: float = 0.5,
        log_level: int = logging.INFO,
        salt_vs_pepper: float = 0.5,
        amount: float = 0.1,
        ) -> None:
        """ Randomly add Salt and Pepper noise to image
        
        Args:
            random_chance (float): Float between 0.0 and 1.0 setting bounds for random probability. Defaults to 0.5.
            log_level (int): Log level for the augmentor. Defaults to logging.INFO.
            salt_vs_pepper (float): ratio of salt vs pepper. Defaults to 0.5.
            amount (float): proportion of the image to be salted and peppered. Defaults to 0.1.
        """
        super(RandomSaltAndPepper, self).__init__(random_chance, log_level)
        self.salt_vs_pepper = salt_vs_pepper
        self.amount = amount
        
        assert 0 <= salt_vs_pepper <= 1.0, "salt_vs_pepper must be between 0.0 and 1.0"
        assert 0 <= amount <= 1.0, "amount must be between 0.0 and 1.0"

    @randomness_decorator
    def __call__(self, image: Image, annotation: typing.Any) -> typing.Tuple[Image, typing.Any]:
        """ Randomly add salt and pepper noise to an image

        Args:
            image (Image): Image to be noised
            annotation (typing.Any): Annotation, returned untouched

        Returns:
            image (Image): Noised image
            annotation (typing.Any): The annotation that was passed in
        """
        img = image.numpy()
        # Only the first two dimensions are needed, so arrays with or without
        # a channel axis are both accepted.
        height, width = img.shape[:2]

        # Salt mode: set randomly chosen pixels to 255 in every channel.
        # (Previously the last channel was assigned the channel count.)
        num_salt = int(self.amount * height * width * self.salt_vs_pepper)
        row_coords = np.random.randint(0, height, size=num_salt)
        col_coords = np.random.randint(0, width, size=num_salt)
        img[row_coords, col_coords] = 255

        # Pepper mode: set randomly chosen pixels to 0 in every channel.
        num_pepper = int(self.amount * height * width * (1.0 - self.salt_vs_pepper))
        row_coords = np.random.randint(0, height, size=num_pepper)
        col_coords = np.random.randint(0, width, size=num_pepper)
        img[row_coords, col_coords] = 0

        image.update(img)

        return image, annotation

Writing augmentors.py


In [21]:
%%writefile dataProvider.py
import os
import copy
import typing
import numpy as np
import pandas as pd
from tqdm import tqdm

from augmentors import Augmentor
from transformers import Transformer

import logging
logging.basicConfig(format="%(asctime)s %(levelname)s %(name)s: %(message)s")


class DataProvider:
    """ Batch provider that preprocesses, augments and transforms dataset samples.

    ``provider[i]`` returns the i-th batch as a tuple of numpy arrays
    ``(data, annotations)``; iterating the provider yields every batch of the
    current epoch in order.
    """
    def __init__(
            self,
            dataset: typing.Union[str, list, pd.DataFrame],
            data_preprocessors: typing.List[typing.Callable] = None,
            batch_size: int = 4,
            shuffle: bool = True,
            initial_epoch: int = 1,
            augmentors: typing.List[Augmentor] = None,
            transformers: typing.List[Transformer] = None,
            skip_validation: bool = True,
            limit: int = None,
            use_cache: bool = False,
            log_level: int = logging.INFO,
    ) -> None:
        """ Standardised object for providing data to a model while training.

        Attributes:
            dataset (str, list, pd.DataFrame): Path to dataset, list of data or pandas dataframe of data.
            data_preprocessors (list): List of data preprocessors. (e.g. [read image, read audio, etc.])
            batch_size (int): The number of samples to include in each batch. Defaults to 4.
            shuffle (bool): Whether to shuffle the data. Defaults to True.
            initial_epoch (int): The initial epoch. Defaults to 1.
            augmentors (list, optional): List of augmentor functions. Defaults to None.
            transformers (list, optional): List of transformer functions. Defaults to None.
            skip_validation (bool, optional): Whether to skip validation. Defaults to True.
            limit (int, optional): Limit the number of samples in the dataset. Defaults to None.
            use_cache (bool, optional): Whether to cache the dataset. Defaults to False.
            log_level (int, optional): The log level. Defaults to logging.INFO.

        Raises:
            FileNotFoundError: When validation is enabled and no valid data is found.
            TypeError: When validation is enabled and the dataset type is unsupported.
        """
        self._dataset = dataset
        self._data_preprocessors = [] if data_preprocessors is None else data_preprocessors
        self._batch_size = batch_size
        self._shuffle = shuffle
        self._epoch = initial_epoch
        self._augmentors = [] if augmentors is None else augmentors
        self._transformers = [] if transformers is None else transformers
        self._skip_validation = skip_validation
        self._limit = limit
        self._use_cache = use_cache
        self._step = 0
        # Preprocessed samples keyed by the first element of each dataset entry
        self._cache = {}
        # Samples whose preprocessing returned None; dropped in on_epoch_end
        self._on_epoch_end_remove = []

        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(log_level)

        # Validate dataset
        if not skip_validation:
            self._dataset = self.validate(dataset)
        else:
            self.logger.info("Skipping Dataset validation...")

        if limit:
            self.logger.info(f"Limiting dataset to {limit} samples.")
            self._dataset = self._dataset[:limit]

    def __len__(self):
        """ Denotes the number of batches per epoch """
        return int(np.ceil(len(self._dataset) / self._batch_size))

    @property
    def augmentors(self) -> typing.List[Augmentor]:
        """ Return augmentors """
        return self._augmentors

    @augmentors.setter
    def augmentors(self, augmentors: typing.List[Augmentor]):
        """ Append augmentors to the DataProvider.

        Assignment extends the existing list rather than replacing it; items
        that are not Augmentor instances are skipped with a warning.
        """
        for augmentor in augmentors:
            if isinstance(augmentor, Augmentor):
                if self._augmentors is not None:
                    self._augmentors.append(augmentor)
                else:
                    self._augmentors = [augmentor]

            else:
                self.logger.warning(f"Augmentor {augmentor} is not an instance of Augmentor.")

    @property
    def transformers(self) -> typing.List[Transformer]:
        """ Return transformers """
        return self._transformers

    @transformers.setter
    def transformers(self, transformers: typing.List[Transformer]):
        """ Append transformers to the DataProvider.

        Assignment extends the existing list rather than replacing it; items
        that are not Transformer instances are skipped with a warning.
        """
        for transformer in transformers:
            if isinstance(transformer, Transformer):
                if self._transformers is not None:
                    self._transformers.append(transformer)
                else:
                    self._transformers = [transformer]

            else:
                self.logger.warning(f"Transformer {transformer} is not an instance of Transformer.")

    @property
    def epoch(self) -> int:
        """ Return Current Epoch"""
        return self._epoch

    @property
    def step(self) -> int:
        """ Return Current Step"""
        return self._step

    def on_epoch_end(self):
        """ Shuffle training dataset and increment epoch counter at the end of each epoch. """
        self._epoch += 1
        if self._shuffle:
            np.random.shuffle(self._dataset)

        # Remove any samples that were marked for removal. A sample can be
        # marked more than once (e.g. the same batch was requested twice), so
        # an entry that is already gone is skipped instead of raising.
        for remove in self._on_epoch_end_remove:
            try:
                self._dataset.remove(remove)
            except ValueError:
                continue
            self.logger.warning(f"Removing {remove} from dataset.")
        self._on_epoch_end_remove = []

    def validate_list_dataset(self, dataset: list) -> list:
        """ Keep only the entries whose first element is an existing path.

        Raises:
            FileNotFoundError: If no entry survives the check.
        """
        validated_data = [data for data in tqdm(dataset, desc="Validating Dataset") if os.path.exists(data[0])]
        if not validated_data:
            raise FileNotFoundError("No valid data found in dataset.")

        return validated_data

    def validate(self, dataset: typing.Union[str, list, pd.DataFrame]) -> typing.Union[list, str]:
        """ Validate the dataset and return the dataset

        Raises:
            FileNotFoundError: If a dataset path does not exist or a list/dataframe
                contains no existing files.
            TypeError: If the dataset is not a path, list or pandas dataframe.
        """
        if isinstance(dataset, str):
            if os.path.exists(dataset):
                return dataset
            # Previously this fell through and silently returned None
            raise FileNotFoundError(f"Dataset path {dataset} does not exist.")
        elif isinstance(dataset, list):
            return self.validate_list_dataset(dataset)
        elif isinstance(dataset, pd.DataFrame):
            return self.validate_list_dataset(dataset.values.tolist())
        else:
            raise TypeError("Dataset must be a path, list or pandas dataframe.")

    def split(self, split: float = 0.9, shuffle: bool = True) -> typing.Tuple[typing.Any, typing.Any]:
        """ Split current data provider into training and validation data providers. 
        
        Args:
            split (float, optional): The split ratio. Defaults to 0.9.
            shuffle (bool, optional): Whether to shuffle the dataset. Defaults to True.

        Returns:
            train_data_provider: Deep copy of this provider holding the first ``split`` share of the samples.
            val_data_provider: Deep copy of this provider holding the remaining samples.
        """
        if shuffle:
            np.random.shuffle(self._dataset)

        split_index = int(len(self._dataset) * split)

        train_data_provider, val_data_provider = copy.deepcopy(self), copy.deepcopy(self)
        train_data_provider._dataset = self._dataset[:split_index]
        val_data_provider._dataset = self._dataset[split_index:]

        return train_data_provider, val_data_provider

    def to_csv(self, path: str, index: bool = False) -> None:
        """ Save the dataset to a csv file 

        Args:
            path (str): The path to save the csv file.
            index (bool, optional): Whether to save the index. Defaults to False.
        """
        df = pd.DataFrame(self._dataset)
        df.to_csv(path, index=index)

    def get_batch_annotations(self, index: int) -> typing.List:
        """ Returns a batch of annotations by batch index in the dataset

        Also records ``index`` as the current step.

        Args:
            index (int): The index of the batch in the dataset

        Returns:
            batch_annotations (list): A list of batch annotations
        """
        self._step = index
        start_index = index * self._batch_size

        # Get batch indexes; the last batch may be shorter than batch_size
        batch_indexes = [i for i in range(start_index, start_index + self._batch_size) if i < len(self._dataset)]

        # Read batch data
        batch_annotations = [self._dataset[i] for i in batch_indexes]

        return batch_annotations
    
    def __iter__(self):
        """ Create a generator that iterate over the Sequence."""
        for item in (self[i] for i in range(len(self))):
            yield item

    def process_data(self, batch_data):
        """ Process a single dataset entry into ``(data, annotation)``.

        Runs the preprocessors (or reuses the cached result), then the
        augmentors and transformers, and finally converts the results to numpy
        where needed. Returns ``(None, None)`` and marks the entry for removal
        when preprocessing yields None.
        """
        if self._use_cache and batch_data[0] in self._cache:
            # Copy so augmentors cannot modify the cached sample
            data, annotation = copy.deepcopy(self._cache[batch_data[0]])
        else:
            data, annotation = batch_data
            for preprocessor in self._data_preprocessors:
                data, annotation = preprocessor(data, annotation)
            
            if data is None or annotation is None:
                self.logger.warning("Data or annotation is None, marking for removal on epoch end.")
                self._on_epoch_end_remove.append(batch_data)
                return None, None
            
            if self._use_cache and batch_data[0] not in self._cache:
                self._cache[batch_data[0]] = (copy.deepcopy(data), copy.deepcopy(annotation))

        # Then augment, transform and postprocess the batch data
        for objects in [self._augmentors, self._transformers]:
            for _object in objects:
                data, annotation = _object(data, annotation)

        # Convert to numpy array if not already
        if not isinstance(data, np.ndarray):
            data = data.numpy()

        # Convert to numpy array if not already
        # TODO: This is a hack, need to fix this
        if not isinstance(annotation, (np.ndarray, int, float, str, np.uint8)):
            annotation = annotation.numpy()

        return data, annotation

    def __getitem__(self, index: int):
        """ Returns a batch of data by batch index"""
        dataset_batch = self.get_batch_annotations(index)
        
        # First read and preprocess the batch data
        batch_data, batch_annotations = [], []
        for sample in dataset_batch:

            data, annotation = self.process_data(sample)

            if data is None or annotation is None:
                self.logger.warning("Data or annotation is None, skipping.")
                continue

            batch_data.append(data)
            batch_annotations.append(annotation)

        return np.array(batch_data), np.array(batch_annotations)

Overwriting dataProvider.py


In [22]:
import sys

# Print the current search path
print(sys.path)

# sys.path entries must be directories, not files. This is the folder that
# holds the modules written by the %%writefile cells (augmentors.py,
# dataProvider.py, ...).
module_path = r'C:\Users\ljant\Desktop\Ironhack\Projects\Final-Project-Ironhack-2024'
if module_path not in sys.path:
    sys.path.append(module_path)

# Try importing again
from augmentors import Augmentor
from transformers import Transformer
from dataProvider import DataProvider

# NOTE(review): the helpers below are imported from the module paths used by
# mltu >= 1.0; confirm they exist in the mltu version installed above.
from mltu.annotations.images import CVImage
from mltu.preprocessors import ImageReader
from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding

data_provider = DataProvider(
    dataset=dataset,
    skip_validation=True,
    batch_size=configs.batch_size,
    data_preprocessors=[ImageReader(CVImage)],
    transformers=[
        ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
        LabelIndexer(configs.vocab),
        LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab)),
        ],
)

['C:\\Users\\ljant\\Desktop\\Ironhack\\Projects\\Final-Project-Ironhack-2024', 'C:\\Users\\ljant\\anaconda3\\python39.zip', 'C:\\Users\\ljant\\anaconda3\\DLLs', 'C:\\Users\\ljant\\anaconda3\\lib', 'C:\\Users\\ljant\\anaconda3', '', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\locket-0.2.1-py3.9.egg', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\win32', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\win32\\lib', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\Pythonwin', 'C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\ljant\\.ipython', 'C:\\Users\\ljant\\Desktop\\Ironhack\\Projects\\Final-Project-Ironhack-2024\\dataProvider.py']


ImportError: attempted relative import with no known parent package

In [23]:
# Build the provider over the [image_path, label] pairs collected earlier.
data_provider = DataProvider(
    dataset=dataset,
    batch_size=configs.batch_size,
    # Dataset validation is switched off for this provider.
    skip_validation=True,
    data_preprocessors=[
        ImageReader(CVImage),
    ],
    transformers=[
        # Fixed-size images, aspect ratio not preserved.
        ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
        LabelIndexer(configs.vocab),
        # The padding value is one past the last index of the vocabulary string.
        LabelPadding(
            max_word_length=configs.max_text_length,
            padding_value=len(configs.vocab),
        ),
    ],
)

NameError: name 'DataProvider' is not defined

In [29]:
%%writefile model.py
from keras import layers
from keras.models import Model

from mltu.tensorflow.model_utils import residual_block


def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
    """Build the recognition network: residual blocks, a bidirectional LSTM and a softmax head.

    Args:
        input_dim: Shape passed to the Keras input layer.
        output_dim: The dense output layer has ``output_dim + 1`` units.
        activation: Activation name forwarded to every residual block.
        dropout: Dropout rate used in the residual blocks and after the LSTM.

    Returns:
        A Keras ``Model`` mapping the layer named "input" to the layer named "output".
    """
    inputs = layers.Input(shape=input_dim, name="input")

    # normalize images here instead in preprocessing step
    features = layers.Lambda(lambda x: x / 255)(inputs)

    # (filters, skip_conv, strides) for each residual block, in order
    block_specs = (
        (16, True, 1),
        (16, True, 2),
        (16, False, 1),
        (32, True, 2),
        (32, False, 1),
        (64, True, 2),
        (64, True, 1),
        (64, False, 1),
        (64, False, 1),
    )
    for filters, skip_conv, strides in block_specs:
        features = residual_block(
            features,
            filters,
            activation=activation,
            skip_conv=skip_conv,
            strides=strides,
            dropout=dropout,
        )

    # Merge the two axes before the channel axis into a single sequence axis
    sequence_length = features.shape[-3] * features.shape[-2]
    sequence = layers.Reshape((sequence_length, features.shape[-1]))(features)

    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(sequence)
    recurrent = layers.Dropout(dropout)(recurrent)

    # NOTE(review): the extra unit is presumably a CTC blank class - confirm against the loss in use
    output = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)

    return Model(inputs=inputs, outputs=output)

Writing model.py


In [30]:
from model import train_model