In [None]:
# default_exp dataloader

# dataloader

> Data loading utilities for image classification (`clf`) and object detection (`detect`) datasets.

1. read_image(path: str, channels: int=3)
2. clf.load_from_folder
3. clf.load_from_csv
4. detect.load_from_xml
5. detect.load_from_csv
6. detect.load_from_json
7. detect.load_from_tfrecord

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#hide
import sys
sys.path.append('../')

In [None]:
#export
import tensorflow as tf
import pathlib
import os

from typing import Union

from chitra.core import remove_dsstore
from chitra.image import read_image, resize_image


In [None]:
#export
# tf.data's autotune sentinel; passed as `num_parallel_calls` to `Dataset.map`
# in `Clf.from_folder`.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# CLF

`Clf` is a data loader class for loading datasets for image classification tasks.

All private methods operate on primitive datatypes (e.g. `str` paths).


## clf.load_from_folder
## clf.load_from_csv

In [None]:
#export
def get_basename(path: tf.string):
    """Return the final `os.path.sep`-delimited component of a path tensor.

    `path` must be a `tf.Tensor`; the assertion fails for any other type.
    """
    assert isinstance(path, tf.Tensor)
    components = tf.strings.split(path, os.path.sep)
    return components[-1]

In [None]:
#export
class Clf(object):
    """Build `tf.data.Dataset` pipelines for image classification.

    The dataset root is expected to contain one sub-folder per class, each
    holding that class's images (`<root>/<class_name>/<image>`).
    """

    # Resize target used when the caller does not supply a `size`.
    DEFAULT_SIZE = (160, 160)

    def __init__(self):
        # Populated by `from_folder` with the entry names found directly
        # under the dataset root.
        self.CLASS_NAMES = None

    def _get_image_list(self, path: str):
        """List every file located exactly two levels below `path`.

        Args:
            path: str, root folder of the dataset.
        Returns: tf.data.Dataset of file paths matching `<path>/*/*`.
        """
        assert isinstance(path, str)
        list_images = tf.data.Dataset.list_files(f'{path}/*/*')
        return list_images

    def _process_path(self, path: str, size: Union[None, tuple] = None):
        """Load one image, resize it and derive its label from the parent folder.

        Args:
            path: str or string tf.Tensor, path of the image file.
            size: None or tuple, target size handed to `resize_image`.
                `None` falls back to `DEFAULT_SIZE` (160, 160).
        Returns: tuple `(img, label)` where `label` is the second-to-last
            `os.path.sep`-separated component of `path`.
        """
        assert isinstance(path, (str, tf.Tensor)), f'type of path is {type(path)}, expected type str'
        target_size = self.DEFAULT_SIZE if size is None else tuple(size)

        img = read_image(path)
        # NOTE(review): `Tout` is a one-element list, so the result comes back
        # wrapped in a list; the notebook output shows elements shaped
        # (1, 160, 160, 3). Kept as-is so downstream shapes do not change.
        img = tf.py_function(resize_image, [img, target_size], [tf.float32])

        label = tf.strings.split(path, os.path.sep)[-2]
        return img, label

    def from_folder(self, path: Union[str, pathlib.Path], size: Union[None, tuple] = None):
        """Load dataset from given path.

        Also sets `self.CLASS_NAMES` to the names of the entries found
        directly under `path`, as returned by `.numpy()` on each name tensor.

        Args:
            path: string or pathlib.Path, path of folder containing dataset.
            size: None or tuple, target image size; `None` keeps the default
                of (160, 160).
        Returns: tf.data.Dataset of `(image, label)` elements.
        """
        assert isinstance(path, (str, pathlib.Path))
        path = pathlib.Path(path)
        # presumably strips macOS `.DS_Store` files so they are not picked up
        # by the globs below - confirm against chitra.core
        remove_dsstore(path)

        # `list_files` shuffles by default; disable it here so CLASS_NAMES has
        # the same order on every run. The image listing keeps its default.
        list_folders = tf.data.Dataset.list_files(str(path/'*'), shuffle=False)
        list_images = self._get_image_list(str(path))

        self.CLASS_NAMES = tuple(get_basename(e).numpy() for e in list_folders)

        def _load(image_path):
            # Bind the requested size; `Dataset.map` only passes the element.
            return self._process_path(image_path, size)

        data = list_images.map(_load, num_parallel_calls=AUTOTUNE)
        return data

In [None]:
#hide
# Sample data lives at a machine-specific location. Set CHITRA_SAMPLE_DIR to
# run this cell elsewhere; the folder must be laid out as
# <root>/<class_name>/<image>.
path = os.environ.get('CHITRA_SAMPLE_DIR', '/Users/aniketmaurya/Pictures/cats')

clf = Clf()
data = clf.from_folder(path)
data

<DatasetV1Adapter shapes: (<unknown>, ()), types: (tf.float32, tf.string)>

In [None]:
#hide
# Pull a single (image, label) element and report the image's shape.
for image, _label in data.take(1):
    print(image.shape)

(1, 160, 160, 3)
