In [7]:
import os
import zipfile
import joblib as pickle
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

In [8]:
# In utils.py
def create_pickle(data = None, filename = None):
    """Serialize ``data`` to ``filename`` using the notebook's ``pickle`` alias.

    In this notebook ``pickle`` is bound to ``joblib`` by the import cell, so
    the call below is ``joblib.dump``.

    Parameters
    ----------
    data : object, optional
        The object to serialize. Must not be ``None``.
    filename : str, optional
        Destination path of the pickle file. Must not be ``None``.

    Raises
    ------
    ValueError
        If ``data`` or ``filename`` is ``None``. Each case gets its own
        message so a missing filename is no longer reported as missing data.
    """
    # Guard clauses: validate each argument separately so the error names
    # the argument that is actually missing.
    if data is None:
        raise ValueError("No data provided".capitalize())
    if filename is None:
        raise ValueError("No filename provided".capitalize())

    pickle.dump(value=data, filename=filename)

In [13]:
class Loader:
    """Extract a zipped image dataset and persist a ``DataLoader`` built from it.

    Parameters
    ----------
    image_path : str, optional
        Path to the ``.zip`` archive that contains the images.
    batch_size : int, default 64
        Batch size handed to ``DataLoader``.
    image_size : int, default 64
        Size passed to both ``Resize`` and ``CenterCrop``.

    Attributes
    ----------
    to_extract : str
        Directory the archive is extracted into (``"../data/raw/"``).
    to_processed : str
        Directory the pickled loader is written to (``"../data/processed/"``).
    """

    def __init__(self, image_path = None, batch_size = 64, image_size = 64):
        self.image_path = image_path
        self.batch_size = batch_size
        self.image_size = image_size
        self.to_extract = "../data/raw/"
        self.to_processed = "../data/processed/"

    def unzip_folder(self):
        """Extract the archive at ``image_path`` into ``to_extract``.

        The destination directory is created (with a notice printed) when it
        does not exist yet. The archive is opened first, so an unreadable
        archive fails before any directory is created.
        """
        with zipfile.ZipFile(self.image_path, 'r') as zip_ref:
            if not os.path.exists(self.to_extract):
                print("There is no path to extract the dataset".capitalize())
                # exist_ok guards against the directory appearing between the
                # existence check and the creation.
                os.makedirs(self.to_extract, exist_ok=True)
            zip_ref.extractall(path=self.to_extract)

    def _do_normalization(self, transform = False):
        """Build the preprocessing pipeline applied to every image.

        Parameters
        ----------
        transform : bool, default False
            Must be truthy; a falsy value is treated as an error.

        Returns
        -------
        transforms.Compose
            Resize -> CenterCrop -> ToTensor -> Normalize, in that order.

        Raises
        ------
        Exception
            If ``transform`` is falsy.
        """
        if not transform:
            raise Exception("Error while doing the normalization".capitalize())

        return transforms.Compose(
            [
                # A plain int is passed here (not a tuple); see the
                # torchvision ``Resize`` docs for how an int size is handled.
                transforms.Resize(self.image_size),
                transforms.CenterCrop(self.image_size),
                transforms.ToTensor(),
                # Per-channel mean and std of 0.5 for three channels.
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ]
        )

    def create_dataloader(self):
        """Build a shuffled ``DataLoader`` over ``to_extract`` and pickle it.

        The loader is written to ``<to_processed>/dataloader.pkl``; the
        directory is created (with a notice printed) when missing. The same
        object - the ``DataLoader`` - is pickled whether or not the directory
        already existed.

        Returns
        -------
        DataLoader
            The loader that was built, also when pickling failed.

        Raises
        ------
        Exception
            If ``to_extract`` does not exist.
        """
        if not os.path.exists(self.to_extract):
            raise Exception("There is no path to create the dataloader".capitalize())

        dataset = ImageFolder(root = self.to_extract, transform=self._do_normalization(transform=True))
        dataloader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        # Persisting is best-effort: a failure is reported, not raised.
        try:
            if not os.path.exists(self.to_processed):
                print("There is no data path named processed & creating the data path...".capitalize())
                os.makedirs(self.to_processed, exist_ok=True)

            create_pickle(
                data=dataloader,
                filename=os.path.join(self.to_processed, "dataloader.pkl"),
            )
        except Exception as e:
            print("Error caught in the section # {}".format(e))

        return dataloader


if __name__ == "__main__":
    # The archive location is machine-specific, so it can be overridden with
    # the ALZHEIMER_DATASET_ZIP environment variable. The original path stays
    # as the fallback, so runs without the variable behave as before.
    loader = Loader(
        image_path=os.environ.get(
            "ALZHEIMER_DATASET_ZIP",
            "/Users/shahmuhammadraditrahman/Desktop/alzheimer_dataset.zip",
        ),
        batch_size=64,
        image_size=64
    )
    # Extract the archive first, then build and pickle the loader over it.
    loader.unzip_folder()
    loader.create_dataloader()

In [20]:
# unittest
# Reload the object written to "dataloader.pkl" by `Loader.create_dataloader`.
# `pickle` is the `joblib` alias from the import cell, so this is `joblib.load`.
# NOTE(review): unpickling can execute arbitrary code - only load files that
# this notebook produced itself.
data = pickle.load("../data/processed/dataloader.pkl")

def total_data(data = None):
    """Add up the leading dimension of the first element of every item in ``data``.

    Parameters
    ----------
    data : iterable of pairs, optional
        Each item is unpacked as ``(first, second)`` and ``first.shape[0]`` is
        accumulated. Presumably ``first`` is a batch of images, which makes the
        result the number of samples - TODO confirm against the pickled object.

    Returns
    -------
    int or str
        The accumulated total, or the string ``"Data is empty"`` when ``data``
        is ``None``.
    """
    if data is None:
        return "Data is empty".capitalize()

    running_total = 0
    for first, _ in data:
        running_total += first.shape[0]
    return running_total

if __name__ == "__main__":
    # Print the total that `total_data` computes over the reloaded `data`.
    print("Quantity of the dataset # {} ".format(total_data(data = data)))

Quantity of the dataset # 34557 
