# 1. Simple Dataset Download

In [None]:
import torch, torch.nn as nn
import torchvision, torchinfo, torchmetrics
import datasets as huggingface_datasets
from tqdm import tqdm

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Number of samples per mini-batch produced by the dataloaders.
BATCH_SIZE = 5
# IMAGE_RESIZE = 28, 28

def DOWNLOAD_DATASETS():
    """Download CIFAR-10 and wrap its two splits in PyTorch dataloaders.

    Every entry of the ``img`` column is run through ``ToTensor`` and stored
    in a new ``image_tensors`` column.  The dataloaders iterate over a torch
    formatted view that exposes only the ``label`` and ``image_tensors``
    columns, requested with ``dtype=torch.float32``.

    Returns:
        tuple: ``(training_dataset, validation_dataset, training_dataloader,
        validation_dataloader)``.  The two datasets are the mapped Hugging
        Face datasets (without the torch formatting); the dataloaders yield
        batches of ``BATCH_SIZE`` samples.
    """
    # Download
    dataset = huggingface_datasets.load_dataset("cifar10")  # streaming = True)
    training_dataset = dataset["train"]
    validation_dataset = dataset["test"]

    # Transform
    transformations_group = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),  # Converts every pixel into a value between 0 & 1.
        # torchvision.transforms.Resize(size=config.IMAGE_RESIZE)
    ])

    def transform_datasets(examples):
        """Add an ``image_tensors`` column with the transformed images of one batch."""
        examples["image_tensors"] = [
            transformations_group(image) for image in examples["img"]
        ]
        return examples

    training_dataset = training_dataset.map(transform_datasets, batched=True)
    validation_dataset = validation_dataset.map(transform_datasets, batched=True)

    # Convert: expose only the columns the training loop reads, as torch tensors.
    torch_columns = ["label", "image_tensors"]
    new_training_dataset = training_dataset.with_format(
        "torch", columns=torch_columns, dtype=torch.float32
    )
    new_validation_dataset = validation_dataset.with_format(
        "torch", columns=torch_columns, dtype=torch.float32
    )

    # Shuffle only the training data.  Shuffling the validation split does not
    # change metrics averaged over the whole split, so it is iterated in a
    # fixed order to keep evaluation reproducible.
    training_dataloader = torch.utils.data.DataLoader(
        dataset=new_training_dataset, batch_size=BATCH_SIZE, shuffle=True
    )
    validation_dataloader = torch.utils.data.DataLoader(
        dataset=new_validation_dataset, batch_size=BATCH_SIZE, shuffle=False
    )

    return training_dataset, validation_dataset, training_dataloader, validation_dataloader

# Build the datasets and dataloaders once for the rest of the notebook.
(
    training_dataset,
    validation_dataset,
    training_dataloader,
    validation_dataloader,
) = DOWNLOAD_DATASETS()

# Smoke test: pulling the first batch from each dataloader must succeed.
assert next(iter(training_dataloader)) is not None
assert next(iter(validation_dataloader)) is not None

# 2. Simple Model Training Pipeline

In [None]:
# Training hyper-parameters read by TRAIN_MODEL.
lr = 1e-3    # learning rate handed to the optimizer
epochs = 10  # number of passes over the training dataloader

# Prefer the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def TRAIN_MODEL(model, training_dataloader, validation_dataloader):
    """Train ``model`` with plain SGD and validate it after every epoch.

    Reads the module-level ``lr``, ``epochs`` and ``device`` settings.  A tqdm
    progress bar shows the loss of the current batch and the running accuracy
    of the current epoch; after each epoch one summary line is printed with
    the epoch-average training loss/accuracy and the validation loss/accuracy
    returned by ``EVALUATE_MODEL``.

    Args:
        model: Module mapping a batch of ``image_tensors`` to logits over
            10 classes.  It is not moved here, so it must already live on
            ``device``.
        training_dataloader: Iterable of dict batches holding the keys
            ``'image_tensors'`` and ``'label'``.
        validation_dataloader: Dataloader forwarded to ``EVALUATE_MODEL``.

    Returns:
        None.  The model is updated in place and left in eval mode.
    """
    OPTIMIZER = torch.optim.SGD(params=model.parameters(), lr=lr)  # Using torch.optimizer algorithm
    metric    = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)

    for epoch_no in range(epochs):
        # EVALUATE_MODEL switches the model to eval mode, so training mode has
        # to be re-enabled at the start of *every* epoch, not only the first.
        model.train(mode=True)
        metric.reset()
        loss_sum     = 0.0
        samples_seen = 0

        for batch_no, batch_dictionary in enumerate(progress_bar := tqdm(training_dataloader)):
            x_actual = batch_dictionary['image_tensors'].to(device)
            # Class indices must be integers for both the loss and the metric.
            y_actual = batch_dictionary['label'].to(device=device, dtype=torch.int64)

            # Calling the module (instead of .forward) also runs registered hooks.
            y_predicted_LOGITS = model(x_actual)
            loss               = nn.functional.cross_entropy(y_predicted_LOGITS, y_actual)

            OPTIMIZER.zero_grad()
            # Parameters of layer 1 are not dependent on any other parameters,
            # layer 2 depends on layer 1, layer 3 on layer 2 (and so on layer 1).
            # The rate of change of the loss with respect to such nested
            # parameters is found automatically by loss.backward().
            loss.backward()
            # For plain SGD this performs, for every parameter:
            #     weight = weight - gradient * learning_rate
            OPTIMIZER.step()

            loss_batch    = loss.item()
            batch_samples = y_actual.shape[0]
            loss_sum     += loss_batch * batch_samples
            samples_seen += batch_samples

            metric.update(y_predicted_LOGITS, y_actual)
            training_accuracy_avg_epoch = metric.compute().item()  # running average across the epoch so far

            progress_bar.set_description(f'batch_no = {batch_no},\t loss_batch = {loss_batch:0.4f},\t accuracy_avg = {training_accuracy_avg_epoch:0.4f}')

        # Summarise the whole epoch rather than only its last batch.
        if samples_seen:
            training_loss_epoch     = loss_sum / samples_seen
            training_accuracy_epoch = metric.compute().item()
        else:
            # Empty dataloader: nothing was trained, so there is nothing to average.
            training_loss_epoch = training_accuracy_epoch = float("nan")

        loss_validation, accuracy_validation = EVALUATE_MODEL(model, validation_dataloader)
        print(f'epoch_no = {epoch_no}, training_loss = {training_loss_epoch:0.4f}, validation_loss = {loss_validation:0.4f},\t training_accuracy = {training_accuracy_epoch:0.4f}, validation_accuracy = {accuracy_validation:0.4f}')

        # Leave the model in inference mode between epochs and after training.
        model.train(mode=False)

def EVALUATE_MODEL(model, validation_dataloader):
    """Compute the average loss and accuracy of ``model`` over a dataloader.

    The model is switched to eval mode (and left there) and gradients are
    disabled while iterating.  Uses the module-level ``device``.

    Args:
        model: Module mapping a batch of ``image_tensors`` to logits over
            10 classes; expected to live on ``device`` already.
        validation_dataloader: Iterable of dict batches holding the keys
            ``'image_tensors'`` and ``'label'``.

    Returns:
        tuple[float, float]: ``(mean cross-entropy loss per sample, accuracy)``
        over every batch of the dataloader.  Both values are ``nan`` when the
        dataloader yields no samples.
    """
    model.eval()
    metric = torchmetrics.Accuracy(task="multiclass", num_classes=10).to(device)
    loss_sum     = 0.0
    samples_seen = 0

    with torch.no_grad():
        for batch_dictionary in validation_dataloader:
            x_actual = batch_dictionary['image_tensors'].to(device)
            # Class indices must be integers for both the loss and the metric.
            y_actual = batch_dictionary['label'].to(device=device, dtype=torch.int64)

            y_predicted_LOGITS = model(x_actual)

            # Sum (not mean) per batch so that a smaller final batch is
            # weighted correctly in the overall per-sample average.
            loss_sum     += nn.functional.cross_entropy(y_predicted_LOGITS, y_actual, reduction="sum").item()
            samples_seen += y_actual.shape[0]
            metric.update(y_predicted_LOGITS, y_actual)

    if samples_seen == 0:
        return float("nan"), float("nan")

    testing_accuracy_avg = metric.compute().item()
    return loss_sum / samples_seen, testing_accuracy_avg

# 3. Simple Model Architecture

## Experiment 1

In [None]:
# Tensor layouts used in this notebook
#   2d data (images) : Batch, Channels, H, W
#   1d data (flat)   : Batch, Dim

# Fully connected classifier: 32*32*3 inputs -> 40 -> 30 -> 10 outputs.
# The parameters keep their initial values until TRAIN_MODEL is run.
model_random_parameters = torch.nn.Sequential(
    # Keep dim 0 (batch) and merge everything after it: (B, C, H, W) -> (B, C*H*W)
    torch.nn.Flatten(start_dim=1),

    # LAYER 1: 1st hidden layer
    torch.nn.Linear(in_features=32 * 32 * 3, out_features=40),
    torch.nn.ReLU(),

    # LAYER 2: 2nd hidden layer
    torch.nn.Linear(in_features=40, out_features=30),
    torch.nn.ReLU(),

    # OUTPUT LAYER: one value per class
    torch.nn.Linear(in_features=30, out_features=10),
)

# Model size / number of parameters are important; the summary below lists them.
model = model_random_parameters.to(device)

torchinfo.summary(model, input_size=(1, 3 * 32 * 32), verbose=2);

In [None]:
# Train the model built in the cell above on the CIFAR-10 dataloaders.
TRAIN_MODEL(
    model=model,
    training_dataloader=training_dataloader,
    validation_dataloader=validation_dataloader,
)

## Experiment 2: Convolutional Neural Network

In [None]:
# Convolution layers expect data laid out as: Batch Size, Channels, Height, Width
# Shapes per sample (no padding, stride 1): (3, 32, 32) -> (40, 17, 17) -> (40, 2, 2) -> 160 -> 10
model = nn.Sequential(
    nn.Conv2d(in_channels=3, out_channels=40, kernel_size=16),
    nn.ReLU(),
    nn.Conv2d(in_channels=40, out_channels=40, kernel_size=16),
    nn.ReLU(),

    # Merge channels and spatial positions into one feature vector per sample.
    nn.Flatten(start_dim=1),

    torch.nn.Linear(in_features=40 * 2 * 2, out_features=10),
).to(device)  # Model Size / Number of Parameters are important

torchinfo.summary(
    model,
    input_size=(1, 3, 32, 32),
    verbose=2,
    col_names=["input_size", "output_size", "kernel_size", "num_params", "params_percent"],
);

In [None]:
# Train the convolutional model built in the cell above.
TRAIN_MODEL(
    model=model,
    training_dataloader=training_dataloader,
    validation_dataloader=validation_dataloader,
)

## Experiment 3: Better Convolutional Neural Network

In [None]:
# Convolutional part: turns a (3, 32, 32) image into a (100, 8, 8) feature map.
feature_extractor = nn.Sequential(
    nn.Conv2d    ( in_channels = 3, out_channels = 50,   kernel_size = (3,3) , padding="same"), # input = 3, 32, 32, output: (50, 32, 32)
    nn.ReLU(),
    nn.MaxPool2d ( kernel_size=(2,2), stride = 2),                                              # input: (50, 32, 32) -> output: (50, 16, 16),

    nn.Conv2d    ( in_channels = 50, out_channels = 100, kernel_size = (3,3), padding="same"),  # input: (50, 16, 16) -> output: (100, 16, 16),
    nn.ReLU(),
    nn.MaxPool2d ( kernel_size=(2,2), stride = 2),                                              # input: (100, 16, 16) -> output: (100, 8, 8),

)
# feature_map = 100, 8, 8

# Fully connected part: flattens the feature map and maps it to 10 outputs.
decision_maker = nn.Sequential(
    nn.Flatten(start_dim=1),
    nn.Linear(in_features = 100*8*8    , out_features = 50), nn.ReLU(),
    nn.Linear(in_features = 50     , out_features = 10)
)

model = nn.Sequential(
  feature_extractor,
  decision_maker
)
# TRAIN_MODEL moves every batch to `device`, so the parameters must live there
# as well; otherwise training fails with a device mismatch on GPU machines.
# Module.to() moves the sub-modules in place, so `feature_extractor` and
# `decision_maker` end up on `device` too.
model = model.to(device)

# Sanity check of the feature-map shape; the probe input is created on the
# same device as the (already moved) feature extractor.
test_output_logits = feature_extractor(torch.randn((1,3,32,32), device=device))
print(test_output_logits.shape)
torchinfo.summary(model, input_size=(1,3,32,32), verbose=2 , col_names=["input_size", "output_size", "kernel_size", "num_params", "params_percent"] );

In [None]:
# Train the feature-extractor + decision-maker model built in the cell above.
TRAIN_MODEL(
    model=model,
    training_dataloader=training_dataloader,
    validation_dataloader=validation_dataloader,
)

# Colored Images to Real-Life Data - ImageNet & Model


In [None]:
import timm

# Available VGG variants can be listed with: timm.list_models("*vgg*")
# Build the "vgg11" architecture and ask timm for its pretrained weights.
model = timm.create_model(model_name="vgg11", pretrained=True)

In [None]:
# Bare expression: the notebook renders the repr of `model` as the cell output.
model

In [None]:
# Push one random (1, 3, 224, 224) batch through the `features` sub-module
# only and display the shape of what comes out.
model.features(torch.randn(1, 3, 224, 224)).shape

In [None]:
### Work in Progress

# !pip install py7zr
# import py7zr

# with py7zr.SevenZipFile('/kaggle/input/cifar-10/test.7z' , mode='r') as z:
#     z.extractall("/kaggle/working/")
    
# from torch.utils.tensorboard import SummaryWriter
# writer = SummaryWriter()
# writer.add_scalar('Loss/train', np.random.random(), n_iter)
# writer.add_scalar('Loss/test', np.random.random(), n_iter)
# writer.add_scalar('Accuracy/train', np.random.random(), n_iter)
# writer.add_scalar('Accuracy/test', np.random.random(), n_iter)

# import torch, torchvision
# import datasets

# datasets.utils.logging.set_verbosity(datasets.logging.DEBUG)
# cifar10 = datasets.load_dataset("imagefolder", data_dir="/kaggle/working/test", drop_labels=True)
# dataset = torchvision.datasets.ImageFolder("/kaggle/working/test", )


# import os
# from PIL import Image

# transformation_list = torchvision.transforms.Compose([
# torchvision.transforms.ToTensor(),
# torchvision.transforms.Resize(size=(224,224))
# ])

# submission_file = open("submission.csv", "w")
# submission_file.write("id,label")

# in_memory_data = torch.randn(size=(10000,3,32,32))
# index = 0
# for name in os.listdir("/kaggle/working/test"):
#     image_with_path = "/kaggle/working/test/" + name
#     image = Image.open(image_with_path)    
#     transformed_image = transformation_list(image)
    
#     in_memory_data[index] = transformed_image
#     index = index + 1

