# Image Data Loss Exploratory Analysis

## Setup Environment

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt # visualization
import seaborn as sns # visualization
# machine learning
import torch 
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import transforms, datasets, models

!pip3 install progressbar
import progressbar

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Use the first CUDA GPU when one is visible to torch; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

## Load MNIST Dataset

In [3]:
# import mnist dataset
BATCH_SIZE = 250

# Training split: downloaded into /kaggle/working, images converted by ToTensor.
mnist_train = torchvision.datasets.MNIST(
    '/kaggle/working',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
train_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

# Test split: same root, transform and loader settings as the training split.
mnist_test = torchvision.datasets.MNIST(
    '/kaggle/working',
    train=False,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)
test_loader = torch.utils.data.DataLoader(
    mnist_test,
    batch_size=BATCH_SIZE,
    shuffle=True,
)

## MNIST Classifier

In [4]:
class BasicClassifier(nn.Module):
    """Convolutional classifier for single-channel images.

    The feature stack is four un-padded, stride-1 convolutions with kernel
    sizes 7, 11, 3 and 5. Each one trims the spatial size by
    ``kernel_size - 1``, so a 28x28 input leaves the stack as 256 x 6 x 6,
    which is exactly the input size the first fully connected layer is built
    for. Inputs of any other spatial size will fail inside ``classifier``.

    Args:
        num_classes: Number of scores produced per image.
    """

    def __init__(self,num_classes) -> None:
        # nn.Module must be initialised before attributes are assigned on the subclass.
        super().__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(1,64,kernel_size=7),
            nn.ReLU(inplace=True),
            nn.Conv2d(64,128,kernel_size=11),
            nn.ReLU(inplace=True),
            nn.Conv2d(128,182,kernel_size=3),
            nn.ReLU(inplace=True),
            # NOTE(review): no activation follows this last convolution, so it feeds
            # the first Linear layer directly -- confirm this is intended.
            nn.Conv2d(182,256,kernel_size=5),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096,2048),
            nn.ReLU(inplace=True),
            nn.Linear(2048,1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024,num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the convolutional stack, flatten, and apply the fully connected head.

        Args:
            x: Batch of images shaped ``(N, 1, 28, 28)``.

        Returns:
            Tensor shaped ``(N, num_classes)`` holding the raw output of the
            last Linear layer (no softmax is applied here).
        """
        x = self.features(x)
        # Keep the batch dimension, collapse channels and spatial dims into one axis.
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

## Metrics Class

In [42]:
class Metrics:
    """Accumulates a confusion matrix and derives classification metrics from it.

    The matrix is indexed ``[predicted, actual]``: rows are the classes the
    model guessed, columns are the ground-truth labels. Precision, recall and
    f1-score are macro averages (unweighted mean of the per-class values); a
    class with no predictions / no samples contributes 0 to the average.
    """

    def __init__(self, num_classes: int = 0) -> None:
        """Create the accumulator.

        Args:
            num_classes: Side length of the confusion matrix. The default of 0
                creates an empty matrix; call ``reset_confusion_matrix`` with
                the real class count before accumulating.
        """
        self.reset_confusion_matrix(num_classes)

    def update_confusion_matrix(self, outputs: torch.Tensor, labels: torch.Tensor) -> None:
        """Add one batch of predictions to the confusion matrix.

        Args:
            outputs: Per-class scores shaped ``(batch, num_classes)``; the
                predicted class is the argmax along dimension 1.
            labels: Integer class indices, one per row of ``outputs``.

        Raises:
            RuntimeError: If the matrix has not been sized yet.
        """
        if self.confusion_matrix.size == 0:
            raise RuntimeError(
                "confusion matrix is empty; call reset_confusion_matrix(num_classes) first"
            )
        guesses = outputs.detach().argmax(1).cpu().numpy()
        targets = labels.detach().cpu().numpy()
        # np.add.at accumulates correctly when the same (guess, label) pair
        # occurs more than once in the batch; plain fancy-index += would not.
        np.add.at(self.confusion_matrix, (guesses, targets), 1)

    def get_confusion_matrix(self,norm=False) -> np.ndarray:
        """Return the confusion matrix.

        Args:
            norm: When true, return a new array scaled so all cells sum to 1
                (all zeros if nothing has been accumulated). When false,
                return the internal count array itself.
        """
        if not norm:
            return self.confusion_matrix
        total = self.confusion_matrix.sum()
        if total == 0:
            return np.zeros_like(self.confusion_matrix)
        return self.confusion_matrix / total

    def reset_confusion_matrix(self,num_classes: int) -> None:
        """Replace the matrix with a zero-filled ``num_classes x num_classes`` array."""
        self.confusion_matrix = np.zeros((num_classes,num_classes))

    def classification_metrics(self) -> str:
        """Format accuracy, precision, recall, f1-score and support as a multi-line string."""
        s = "Accuracy: {}\nPrecision: {}\nRecall: {}\nf1-score: {}\nSupport: {}".format(self.accuracy(),self.precision(),self.recall(),self.f1_score(),self.confusion_matrix.sum())
        return s

    @staticmethod
    def _safe_divide(numerator: np.ndarray, denominator: np.ndarray) -> np.ndarray:
        """Element-wise division that yields 0 wherever the denominator is 0."""
        result = np.zeros_like(numerator, dtype=float)
        np.divide(numerator, denominator, out=result, where=denominator != 0)
        return result

    @staticmethod
    def _macro_average(per_class: np.ndarray) -> float:
        """Unweighted mean of per-class scores; 0.0 when there are no classes."""
        if per_class.size == 0:
            return 0.0
        return float(per_class.mean())

    def _per_class_precision(self) -> np.ndarray:
        """Correct predictions of each class divided by all predictions of that class (row sums)."""
        return self._safe_divide(np.diag(self.confusion_matrix), self.confusion_matrix.sum(axis=1))

    def _per_class_recall(self) -> np.ndarray:
        """Correct predictions of each class divided by all samples of that class (column sums)."""
        return self._safe_divide(np.diag(self.confusion_matrix), self.confusion_matrix.sum(axis=0))

    def accuracy(self) -> float:
        """Fraction of samples on the diagonal; 0.0 when the matrix holds no samples."""
        total = self.confusion_matrix.sum()
        if total == 0:
            return 0.0
        return float(np.trace(self.confusion_matrix) / total)

    def precision(self) -> float:
        """Macro-averaged precision."""
        return self._macro_average(self._per_class_precision())

    def recall(self) -> float:
        """Macro-averaged recall."""
        return self._macro_average(self._per_class_recall())

    def f1_score(self) -> float:
        """Macro-averaged f1: mean of the per-class harmonic means of precision and recall."""
        p = self._per_class_precision()
        r = self._per_class_recall()
        return self._macro_average(self._safe_divide(2 * p * r, p + r))

In [6]:
# Ten output classes. Module.to() moves the parameters in place and returns the
# module, so leaving it as the last expression also displays the layer summary.
model = BasicClassifier(num_classes=10)
model.to(device)

In [43]:
# Evaluation-statistics accumulator; reset_confusion_matrix() is called on it before the test loop.
metric = Metrics()

In [7]:
# Step size handed to Adam below.
LEARNING_RATE = 3e-5

# Cross-entropy over the classifier's raw outputs, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
)

In [8]:
# Progress-bar layout used by both the training and the testing loop:
# timer in square brackets, percentage, the bar itself, ETA in parentheses.
widgets = [
    ' [',
    progressbar.Timer(),
    '] ',
    progressbar.Percentage(),
    ' ',
    progressbar.Bar(),
    ' (',
    progressbar.ETA(),
    ') ',
]

## Train Model

In [9]:
# training
NUM_EPOCHS = 10
steps_per_epoch = len(train_loader)
epoch_losses = []  # mean training loss of every completed epoch

# Put the model in training mode explicitly so this cell is correct even when
# it is re-run after the evaluation cell.
model.train()
bar = progressbar.ProgressBar(maxval=NUM_EPOCHS*steps_per_epoch,widgets=widgets).start()
for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for i, (inputs, labels) in enumerate(train_loader):
        optimizer.zero_grad()
        outputs = model(inputs.to(device))
        loss = criterion(outputs, labels.to(device))
        loss.backward()
        optimizer.step()
        # statistics: accumulate the batch loss so the epoch mean can be reported
        running_loss += loss.item()
        # progressbar: report the number of *completed* steps so the bar reaches 100%
        bar.update(epoch*steps_per_epoch + i + 1)
    # max(..., 1) guards against an empty loader
    epoch_losses.append(running_loss / max(steps_per_epoch, 1))
# finish() completes the bar and ends its output line
bar.finish()

# Reported after the bar is finished so the two outputs do not interleave.
for epoch_number, mean_loss in enumerate(epoch_losses, start=1):
    print("Epoch {}/{} - mean training loss: {:.4f}".format(epoch_number, NUM_EPOCHS, mean_loss))

## Test Model

In [44]:
# testing
# Switch to evaluation mode; together with no_grad() below this makes the pass
# inference-only regardless of which layers the model contains.
model.eval()
# Size the matrix from the model rather than repeating the class count here.
metric.reset_confusion_matrix(model.num_classes)
bar = progressbar.ProgressBar(maxval=len(test_loader),widgets=widgets).start()
with torch.no_grad():
    for i, (inputs, labels) in enumerate(test_loader):
        outputs = model(inputs.to(device))
        # statistics
        metric.update_confusion_matrix(outputs.to('cpu'),labels)
        # progressbar: report completed batches so the bar reaches 100%
        bar.update(i + 1)
bar.finish()

# Rows of the matrix are predicted classes, columns are true labels.
m = metric.get_confusion_matrix(norm=True)
fig, ax = plt.subplots()
sns.heatmap(m,square=True,cmap='Greys',ax=ax)
ax.set(title="Normalised confusion matrix", xlabel="True label", ylabel="Predicted label")
plt.show()
print(metric.classification_metrics())

## Create Data Loss Class




In [None]:
class DataLoss:
    """Container for data-loss operations applied to input tensors.

    Nothing is implemented yet: ``random`` is a placeholder that returns its
    input unchanged.
    """

    def __init__(self):
        pass

    @staticmethod
    def random(inputs,proportion: float) -> torch.Tensor:
        """Placeholder for a random data-loss transform.

        Declared as a static method so it can be called both as
        ``DataLoss.random(x, p)`` and on an instance; without the decorator an
        instance call would bind the instance to ``inputs`` and fail.

        Args:
            inputs: Tensor to process.
            proportion: Currently unused.

        Returns:
            ``inputs`` itself, unmodified.
        """
        # TODO: implement the actual loss; `proportion` is ignored for now.
        return inputs