### Creating a CNN with PyTorch
Here you'll create a data loader with PyTorch and then build a CNN on top of it.

#### Some useful links

* [DATA LOADING AND PROCESSING TUTORIAL](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)
* [A detailed example of how to generate your data in parallel with PyTorch](https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel)
* [Detailed explanation of the dataloader](https://www.youtube.com/watch?v=myYMrZXpn6U)

***There are video explanations of this Kernel on my YouTube channel***
* [Data Science related content](https://youtube.com/jhonatandasilva?sub_confirmation=1)


### Series of videos

1. [Introduction](https://youtu.be/j3n2m61Fxhk)
2. [How to create the Class](https://www.youtube.com/watch?v=adgKmNlwcdw)

Don't forget to **[Subscribe](https://youtube.com/jhonatandasilva?sub_confirmation=1)** so you don't miss the next video of the series, coming next week :D

### Importing the Libraries

In [None]:
from __future__ import print_function, division
import os
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# PyTorch related 
import torch
import torch.nn as nn
import torchvision.transforms as transforms, utils
from torch.autograd import Variable
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")
plt.ion()   # interactive mode

In [None]:
# Load the training annotations; later cells read the 'Id' column as the label.
df = pd.read_csv('../input/train.csv')

In [None]:
# Preview the first rows of the annotation table.
df.head()

In [None]:
# Pull the 'Id' column out as a NumPy array and peek at the first five labels.
whales_label = np.array(df['Id'])
whales_label[:5]

In [None]:
# Map every whale Id to an integer class index.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(whales_label)

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output` and 1.4 removed the old spelling, so try the new name first
# and fall back to the old one on older installations.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)

# OneHotEncoder wants a 2-D (n_samples, 1) column, not a flat vector.
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

In [None]:
# Show the first five raw labels next to their one-hot rows.
whales_label[:5],onehot_encoded[:5]

In [None]:
# Value in the first row, second column of the annotation table
# (presumably the 'Id' of the first image -- confirm against the csv header).
df.iloc[0,1]

In [None]:
class WhalesDS(Dataset):
    """Humpback Whale Identification Challenge dataset.

    Every item is a dict ``{'image': ndarray, 'label': ndarray}``. For
    training data the label is the one-hot row of the image's ``Id``; for
    test data (which has no ``Id``) it is an all-zero placeholder vector.
    """

    # Path that used to be hard-coded; still used when ``csv_file`` is None.
    DEFAULT_CSV = '../input/train.csv'
    # Width of the placeholder label handed out in test mode. Same value the
    # original code hard-coded (presumably the number of whale ids in the
    # competition's train.csv -- confirm if the data changes).
    NUM_CLASSES = 5005

    def __init__(self, csv_file, root_dir, transform=None, test=False):
        """
        Args:
            csv_file (string): Path to the csv file with annotations. It must
                have an ``Id`` column, and the image file name in its first
                column.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
            test (bool or pandas.DataFrame): Pass a DataFrame whose first
                column holds image file names to build an unlabeled (test)
                dataset. Any other value loads the training annotations from
                ``csv_file``.
        """
        self.csv_file = csv_file
        self.root_dir = root_dir
        self.transform = transform
        if isinstance(test, pd.DataFrame):
            self.whales_frame = test
            # One all-zero label per test image. broadcast_to returns a view,
            # so no (n_images x NUM_CLASSES) matrix is actually allocated.
            self.labels = np.broadcast_to(
                np.zeros(self.NUM_CLASSES),
                (len(test), self.NUM_CLASSES),
            )
        else:
            self.whales_frame, self.labels = self.one_hot_encoder()

    def __len__(self):
        """ Returns the length of the dataset """
        return len(self.whales_frame)

    def __getitem__(self, idx):
        """ Get one record from the dataset """
        img_name = os.path.join(self.root_dir,
                                self.whales_frame.iloc[idx, 0])
        image = io.imread(img_name)
        # Copy the row so a transform that edits the label in place cannot
        # corrupt the shared label matrix (or hit the read-only test view).
        label = np.array(self.labels[idx])
        sample = {'image': image, 'label': label}

        if self.transform:
            sample = self.transform(sample)

        return sample

    def _csv_path(self):
        """ Path of the annotation csv, falling back to the default one """
        return getattr(self, 'csv_file', None) or self.DEFAULT_CSV

    def one_hot_encoder(self):
        """
            Got this function from this Kernel, https://www.kaggle.com/pestipeti/keras-cnn-starter
            changed a little bit, but the essence is the same
            from the kernel linked, amazing keras kernel btw if you are reading this :D

            Returns:
                tuple: ``(df, onehot)`` where ``df`` is the annotation frame
                and ``onehot`` is a float64 array of shape
                (n_rows, n_unique_ids). Columns follow the sorted unique ids,
                i.e. the same layout LabelEncoder + a dense OneHotEncoder
                produce, without depending on the scikit-learn version.
        """
        df = pd.read_csv(self._csv_path())
        classes, codes = np.unique(np.asarray(df['Id']), return_inverse=True)
        codes = codes.reshape(-1)
        onehot_encoded = np.zeros((len(codes), len(classes)))
        onehot_encoded[np.arange(len(codes)), codes] = 1.0
        return df, onehot_encoded

    def encode(self):
        """ One of the ways to make the encoding

            Returns the annotation frame with every ``Id`` replaced by an
            integer, numbered in order of first appearance.
        """
        df = pd.read_csv(self._csv_path())
        encoding = {whale_id: index
                    for index, whale_id in enumerate(pd.unique(df['Id']))}
        # Only the 'Id' column is mapped; the other columns are left alone.
        df['Id'] = df['Id'].map(encoding)
        return df

In [None]:
# Training dataset without any transform, so items hold the raw images.
dataset = WhalesDS(csv_file='../input/train.csv',
                               root_dir='../input/train/',
                               test=False)

In [None]:
# Display the untransformed image of sample 100 as a sanity check.
plt.imshow(dataset[100]['image'])
plt.show()

In [None]:
# batch_size, epoch and iteration
batch_size = 4    # samples per mini-batch
num_epochs = 5    # NOTE(review): the training loop below iterates range(3), not this value
image_size = 32   # side length (pixels) of the square crops fed to the network
channels = 3      # colour channels expected by the network input

In [None]:
class Rescale(object):
    """Resize the image of a sample, leaving its label untouched.

    Args:
        output_size (tuple or int): Target size. A tuple is used as-is as
            (height, width). An int is assigned to the shorter image edge
            and the longer edge is scaled by the same factor, which keeps
            the aspect ratio.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, sample):
        picture = sample['image']
        tag = sample['label']
        target = self.output_size
        rows, cols = picture.shape[:2]

        if not isinstance(target, int):
            # Explicit (height, width) request.
            out_rows, out_cols = target
        elif rows > cols:
            # Portrait: width is the shorter edge.
            out_rows, out_cols = target * rows / cols, target
        else:
            # Landscape or square: height is the shorter edge.
            out_rows, out_cols = target, target * cols / rows

        # The scaled edge may be fractional; truncate to whole pixels.
        resized = transform.resize(picture, (int(out_rows), int(out_cols)))
        return {'image': resized, 'label': tag}


class RandomCrop(object):
    """Crop randomly the image in a sample.

    Args:
        output_size (tuple or int): Desired output size. If int, square crop
            is made.

    Raises:
        ValueError: When called on an image smaller than the crop.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        if isinstance(output_size, int):
            self.output_size = (output_size, output_size)
        else:
            assert len(output_size) == 2
            self.output_size = output_size

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        h, w = image.shape[:2]
        new_h, new_w = self.output_size
        if new_h > h or new_w > w:
            raise ValueError(
                'crop size {} is larger than the image size {}'.format(
                    (new_h, new_w), (h, w)))

        # randint's upper bound is exclusive, so "+ 1" is needed to make the
        # last valid offset reachable and to accept an image that already
        # has exactly the crop size (offset 0 is then the only choice).
        top = np.random.randint(0, h - new_h + 1)
        left = np.random.randint(0, w - new_w + 1)

        image = image[top: top + new_h,
                      left: left + new_w]

        return {'image': image, 'label': label}


class ToTensor(object):
    """Convert ndarrays in sample to Tensors.

    The image comes back as a float64 tensor laid out C x H x W and the label
    as a float64 tensor.
    """

    def __call__(self, sample):
        image, label = sample['image'], sample['label']

        # The original code didn't expect gray scale images: those arrive as
        # 2-D (H x W) arrays, so replicate the single channel three times.
        # Checking the rank handles any image size and needs no globals.
        if image.ndim == 2:
            image = np.stack((image,) * 3, axis=-1)

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        image = image.transpose((2, 0, 1))
        return {'image': torch.from_numpy(image).double(),
                'label': torch.tensor(label).double()}

In [None]:
# Per-sample pipeline: upscale the short edge to 1.25x the target size, take a
# random image_size x image_size crop, then convert the sample to tensors.
train_transforms = transforms.Compose([
    Rescale(int(image_size*1.25)),
    RandomCrop(image_size),
    ToTensor(),
])

# Same training data as before, now with the pipeline applied on access.
transformed_dataset = WhalesDS(
    csv_file='../input/train.csv',
    root_dir='../input/train/',
    transform=train_transforms,
    test=False,
)

In [None]:
# Shape of the first sample's image after the transform pipeline.
transformed_dataset[0]['image'].shape

In [None]:
# Shape of the first sample's label after the transform pipeline.
transformed_dataset[0]['label'].shape

In [None]:
# Shuffled mini-batches loaded by 4 worker processes. The batch size comes
# from the `batch_size` constant defined with the other hyper-parameters
# instead of repeating the literal here.
dataloader = DataLoader(transformed_dataset, batch_size=batch_size,
                        shuffle=True, num_workers=4)

In [None]:
class Whales_CNN(nn.Module):
    """Small CNN: three conv + ReLU + max-pool stages, dropout, one linear layer.

    The linear layer is sized for 3 x 32 x 32 inputs: each 2x2 pooling halves
    the spatial size (32 -> 16 -> 8 -> 4), leaving 64 feature maps of 4 x 4.

    Args:
        num_classes (int): Number of output scores per image. Defaults to
            5005, the value previously hard-coded here.
    """

    def __init__(self, num_classes=5005):
        super(Whales_CNN, self).__init__()
        # MaxPool2d(kernel_size=2, stride=2) halves height and width.
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.25)
        # Conv2d(in_channels, out_channels, kernel_size); padding=1 with a
        # 3x3 kernel keeps the spatial size unchanged.
        # input to conv1: 3x32x32
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        # input to conv2 (after pooling): 16x16x16
        self.conv2 = nn.Conv2d(16, 32, 3, padding=1)
        # input to conv3 (after pooling): 32x8x8
        self.conv3 = nn.Conv2d(32, 64, 3, padding=1)
        # Dense layer on the flattened 64x4x4 feature maps
        self.fc1 = nn.Linear(64 * 4 * 4, num_classes)

    def forward(self, out):
        """Return the (batch, num_classes) scores for a batch of images."""
        # Three conv -> ReLU -> pool stages
        out = self.pool(F.relu(self.conv1(out)))
        out = self.pool(F.relu(self.conv2(out)))
        out = self.pool(F.relu(self.conv3(out)))

        # Flatten everything except the batch dimension, then classify
        out = out.view(out.size(0), -1)
        out = self.dropout(out)
        out = self.fc1(out)

        return out

# Instantiate the network defined above.
model = Whales_CNN()

# Cross-entropy loss on the network outputs, optimised with SGD (lr = 0.1).
error = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
# Last expression of the cell: shows the model's layers in the notebook.
model

In [None]:
total_iterations = len(dataloader)
# NOTE(review): the hyper-parameter cell defines num_epochs = 5, but this loop
# has always trained for 3 epochs; kept at 3 -- confirm which one is intended.
for epoch in range(3):
    for i_batch, sample_batched in enumerate(dataloader):
        images = sample_batched['image']
        # view() doubles as a shape check; the model weights are float32.
        train = images.view(images.shape[0], channels, image_size, image_size).float()
        # CrossEntropyLoss takes class indices, so turn each one-hot row into
        # the position of its 1.
        targets = sample_batched['label'].argmax(dim=1)

        optimizer.zero_grad()
        outputs = model(train)
        loss = error(outputs, targets)
        loss.backward()
        optimizer.step()

        # Progress line every 500 batches, counted from the start of each
        # epoch so it lines up with the batch index being printed.
        if i_batch % 500 == 0:
            print('Iter {} out of {}'.format(i_batch,total_iterations))
    print('Iter {} out of {}'.format(i_batch,total_iterations))
    # .item() prints the plain number instead of the tensor repr.
    print('Loss: {} '.format(loss.item()))

I'm still adjusting the evaluation part so that a submission can be generated :D