# Custom DataLoader

In [29]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import torch.nn.functional as F
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

### Create a 'Dataset' class

You must create a `Dataset` class for each dataset and define:
1. `__init__`
2. `__getitem__`
3. `__len__`

Define a set of parameters for a random dataset

In [2]:
# Parameters for the random dataset and its DataLoader
input_size = 5  # number of features per sample (passed as `size` to RandomDataset)
output_size = 2  # NOTE(review): not referenced by any other cell visible in this notebook

batch_size = 30  # samples per batch served by the DataLoader
data_size = 100  # total number of samples (passed as `length` to RandomDataset)

In [3]:
class RandomDataset(Dataset):
    """Map-style dataset backed by a single tensor of standard-normal noise.

    Attributes are `len` (number of samples) and `data` (a tensor of shape
    ``(length, size)``). Each item is one row of `data`; there is no label.
    """

    def __init__(self, size, length):
        """Draw `length` samples with `size` features each."""
        samples = torch.randn(length, size)
        self.len = length
        self.data = samples

    def __len__(self):
        """Return the number of samples given at construction time."""
        return self.len

    def __getitem__(self, index):
        """Return the row(s) of the backing tensor selected by `index`."""
        row = self.data[index]
        return row

In [6]:
# Build the random dataset, then display the shape of its backing tensor.
dataset = RandomDataset(input_size, data_size)
dataset.data.shape

torch.Size([100, 5])

In [7]:
# Wrap the dataset in a DataLoader that serves shuffled mini-batches of `batch_size` samples.
rand_loader = DataLoader(dataset,
                         batch_size=batch_size, shuffle=True)

In [10]:
# Pull a single batch from the loader.
batch = next(iter(rand_loader))
x=batch # each item is a lone tensor (no label), so the batch is one tensor rather than a tuple to unpack

In [11]:
# Shape of the batch: `batch_size` rows by `input_size` columns.
batch.shape

torch.Size([30, 5])

In [14]:
# `x` is the same tensor as `batch`, so `size()` reports the same dimensions as `batch.shape`.
x.size()

torch.Size([30, 5])

# Custom DataSet

Custom dataset built from a CSV file that contains the independent and dependent variables.

In [15]:
class California_Dataset(Dataset):
    """California House Prices Dataset.

    Reads a CSV file and serves ``(features, target)`` pairs. The features
    are the first eight columns of the file and the target is its last column.
    """

    def __init__(self,csv_file,normalize=False):
        """
        Args:
            csv_file (string): Path to the csv file holding the data.
            normalize (bool, optional): If True, features and targets are each
                passed through ``F.normalize(..., dim=0)``, so every column is
                scaled by its norm taken over all rows of the file.
        """
        self.raw_data = pd.read_csv(csv_file)
        self.normalize = normalize

        # Convert (and optionally normalise) the whole table once, here.
        # Doing it inside __getitem__ would repeat this full-table work for
        # every single sample that is fetched.
        data = torch.from_numpy(self.raw_data.iloc[:,:8].values).float()
        targets = torch.from_numpy(self.raw_data.iloc[:,-1:].values).float().view(-1,1)
        if self.normalize:
            data = F.normalize(data,dim=0)
            targets = F.normalize(targets,dim=0)
        self.data = data
        self.targets = targets

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.raw_data)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position `idx`."""
        return self.data[idx], self.targets[idx]

In [21]:
# Load the training CSV without normalisation.
# NOTE(review): the path points into the user's home directory; adjust it for your environment.
trainset = California_Dataset('~/notebooks/California/california_housing_train.csv',normalize=False)

# Serve the training set in shuffled batches of 100 samples.
trainloader = DataLoader(trainset,batch_size=100,shuffle=True)

In [22]:
# Each batch is a (features, targets) pair, mirroring the tuple returned by the dataset's __getitem__.
batch = next(iter(trainloader))
x,y=batch

In [23]:
# Inspect the first sample of the batch: its feature vector and its target.
x[0],y[0]

(tensor([-122.1500,   37.7200,   47.0000, 1190.0000,  251.0000,  540.0000,
          266.0000,    3.3750]), tensor([198300.]))

## Dataset from X and Y

In [14]:
# Synthetic image-like inputs: 1000 samples of shape 3x28x28 drawn from a standard normal.
X = torch.randn(1000, 3, 28, 28)
# One integer label in [0, 10) per sample.
y = torch.randint(low=0, high=10, size=(1000,))
# Map the class names 'A'..'J' to the label indices 0..9.
classes = {name: idx for idx, name in enumerate('ABCDEFGHIJ')}

In [30]:
# Per-channel normalisation for 3-channel tensors: mean 0.5 and std 0.5 for every channel.
tfms = transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))

In [38]:
class MyDataSet(Dataset):
    """Map-style dataset over in-memory inputs `x` and labels `y`.

    `classes` is a mapping from class name to label index. It is kept as
    `classes_to_idx`, and its keys are exposed as the list `classes`.
    An optional `transform` is applied to each input when it is fetched.
    """

    def __init__(self,x,y,classes,transform=None):
        self.transform = transform
        self.classes_to_idx = classes
        self.classes = list(classes)
        self.data = x
        self.targets = y

    def __len__(self):
        """Return the number of stored inputs."""
        return len(self.data)

    def __getitem__(self,idx):
        """Return ``(input, label)`` at `idx`, with the transform applied to the input if one was given."""
        sample = self.data[idx]
        label = self.targets[idx]
        if self.transform is None:
            return sample, label
        return self.transform(sample), label

The above is a simple, normally distributed dataset.

To iterate through the data, all we need to do is wrap the dataset in a `DataLoader`:

In [39]:
# Create the dataset (inputs are normalised by `tfms` when fetched) and its dataloader.
# `drop_last=True` discards a final batch that would hold fewer than 100 samples.
xy_dataset = MyDataSet(X,y,classes,transform=tfms)
xy_loader = DataLoader(xy_dataset,batch_size=100,shuffle=True,drop_last=True)

In [40]:
# The wrapped dataset, including its class mapping, stays reachable through the loader.
xy_loader.dataset.classes_to_idx

{'A': 0,
 'B': 1,
 'C': 2,
 'D': 3,
 'E': 4,
 'F': 5,
 'G': 6,
 'H': 7,
 'I': 8,
 'J': 9}

In [41]:
# Fetch one batch and show the shapes of its inputs and its labels.
img,label = next(iter(xy_loader))
img.shape,label.shape,

(torch.Size([100, 3, 28, 28]), torch.Size([100]))

### Dataset with limited number of datapoints

To create a dataset that exposes only `n` datapoints out of a larger collection:

In [60]:
# create random data of 1000 points (2-channel 32x32 inputs, integer labels in [0, 10))
x = torch.randn(1000,2,32,32)
y = torch.randint(0,10,(1000,))

In [61]:
class LimiteDataset(Dataset):
    """Map-style dataset that exposes at most `n` leading samples of `x`/`y`.

    Args:
        x: indexable collection of inputs.
        y: tensor of labels aligned with `x`.
        n (int, optional): maximum number of samples to expose. ``None``
            exposes everything. Values larger than ``len(x)`` are clamped so
            the reported length never exceeds the data actually available.
        transform (callable, optional): applied to each input when fetched.

    Note that `classes` holds the unique values of the full `y`, not only
    those of the first `n` labels.
    """
    def __init__(self,x,y,n=None,transform=None):
        self.data = x
        self.targets = y
        self.classes = torch.unique(y)
        self.transform = transform
        self.max_size = n #define size
    def __len__(self):
        """Return the number of exposed samples."""
        available = len(self.data)
        if self.max_size is None:
            return available
        # Clamp so a too-large (or negative) `n` cannot report samples that do not exist.
        return max(0, min(self.max_size, available))
    def __getitem__(self,idx):
        """Return ``(input, label)`` at `idx`.

        Raises:
            IndexError: if an integer `idx` lies outside the exposed range.
                This also makes plain ``for item in dataset`` stop after
                ``len(dataset)`` items instead of running through all of `x`.
        """
        if isinstance(idx, int):
            size = len(self)
            if not -size <= idx < size:
                raise IndexError('index {} is out of range for a dataset of size {}'.format(idx, size))
            if idx < 0:
                # Count negative indices from the end of the exposed range, not of the full data.
                idx += size
        img,targets = self.data[idx],self.targets[idx]
        # Use the instance's transform; a bare `transform` is not defined here.
        if self.transform is not None:
            img = self.transform(img)
        return img,targets

In [62]:
# `x` has 2 channels, so the Normalize transform needs 2 means and 2 stds;
# the 3-channel `tfms` would fail as soon as a sample is fetched.
limit_dataset = LimiteDataset(x,y,n=300,transform=transforms.Normalize((0.5,0.5),(0.5,0.5)))

In [63]:
# Serve the limited dataset in unshuffled batches of 10 samples.
limit_loader = DataLoader(limit_dataset,batch_size=10)

In [64]:
# The reported length is the requested limit, not the size of the underlying data.
len(limit_dataset)

300

## Build the NN

In [24]:
class Net(nn.Module):
    """Fully connected regression network: 8 inputs -> three 100-unit hidden layers -> 1 output.

    Every hidden layer is Linear -> BatchNorm1d -> ReLU; the output layer is
    a plain Linear with no activation.
    """

    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(8,100)
        self.bn1 = nn.BatchNorm1d(100)
        self.lin2 = nn.Linear(100,100)
        self.bn2 = nn.BatchNorm1d(100)
        self.lin3 = nn.Linear(100,100)
        self.bn3 = nn.BatchNorm1d(100)
        self.lin4 = nn.Linear(100,1)

    def forward(self,x):
        """Map a batch of 8-feature rows to one prediction per row."""
        x = F.relu(self.bn1(self.lin1(x)))
        x = F.relu(self.bn2(self.lin2(x)))
        x = F.relu(self.bn3(self.lin3(x)))
        x = self.lin4(x)
        return x

    def learn(self,epochs,dataloader,lr):
        """Train with Adam on an MSE loss and print a two-line summary per epoch.

        Args:
            epochs (int): number of passes over `dataloader`.
            dataloader: iterable yielding ``(inputs, targets)`` batches.
            lr (float): learning rate for Adam.

        Raises:
            ValueError: if `dataloader` yields no batches.

        The printed "Accuracy" is the percentage of predictions that are
        exactly equal to their target. For a regression target this is
        almost always 0; it is kept only so the printed summary stays the same.
        """
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(self.parameters(),lr=lr)
        # Make sure BatchNorm uses batch statistics while training.
        self.train()
        for epoch in range(epochs):
            losses = []
            correct=0
            total=0
            for x,y in dataloader:
                optimizer.zero_grad()
                output = self(x)
                loss = loss_fn(output,y)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
                # Reuse the predictions already computed. A second forward pass
                # in train mode would update the BatchNorm running statistics
                # twice per batch.
                predict = output.detach().reshape(-1)
                correct += int((predict == y.reshape(-1)).sum().item())
                total += predict.numel()
            if not losses:
                raise ValueError('dataloader produced no batches')
            #if epoch%(epochs/10)==0:
            print('Epoch: {} Loss: {}'.format(epoch,round(losses[-1],8)))
            # Average over every batch of the epoch, not just the last batch's loss.
            print('Average Loss: {} Accuracy {}'.format(sum(losses)/len(losses),correct/total*100))



net = Net()

In [69]:
def learn(net,epochs,train,val,lr):
    """Train `net` with Adam on an MSE loss and return every batch loss.

    Args:
        net (nn.Module): model to train; it is switched to training mode.
        epochs (int): number of passes over `train`.
        train: iterable yielding ``(inputs, targets)`` batches.
        val: accepted for interface compatibility; it is not used.
        lr (float): learning rate for Adam.

    Returns:
        list[float]: the loss of every batch, in order, across all epochs.

    Raises:
        ValueError: if an epoch sees no samples (empty `train`).

    One summary line is printed per epoch. Its "Accuracy" is the percentage
    of predictions exactly equal to their target, which for a regression
    target is almost always 0.
    """
    loss_fn = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(),lr=lr)
    losses = []
    net.train()
    for epoch in range(epochs):
        correct=0
        total=0
        eloss=[]
        for x,y in train:
            optimizer.zero_grad()
            output = net(x)
            loss = loss_fn(output,y)
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            losses.append(batch_loss)
            eloss.append(batch_loss)
            # Count exact matches for the whole batch at once instead of element by element.
            flat = output.detach().reshape(-1)
            correct += int((flat == y.reshape(-1)).sum().item())
            total += flat.numel()
        if total == 0:
            raise ValueError('training data produced no samples')
        accuracy=round(correct/total*100,5)
        print('Epoch:{} Avg.Training Loss: {} Accuracy {}%'.format(epoch,round(np.mean(eloss),8),accuracy))
    return losses


In [70]:
# Train for 50 epochs with lr=0.1; the same loader is passed as both the training and validation set.
# NOTE(review): the recorded output shows the loss staying around 3.6e9. The targets are
# unnormalised (normalize=False), so consider normalising them or lowering lr — to be confirmed.
losses = learn(net,50,trainloader,trainloader,1e-1)

Epoch:0 Avg.Training Loss: 3655968906.5411763 Accuracy 0.0%
Epoch:1 Avg.Training Loss: 3631510902.964706 Accuracy 0.0%
Epoch:2 Avg.Training Loss: 3615545394.4470587 Accuracy 0.0%
Epoch:3 Avg.Training Loss: 3568090072.847059 Accuracy 0.00588%
Epoch:4 Avg.Training Loss: 3611755713.5058823 Accuracy 0.0%
Epoch:5 Avg.Training Loss: 3544755456.0 Accuracy 0.0%
Epoch:6 Avg.Training Loss: 3583977121.882353 Accuracy 0.0%
Epoch:7 Avg.Training Loss: 3607246887.152941 Accuracy 0.0%
Epoch:8 Avg.Training Loss: 3550331577.9764705 Accuracy 0.0%
Epoch:9 Avg.Training Loss: 3581355556.141177 Accuracy 0.0%
Epoch:10 Avg.Training Loss: 3668213839.8117647 Accuracy 0.0%
Epoch:11 Avg.Training Loss: 3608610880.0 Accuracy 0.0%
Epoch:12 Avg.Training Loss: 3560448795.105882 Accuracy 0.0%
Epoch:13 Avg.Training Loss: 3545008928.3764706 Accuracy 0.0%
Epoch:14 Avg.Training Loss: 3589204509.364706 Accuracy 0.0%
Epoch:15 Avg.Training Loss: 3608514659.388235 Accuracy 0.0%
Epoch:16 Avg.Training Loss: 3496246485.082353 Accu

In [45]:
# Distinct label values present in `y`.
torch.unique(y)

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])