# Data Loader

* 1 epoch : one forward and backward pass over all training samples
* batch_size : number of training samples used in one forward and backward pass (a subset of the dataset, not the whole dataset)
* number_of_iterations : ceil(total_training_samples / batch_size), i.e. the passes needed to complete 1 epoch
* for 20 epochs : total iterations required = number_of_iterations * 20

In [40]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import dataset,dataloader
import torchvision
import math

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler



In [18]:
# Read the wine CSV into a NumPy array, skipping the first row of the file.
xy = np.loadtxt(
    "./data/wine/wine.csv",
    delimiter=",",
    skiprows=1,
)

In [19]:
# Column 0 is taken as the label; every other column is an input feature.
y = xy[:, [0]]  # indexing with a list keeps the result 2-D: (n_samples, 1)
x = xy[:, 1:]
x, y, x.shape, y.shape

(array([[1.423e+01, 1.710e+00, 2.430e+00, ..., 1.040e+00, 3.920e+00,
         1.065e+03],
        [1.320e+01, 1.780e+00, 2.140e+00, ..., 1.050e+00, 3.400e+00,
         1.050e+03],
        [1.316e+01, 2.360e+00, 2.670e+00, ..., 1.030e+00, 3.170e+00,
         1.185e+03],
        ...,
        [1.327e+01, 4.280e+00, 2.260e+00, ..., 5.900e-01, 1.560e+00,
         8.350e+02],
        [1.317e+01, 2.590e+00, 2.370e+00, ..., 6.000e-01, 1.620e+00,
         8.400e+02],
        [1.413e+01, 4.100e+00, 2.740e+00, ..., 6.100e-01, 1.600e+00,
         5.600e+02]]),
 array([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.

In [44]:
# implementing a custom Dataset (to be wrapped by a DataLoader)

class WineDataSet(dataset.Dataset):
    """Map-style dataset built from the wine CSV file.

    The first row of the file is skipped; column 0 is used as the label and
    the remaining columns as input features. Features are standardized with
    ``StandardScaler`` (fitted on every row of the file) before being stored
    as tensors.
    """

    def __init__(self, csv_path="./data/wine/wine.csv"):
        """Load the CSV, split it into features/labels and normalize.

        Args:
            csv_path: Location of the comma-separated data file. Defaults to
                the path this notebook has always used, so ``WineDataSet()``
                keeps working unchanged.
        """
        super().__init__()

        # ndmin=2 keeps the array 2-D even when the file has one data row,
        # so the column slicing below always works.
        xy = np.loadtxt(
            csv_path,
            delimiter=",",
            skiprows=1,
            dtype=np.float32,
            ndmin=2,
        )

        # splitting the data into input and label
        x = xy[:, 1:]
        y = xy[:, [0]]  # list index keeps the shape (n_sample, 1)

        # data normalization (zero mean / unit variance per feature column)
        scaler = StandardScaler()
        x = scaler.fit_transform(x)

        self.x = torch.tensor(x)
        self.y = torch.tensor(y)

        self.n_sample = x.shape[0]

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return (self.x[index], self.y[index])

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_sample

In [45]:
# Build the dataset once; it is reused by the cells that follow.
wine_data_set = WineDataSet()

# Indexing the dataset returns one (feature, label) pair.
row1 = wine_data_set[0]
feature, label = row1

print(f"feature:{feature} \n label:{label}")


feature:tensor([ 1.5186, -0.5622,  0.2321, -1.1696,  1.9139,  0.8090,  1.0348, -0.6596,
         1.2249,  0.2517,  0.3622,  1.8479,  1.0130]) 
 label:tensor([1.])


# DataLoader

In [47]:
# Wrap the dataset in a DataLoader that yields shuffled mini-batches of 4.
wine_data_laoder = dataloader.DataLoader(
    dataset=wine_data_set,
    batch_size=4,
    shuffle=True,
)

# num_workers = 2 will cause an error on Windows



## Don't use train_test_split: it converts the Dataset into a plain list

In [58]:

# Hold out 20% of the samples; the fixed seed makes the split repeatable.
train, test = train_test_split(
    wine_data_set,
    test_size=0.2,
    random_state=21,
)

In [57]:
# Inspect what kind of object train_test_split returned for the training part.
type(train)

list

In [51]:
# Pull a single batch out of the loader to inspect its structure.
data_iter = iter(wine_data_laoder)

# Use the builtin next(): the iterator's `.next()` method was only a
# Python 2 style alias and is not available in recent PyTorch releases.
first_batch = next(data_iter)

# Each batch unpacks into a (features, labels) pair.
feature, label = first_batch

print(f"batch_features:{feature} \n label:{label}")

batch_features:tensor([[ 0.3698, -0.5533, -0.8280, -0.7492, -0.4031,  0.1681,  0.1614, -0.7401,
         -0.4221, -0.4793,  0.2744,  0.2236,  1.7136],
        [-0.3466, -0.5263, -0.3162,  0.9024, -1.1053, -1.4663, -0.2703,  0.9520,
          0.0685, -0.7605, -0.3398, -0.2707, -0.8244],
        [ 1.4321,  0.1559,  0.4148,  0.1517, -0.6138, -0.9856, -1.3345,  0.6297,
         -0.6148,  2.0080, -1.4805, -1.2736, -0.2767],
        [ 0.9010, -0.7508,  1.2190,  0.9024,  0.0884,  1.1295,  1.2256, -0.5790,
          1.3826,  0.2777,  1.0203,  0.1389,  1.7136]]) 
 label:tensor([[1.],
        [2.],
        [3.],
        [1.]])


# Model Designing

In [38]:
# A single linear layer mapping 13 input features to 1 output value.
model = nn.Linear(
    in_features=13,
    out_features=1,
)


# Loss Function and Optimizer

In [39]:
# Mean-squared-error loss between the predictions and the labels.
loss_function = nn.MSELoss()

# SGD optimizer over the model's parameters. The name `criterian` is kept
# as-is because the training cells refer to it.
criterian = torch.optim.SGD(
    params=model.parameters(),
    lr=0.01,
)

# Training Loop

In [65]:
number_of_epochs = 100

# Iterations per epoch = ceil(number of samples / batch size).
num_total_sample = len(wine_data_set)
number_of_iteration = math.ceil(
    num_total_sample / wine_data_laoder.batch_size
)

for epoch in range(number_of_epochs):
    for i, (features, label) in enumerate(wine_data_laoder):
        # Forward pass and loss for the current mini-batch.
        y_predict = model(features)
        loss = loss_function(y_predict, label)

        # Backward pass, parameter update, then reset the gradients so they
        # do not accumulate into the next iteration.
        loss.backward()
        criterian.step()
        criterian.zero_grad()

        # Only report every 10th iteration.
        if i % 10 != 0:
            continue
        print(f"---------------- epoch : {epoch +1 } and iteration : {i+1} ----------------")
        print(f"loss : {loss}")

# NOTE: each epoch runs up to 45 iterations (one per mini-batch)
print(f"number of iteration:{number_of_iteration}")

---------------- epoch : 1 and iteration : 1 ----------------
loss : 0.028632476925849915
---------------- epoch : 1 and iteration : 11 ----------------
loss : 0.03781381994485855
---------------- epoch : 1 and iteration : 21 ----------------
loss : 0.06151648983359337
---------------- epoch : 1 and iteration : 31 ----------------
loss : 0.022843612357974052
---------------- epoch : 1 and iteration : 41 ----------------
loss : 0.03822172060608864
---------------- epoch : 2 and iteration : 1 ----------------
loss : 0.08707098662853241
---------------- epoch : 2 and iteration : 11 ----------------
loss : 0.05347324162721634
---------------- epoch : 2 and iteration : 21 ----------------
loss : 0.004531173966825008
---------------- epoch : 2 and iteration : 31 ----------------
loss : 0.054783109575510025
---------------- epoch : 2 and iteration : 41 ----------------
loss : 0.1157364621758461
---------------- epoch : 3 and iteration : 1 ----------------
loss : 0.03612862527370453
----------

In [62]:
number_of_epochs = 100

# number of iteration = total sample size / batch size
# (computed for reference only; the loop below does not use it)
num_total_sample = len(wine_data_set)
number_of_iteration = num_total_sample / wine_data_laoder.batch_size

for epoch in range(number_of_epochs):
    # `train` is the plain list produced by train_test_split, so every pass
    # of this inner loop handles ONE sample instead of one mini-batch.
    for i, (features, label) in enumerate(train):
        # Forward pass and loss for the current sample.
        y_predict = model(features)
        loss = loss_function(y_predict, label)

        # Backward pass, parameter update, then reset the gradients.
        loss.backward()
        criterian.step()
        criterian.zero_grad()

        # Only report every 10th iteration.
        if i % 10 != 0:
            continue
        print(f"---------------- epoch : {epoch +1 } and iteration : {i+1} ----------------")
        print(f"loss : {loss}")

# ERROR: the inner loop runs once per training sample (up to the full
# training-set size), not once per batch.

---------------- epoch : 1 and iteration : 1 ----------------
loss : 0.12505996227264404
---------------- epoch : 1 and iteration : 11 ----------------
loss : 0.021922564134001732
---------------- epoch : 1 and iteration : 21 ----------------
loss : 0.07766139507293701
---------------- epoch : 1 and iteration : 31 ----------------
loss : 0.011426279321312904
---------------- epoch : 1 and iteration : 41 ----------------
loss : 0.036220937967300415
---------------- epoch : 1 and iteration : 51 ----------------
loss : 0.03864816948771477
---------------- epoch : 1 and iteration : 61 ----------------
loss : 0.006207630503922701
---------------- epoch : 1 and iteration : 71 ----------------
loss : 0.0033814841881394386
---------------- epoch : 1 and iteration : 81 ----------------
loss : 0.008251902647316456
---------------- epoch : 1 and iteration : 91 ----------------
loss : 0.04013736918568611
---------------- epoch : 1 and iteration : 101 ----------------
loss : 0.3840094208717346
----