#### Linear Regression using StandardScaler vs MinMaxScaler for scaling y val

In [26]:
import torch
import torch.nn as nn

In [27]:
from torch.utils.data import Dataset
class Data(Dataset):
    def __init__(self, X, y):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y)
        self.len = self.X.shape[0]
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]
    
    def __len__(self):
        return self.len

In [28]:
class LinearReg(nn.Module):
    def __init__(self,input_dim,output_dim):
        super(LinearReg,self).__init__()
        self.layer = nn.Linear(input_dim,output_dim)
        print(self.layer.weight)
        print(self.layer.bias)

    def forward(self,X):
        X = self.layer(X) # layer has the w and b (random at start)
        return X

In [29]:
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [51]:
X, y = make_regression(n_samples= 1000, n_features= 2, n_informative= 2, n_targets= 1, noise= 30, random_state=13)

Using StandardScaler

In [52]:
X_train , X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=13)

In [53]:
from sklearn.preprocessing import StandardScaler
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [54]:
traindata = Data(X_train,y_train)
testdata = Data(X_test,y_test)

In [55]:
from torch.utils.data import DataLoader
batch_size = 64
num_workers = 0

train_loader = DataLoader(
    traindata,
    batch_size = batch_size, 
    shuffle = True, 
    num_workers = num_workers)

In [56]:
epochs = 100
lr = 0.01
X_dim = X_train.shape[1]
y_dim = y_train.shape[1] if len(y_train.shape) > 1 else 1
model = LinearReg(X_dim,y_dim)
criterion = nn.MSELoss()
# optimizer = torch.optim.SGD(model.parameters(),lr=lr)

Parameter containing:
tensor([[0.0435, 0.5273]], requires_grad=True)
Parameter containing:
tensor([-0.0418], requires_grad=True)


In [57]:
for epoch in range(epochs):
    running_loss = 0.0
    for i,(xi, yi) in enumerate(train_loader):
        outputs = model(xi)
        yi = yi.view(-1,1) # done to match the output shape
        
        loss = criterion(outputs,yi)
        
        model.zero_grad()
        loss.backward()

        with torch.no_grad():
            for param in model.parameters():
                param -= lr * param.grad

        # optimizer.step()
        running_loss += loss.item()
    if not ((epoch + 1) % (epochs // 10)):
        print(f'Epochs:{epoch + 1:5d} | ' \
            f'Batches per epoch: {i + 1:3d} | ' \
            f'Loss: {running_loss / (i + 1):.10f}')

Epochs:   10 | Batches per epoch:  13 | Loss: 0.1146976724
Epochs:   20 | Batches per epoch:  13 | Loss: 0.1102002716
Epochs:   30 | Batches per epoch:  13 | Loss: 0.1084873120
Epochs:   40 | Batches per epoch:  13 | Loss: 0.1084082602
Epochs:   50 | Batches per epoch:  13 | Loss: 0.1078679000
Epochs:   60 | Batches per epoch:  13 | Loss: 0.1091840600
Epochs:   70 | Batches per epoch:  13 | Loss: 0.1104319216
Epochs:   80 | Batches per epoch:  13 | Loss: 0.1084396186
Epochs:   90 | Batches per epoch:  13 | Loss: 0.1069400482
Epochs:  100 | Batches per epoch:  13 | Loss: 0.1081726161


In [58]:
ypred = model(torch.FloatTensor(X_test))

In [59]:
r2_score(y_test,ypred.detach().numpy())

0.9008026435059427

In [60]:
mean_absolute_error(y_test,ypred.detach().numpy())

0.24817594646872024

In [61]:
mean_squared_error(y_test,ypred.detach().numpy())

0.10280250739739649

Using MinMax Scaler

In [62]:
X_train , X_test, y_train, y_test = train_test_split(X, y,test_size=0.2,random_state=13)

In [63]:
from sklearn.preprocessing import MinMaxScaler
scaler_y = MinMaxScaler()
y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

In [64]:
traindata = Data(X_train,y_train)
testdata = Data(X_test,y_test)

In [65]:
from torch.utils.data import DataLoader
batch_size = 64
num_workers = 0

train_loader = DataLoader(
    traindata,
    batch_size = batch_size, 
    shuffle = True, 
    num_workers = num_workers)

In [66]:
epochs = 100
lr = 0.01
X_dim = X_train.shape[1]
y_dim = y_train.shape[1] if len(y_train.shape) > 1 else 1
model = LinearReg(X_dim,y_dim)
criterion = nn.MSELoss()
# optimizer = torch.optim.SGD(model.parameters(),lr=lr)

Parameter containing:
tensor([[ 0.4001, -0.3783]], requires_grad=True)
Parameter containing:
tensor([-0.4122], requires_grad=True)


In [67]:
for epoch in range(epochs):
    running_loss = 0.0
    for i,(xi, yi) in enumerate(train_loader):
        outputs = model(xi)
        yi = yi.view(-1,1) # done to match the output shape
        
        loss = criterion(outputs,yi)
        
        model.zero_grad()
        loss.backward()

        with torch.no_grad():
            for param in model.parameters():
                param -= lr * param.grad

        # optimizer.step()
        running_loss += loss.item()
    if not ((epoch + 1) % (epochs // 10)):
        print(f'Epochs:{epoch + 1:5d} | ' \
            f'Batches per epoch: {i + 1:3d} | ' \
            f'Loss: {running_loss / (i + 1):.10f}')

Epochs:   10 | Batches per epoch:  13 | Loss: 0.0109123116
Epochs:   20 | Batches per epoch:  13 | Loss: 0.0032261905
Epochs:   30 | Batches per epoch:  13 | Loss: 0.0032324750
Epochs:   40 | Batches per epoch:  13 | Loss: 0.0031755640
Epochs:   50 | Batches per epoch:  13 | Loss: 0.0031834805
Epochs:   60 | Batches per epoch:  13 | Loss: 0.0031587689
Epochs:   70 | Batches per epoch:  13 | Loss: 0.0032177895
Epochs:   80 | Batches per epoch:  13 | Loss: 0.0032254524
Epochs:   90 | Batches per epoch:  13 | Loss: 0.0032235899
Epochs:  100 | Batches per epoch:  13 | Loss: 0.0032200247


In [68]:
ypred = model(torch.FloatTensor(X_test))

In [69]:
r2_score(y_test,ypred.detach().numpy())

0.9009169793642788

In [70]:
mean_absolute_error(y_test,ypred.detach().numpy())

0.04258436561800623

In [71]:
mean_squared_error(y_test,ypred.detach().numpy())

0.003026536834517021