In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import sklearn
import torch
import random
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

In [3]:
def set_seed(seed_value = 42):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed_value)
set_seed()

In [4]:
X, y = make_regression(
    n_samples = 10000,
    n_features = 40,
    n_informative = 35,
    bias=0.895
)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size = 0.3)

In [6]:
class customDataset(Dataset):
    def __init__(self, X_type, y_type):
        self.X_type = X_type
        self.y_type = y_type
        
    def __len__(self):
        return len(self.X_type)
    
    def __getitem__(self, index):
        return {
            "x": torch.Tensor(self.X_type[index]),
            "y": torch.Tensor([self.y_type[index]])
            }

In [7]:
train_dataset = customDataset(X_train, y_train)
test_dataset  = customDataset(X_test, y_test)

In [12]:
train_dataloader = DataLoader(
    dataset = train_dataset,
    batch_size = 1000,
    num_workers = 4
)
test_dataloader = DataLoader(
    dataset = test_dataset,
    batch_size = 1000,
    num_workers = 4
)

In [13]:
W = torch.nn.Parameter(torch.randn((X.shape[1], 1), dtype = torch.float32))
b = torch.nn.Parameter(torch.randn(1, dtype = torch.float32))

In [14]:
model = lambda X,W,b : torch.matmul(X,W) + b

In [15]:
num_epochs = 15
loss_function = torch.nn.MSELoss()
learning_rate = 1e-1

for epoch in range(num_epochs):
    epoch_loss = 0
    for batch in train_dataloader:
        output = model(X= batch["x"], W= W, b= b)
        loss = loss_function(output, batch["y"])
        loss.backward()
        
        with torch.no_grad():
            W = W - learning_rate*W.grad
            b = b - learning_rate*b.grad
        
        W.requires_grad_(True)
        b.requires_grad_(True)
        
        epoch_loss += loss.item()
    print(f"epoch = {epoch}, loss = {epoch_loss/len(train_dataloader)}")
    
        

epoch = 0, loss = 37840.38071986607
epoch = 1, loss = 1805.4407566615514
epoch = 2, loss = 91.29671805245536
epoch = 3, loss = 4.833731617246356
epoch = 4, loss = 0.26535344389932497
epoch = 5, loss = 0.014993599349898952
epoch = 6, loss = 0.000867343696882017
epoch = 7, loss = 5.1142833269425735e-05
epoch = 8, loss = 3.0700227122386944e-06
epoch = 9, loss = 1.8774064106505e-07
epoch = 10, loss = 1.2429019718191675e-08
epoch = 11, loss = 2.890777173192305e-09
epoch = 12, loss = 2.3364945533899117e-09
epoch = 13, loss = 2.1871178519055387e-09
epoch = 14, loss = 2.1891487353034987e-09


#### Evaluating on Test Dataset

In [16]:
from sklearn.metrics import mean_squared_error

for batch in test_dataloader:
    error = 0
    with torch.no_grad():
        y_pred = model(X = batch["x"] , W = W, b = b)
        error += mean_squared_error(batch["y"].numpy(), y_pred.numpy())
        
print(f"Mean Squared Error :- {error/len(test_dataloader):.12f}")


Mean Squared Error :- 0.000000000732


#### Evaluating on Train Set

In [17]:
for batch in train_dataloader:
    error = 0
    with torch.no_grad():
        y_pred = model(X = batch["x"] , W = W, b = b)
        error += mean_squared_error(batch["y"].numpy(), y_pred.numpy())
    
print(f"Mean Squared Error :- {error/len(train_dataloader):.12f}")

Mean Squared Error :- 0.000000000300
