In [1]:
import numpy as np
from tqdm import tqdm
from time import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from sklearn.datasets import load_boston

In [2]:
torch.manual_seed(42)

n_fts = 32
n_hid = 2 * n_fts

## Neural Network / MLP

# Define the model
class MLP(nn.Module):
    def __init__(self, n_fts, n_hid): # Define layers in the constructor
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(13, n_hid, bias=True)     # set bias = True to include it
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(n_hid, n_fts, bias=True)    # set bias = True to include it
        self.fc3 = nn.Linear(n_fts, 1, bias=True)
        
    def forward(self, x): # Define forward pass in the forward method
        # print('lin', self.fc1.weight.dtype)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = MLP(n_fts, n_hid)

# Dataloader helper
class MLPDataset(torch.utils.data.Dataset):
    def __init__(self, X, y):
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, i):
        return self.X[i], self.y[i]

# Hyperparams for model
lr = 1e-2
reg_val = 1e-4
criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=reg_val)

# Define function to call for each training epoch (one complete pass over the training set)
# (sourced from /lecture/fmnist_mlp_torch.ipynb of Course Github)
def train(model, trainloader, criterion, optimizer, device, disable=True): #disable controls tqdm visibility
    model.train() # set model to training mode
    running_loss = 0
    with tqdm(total=len(trainloader), desc=f"Train", unit="batch", disable=disable) as pbar:
        # print(trainloader)
        for n_batch, (samples, labels) in enumerate(trainloader): # Iterate over batches
            samples, labels = samples.to(device), labels.to(device) # Move batch to device
            optimizer.zero_grad()
            samples = samples.to(torch.float32)
            labels = labels.to(torch.float32)
            # print('Sample', labels.dtype)
            output = model(samples) # Forward pass
            loss = criterion(output, labels) # Compute loss
            loss.backward() # Backward pass
            optimizer.step() # Update weights
            running_loss += loss.item()
            pbar.set_postfix({'loss': loss.item()})
            pbar.update() # Update progress bar
    return np.sqrt(running_loss) / len(trainloader) # return RMSE loss

# Define function to call for each validation epoch (one complete pass over the validation set)
# (sourced from /lecture/fmnist_mlp_torch.ipynb of Course Github)
def validate(model, valloader, criterion, device, disable=True): #disable controls tqdm visibility
    model.eval() # set model to evaluation mode (e.g. turn off dropout, batchnorm, etc.)
    running_loss = 0
    with torch.no_grad(): # no need to compute gradients for validation
        with tqdm(total=len(valloader), desc=f"Eval", unit="batch", disable=disable) as pbar:
            for n_batch, (samples, labels) in enumerate(valloader): # Iterate over batches
                samples, labels = samples.to(device), labels.to(device) # Move batch to device
                samples = samples.to(torch.float32)
                labels = labels.to(torch.float32)
                output = model(samples) # Forward pass
                loss = criterion(output, labels) # Compute loss
                running_loss += loss.item() 
                pbar.set_postfix({'loss': loss.item()})
                pbar.update() # Update progress bar
    return np.sqrt(running_loss) / len(valloader)  # return RMSE loss

## Set device for training
# (sourced from /lecture/fmnist_mlp_torch.ipynb of Course Github)

device = torch.device("cpu")
print(f'Using device: {device}')
model.to(device) # Move model to device

Using device: cpu


MLP(
  (fc1): Linear(in_features=13, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)

In [3]:
X, y = load_boston(return_X_y=True)
# dataset = MLPDataset(X, y)

X_train1, X_test, y_train1, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

trainset = MLPDataset(X_train, y_train)
valset = MLPDataset(X_val, y_val)
testset = MLPDataset(X_test, y_test)

# Run training and validation loop
# Save the quickest model to converge

n_epochs = 30

## Vary batchsizes for smallest run-time
# Sample batchsizes for training  
batchsizes_dict = {16: None, 32: None, 64: None, 128: None, 256: None} # to store runtimes
rts = [] # for runtimes

for B in batchsizes_dict:

    print("-----------------------------------------------------------------------")
    print(f"Batch size = {B}")

    seeder = 33

    # refresh model for a fair iteration
    model = MLP(n_fts, n_hid)
    model.to(device)
    # for refreshed model
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=reg_val)

    best_rmse = 1000
    train_loss_history = []
    val_loss_history = []

    # time marker
    start = time()
    # Iterate over epochs
    # (sourced from /lecture/fmnist_mlp_torch.ipynb of Course Github)
    for epoch in range(n_epochs):
        print(f"Epoch {epoch+1} of {n_epochs}")
        seeder += 1
        # Shuffle the data at the start of each epoch (only useful for training set)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=B, shuffle=True, worker_init_fn=lambda id: np.random.seed(id+seeder))
        valloader = torch.utils.data.DataLoader(valset, batch_size=B, shuffle=False, worker_init_fn=lambda id: np.random.seed(id+seeder))
        
        train_loss = train(model, trainloader, criterion, optimizer, device, disable=True) # Train
        val_loss = validate(model, valloader, criterion, device, disable=True) # Validate
        # mark the end of this epoch's training
        runtime = time() - start
        train_loss_history.append(train_loss)
        val_loss_history.append(val_loss)
        if val_loss < best_rmse: # Save best model
            # print("Updating best model")
            best_loss = val_loss
            torch.save(model.state_dict(), "best_model_pts.pt") # saving model parameters ("state_dict") saves memory
        # if val_loss <= 3 and train_loss <= 3: # stop at 80% accuracy
        #     runtime_model = runtime
        #     print("Attained Stopping Condition")
        #     break
    
    # rts.append(runtime_model)
    
    print("-----------------------------------------------------------------------")

# B_opt = min(batchsizes_dict, key=batchsizes_dict.get)
# print(f"Batch size of {B_opt} converges the fastest")


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

-----------------------------------------------------------------------
Batch size = 16
Epoch 1 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
Epoch 2 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef01f0>
Epoch 3 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa070>
Epoch 4 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
Epoch 5 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef0190>
Epoch 6 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa070>
Epoch 7 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
Epoch 8 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef0160>
Epoch 9 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa070>
Epoch 10 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
Epoch 11 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef01c0>
Epoch 12 of 30
<torch.utils.data.dataloader.DataLoader object a

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 25 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed8b0>
Epoch 26 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed280>
Epoch 27 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef0130>
Epoch 28 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa0a0>
Epoch 29 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed280>
Epoch 30 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed8b0>
-----------------------------------------------------------------------
-----------------------------------------------------------------------
Batch size = 64
Epoch 1 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa070>
Epoch 2 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed280>
Epoch 3 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eef0070>
Epoch 4 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa070>
Epoch 5 of 30
<torch.utils.data.dataloader.DataLoader object at 

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 29 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed430>
Epoch 30 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
-----------------------------------------------------------------------
-----------------------------------------------------------------------
Batch size = 128
Epoch 1 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa040>
Epoch 2 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed430>
Epoch 3 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed8b0>
Epoch 4 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa040>
Epoch 5 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed220>
Epoch 6 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed400>
Epoch 7 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eefa040>
Epoch 8 of 30
<torch.utils.data.dataloader.DataLoader object at 0x14eeed8b0>
Epoch 9 of 30
<torch.utils.data.dataloader.DataLoader object at 0x1

  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


In [4]:
a = np.array([1, 2, 3, 4])
b = np.array([5, 6, 7])

a = a.reshape(len(a), 1)
b = b.reshape(len(b), 1)

c = np.vstack((a,b))
c

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

In [5]:
d = np.array([[0]])
d = np.vstack((d, c))
d

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7]])

In [31]:
np_a = np.array([1, 2, 3, 4])
np_d = np.array([1, 2, 3, 4])
np_b = np.array([3, 4, 5, 6])
np_c = np.array([4, 5, 6, 7])

a_ = np_a.reshape(4, 1)
b_ = np_b.reshape(4, 1)
c_ = np_c.reshape(4, 1)
d_ = np_d.reshape(4, 1)

a = torch.from_numpy(a_)
b = torch.from_numpy(b_)
c = torch.from_numpy(c_)
d = torch.from_numpy(d_)

a = a.to(torch.float32)
b = b.to(torch.float32)
c = c.to(torch.float32)
d = d.to(torch.float32)

ab = np.vstack((a,b))
cd = np.vstack((c,d))

mean_squared_error(np_a, np_b, squared=False)
mean_squared_error(np_d, np_c, squared=False)
mean_squared_error(ab, cd, squared=False)

2.5495098

In [39]:
crit = nn.MSELoss()
loss1 = crit(a, b) * 4
loss2 = crit(c, d) * 4

loss = np.sqrt((loss2.item() + loss1.item())/8)
loss
# np.sqrt(loss)

2.5495097567963922

In [4]:
arr = np.array([1,2,3,4,5,6,7,8,9])

In [9]:
x = None
x == None

True

In [6]:
def testrelu(x):
    for i in range(len(x)):
        x[i] = np.max(0 ,x[i])
    return x

AxisError: axis 1 is out of bounds for array of dimension 0

In [35]:
lll = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]
ll = np.array(lll)
ll = np.floor(ll/5).astype(int)
ll = ll * 5
ll = ll.tolist()
ll

[0, 0, 0, 0, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 15, 15, 15]

In [37]:
ll = [1,2,3,4,5]
min(ll)

1