In [1]:
import torch

In [2]:
import torch.nn as nn
torch.manual_seed(42)

<torch._C.Generator at 0x28f4293d650>

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

housing = fetch_california_housing(as_frame=True)
housing_df = housing.frame
X, y = housing_df.drop(columns=["MedHouseVal"]), housing_df["MedHouseVal"]
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, test_size=0.25, random_state=42
)
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_valid = torch.tensor(X_valid.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
means = torch.tensor(X_train.mean(dim=0, keepdim=True), dtype=torch.float32) # compute means of each column
stds = torch.tensor(X_train.std(dim=0, keepdim=True), dtype=torch.float32) # compute stds of each column
X_train = (X_train - means) / stds
X_valid = (X_valid - means) / stds
X_test = (X_test - means) / stds

  means = torch.tensor(X_train.mean(dim=0, keepdim=True), dtype=torch.float32) # compute means of each column
  stds = torch.tensor(X_train.std(dim=0, keepdim=True), dtype=torch.float32) # compute stds of each column


In [4]:
n_features = X_train.shape[1]

In [7]:
model = nn.Linear(in_features=n_features, out_features=1)

In [8]:
model.bias

Parameter containing:
tensor([0.0523], requires_grad=True)

In [9]:
model.weight

Parameter containing:
tensor([[-0.2594,  0.3073,  0.0662,  0.2612,  0.0479,  0.1705, -0.0499,  0.2725]],
       requires_grad=True)

In [10]:
model(X_train[:2])

tensor([[ 0.4442],
        [-0.6848]], grad_fn=<AddmmBackward0>)

In [11]:
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
mse = nn.MSELoss()

## simple training

In [12]:
y_train = torch.FloatTensor(y_train.values).reshape(-1, 1)
y_valid = torch.FloatTensor(y_valid.values).reshape(-1, 1)
y_test = torch.FloatTensor(y_test.values).reshape(-1, 1)

In [13]:
def train_bgd(model, optimizer, loss_fn, X_train, y_train, n_epochs):
    for epoch in range(n_epochs):
        y_pred = model(X_train)
        loss = loss_fn(y_pred, y_train)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if (epoch+1) % 100 == 0:
            print(f"Epoch {epoch+1}/{n_epochs}, Loss: {loss.item():.4f}")

In [14]:
train_bgd(model, optimizer, mse, X_train, y_train, n_epochs=1000)

Epoch 100/1000, Loss: 0.7340
Epoch 200/1000, Loss: 0.6056
Epoch 300/1000, Loss: 0.5784
Epoch 400/1000, Loss: 0.5604
Epoch 500/1000, Loss: 0.5478
Epoch 600/1000, Loss: 0.5388
Epoch 700/1000, Loss: 0.5325
Epoch 800/1000, Loss: 0.5279
Epoch 900/1000, Loss: 0.5246
Epoch 1000/1000, Loss: 0.5221


In [15]:
X_new = X_test[:3]  # pretend these are new instances
with torch.no_grad():
    y_pred = model(X_new)  # use the trained model to make predictions
y_pred

tensor([[0.8070],
        [1.7034],
        [2.6691]])

In [16]:
y_test[:3]

tensor([[0.4770],
        [0.4580],
        [5.0000]])

## Implementing a Regression MLP

In [17]:
torch.manual_seed(42)
model = nn.Sequential(
    nn.Linear(in_features=n_features, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=40),
    nn.ReLU(),
    nn.Linear(in_features=40, out_features=1)
)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
train_bgd(model, optimizer, mse, X_train, y_train, n_epochs=1000)

Epoch 100/1000, Loss: 0.6420
Epoch 200/1000, Loss: 0.5797
Epoch 300/1000, Loss: 0.5353
Epoch 400/1000, Loss: 0.5015
Epoch 500/1000, Loss: 0.4755
Epoch 600/1000, Loss: 0.4561
Epoch 700/1000, Loss: 0.4412
Epoch 800/1000, Loss: 0.4291
Epoch 900/1000, Loss: 0.4189
Epoch 1000/1000, Loss: 0.4103


## Implementing Mini-Batch Gradient Descent Using DataLoaders

In [18]:
from torch.utils.data import TensorDataset, DataLoader

train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [24]:
device = torch.device("cpu")

In [25]:
for X_batch, y_batch in train_loader:
    X_batch, y_batch = X_batch.to(device), y_batch.to(device)
    print(X_batch.shape, y_batch.shape)
    break

torch.Size([32, 8]) torch.Size([32, 1])


In [27]:
torch.manual_seed(42)
model = nn.Sequential(
    nn.Linear(in_features=n_features, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=40),
    nn.ReLU(),
    nn.Linear(in_features=40, out_features=1)
)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [28]:
def train(model, optimizer, criterion, train_loader, n_epochs):
    model.train()
    for epoch in range(n_epochs):
        total_loss = 0.
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        mean_loss = total_loss / len(train_loader)
        print(f"Epoch {epoch + 1}/{n_epochs}, Loss: {mean_loss:.4f}")