In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

import torch
import torch.nn as nn
from torch import Tensor

In [None]:
# Single seed shared by every random number generator used in this notebook.
RANDOM_SEED = 71
np.random.seed(RANDOM_SEED)
# PyTorch keeps its own RNG, separate from NumPy's. `random_split`, shuffled
# DataLoaders and `nn.Linear` weight initialisation all draw from it, so it
# must be seeded too for the notebook to be reproducible.
torch.manual_seed(RANDOM_SEED)

## Define a Linear Regression Model

In [None]:
class LinearRegression(nn.Module):
    """Linear regression expressed as a single fully connected layer.

    Args:
        input_dim: Number of input features per example.
    """

    def __init__(self, input_dim: int) -> None:
        super().__init__()
        self.input_dim = input_dim
        # One affine map from `input_dim` features down to a single output.
        self.fc1 = nn.Linear(in_features=self.input_dim, out_features=1)

    def forward(self, x: Tensor) -> Tensor:
        """Apply the linear layer to `x` and return its output."""
        return self.fc1(x)

## California Housing Dataset

In [None]:
from sklearn.datasets import fetch_california_housing

In [None]:
# Load the California housing data: feature matrix X and target vector y.
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Peek at the first three rows of each.
print(X[:3])
print(y[:3])

[[ 8.32520000e+00  4.10000000e+01  6.98412698e+00  1.02380952e+00
   3.22000000e+02  2.55555556e+00  3.78800000e+01 -1.22230000e+02]
 [ 8.30140000e+00  2.10000000e+01  6.23813708e+00  9.71880492e-01
   2.40100000e+03  2.10984183e+00  3.78600000e+01 -1.22220000e+02]
 [ 7.25740000e+00  5.20000000e+01  8.28813559e+00  1.07344633e+00
   4.96000000e+02  2.80225989e+00  3.78500000e+01 -1.22240000e+02]]
[4.526 3.585 3.521]


In [None]:
# Show the names of the feature columns in the loaded dataset.
print(housing.feature_names)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [None]:
from typing import Any, Tuple
from torch.utils.data import Dataset, random_split, DataLoader
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, Lambda

# DataLoader settings used when the train/test loaders are built below.
# Number of examples per mini-batch.
batch_size = 32
# Passed as `shuffle=` to both the training and the test DataLoader.
is_shuffle = True


def housing_data_reader():
    """Fetch the California housing dataset and return `(examples, labels)`.

    Returns:
        A tuple holding the `data` and `target` attributes of the object
        returned by `fetch_california_housing`.
    """
    bunch = fetch_california_housing()
    return bunch.data, bunch.target

# Build housing dataset.
class CustomDataset(Dataset):
    """Map-style dataset backed by the arrays returned from a reader callable.

    Args:
        data_reader: Zero-argument callable returning `(examples, labels)`.
            `examples` must support `examples[idx, :]` indexing and `labels`
            must support `labels[idx]` and `len()`.
        transform: Optional callable applied to each example on access.
        target_transform: Optional callable applied to each label on access.

    Raises:
        TypeError: If `data_reader` is not callable (including the default
            `None`).
    """

    def __init__(
        self,
        data_reader: Any = None,
        transform: Any = None,
        target_transform: Any = None
    ) -> None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object is not callable" the bare call would produce.
        if not callable(data_reader):
            raise TypeError(
                "data_reader must be a callable returning (examples, labels)"
            )
        self.examples, self.labels = data_reader()
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self) -> int:
        """Return the number of labelled examples."""
        return len(self.labels)

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """Return the `(example, label)` pair at `idx`, after any transforms."""
        example = self.examples[idx, :]
        label = self.labels[idx]
        # Compare against None rather than relying on truthiness, so that a
        # callable which happens to be falsy (e.g. an empty container-like
        # module) is still applied.
        if self.transform is not None:
            example = self.transform(example)
        if self.target_transform is not None:
            # The transform receives the label itself, not the dataset object.
            label = self.target_transform(label)
        return example, label

In [None]:
# Instantiate the dataset; the reader is invoked once inside the constructor.
dataset = CustomDataset(data_reader=housing_data_reader)

In [None]:
# Split dataset into training and test datasets.
def split_dataset(dataset: Dataset, train_dataset_ratio: float = 0.8) -> Tuple[Dataset, Dataset]:
    """Randomly partition `dataset` into a training part and a test part.

    Args:
        dataset: Dataset to split.
        train_dataset_ratio: Fraction of examples assigned to the training
            part; its product with the dataset size is truncated by `int`.

    Returns:
        The `(train_dataset, test_dataset)` pair produced by `random_split`.
    """
    total = len(dataset)
    train_size = int(train_dataset_ratio * total)
    # Everything not used for training goes to the test part.
    split_sizes = [train_size, total - train_size]
    train_part, test_part = random_split(dataset, split_sizes)
    return train_part, test_part

In [None]:
# Use 80% of the examples for training; the remainder becomes the test set.
train_dataset, test_dataset = split_dataset(dataset=dataset, train_dataset_ratio=0.8)

In [None]:
# Sanity-check the sizes of the two splits.
print(len(train_dataset), len(test_dataset))

16512 4128


In [None]:
# Wrap both splits in DataLoaders that share the batch size and shuffle setting.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=is_shuffle,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=is_shuffle,
)

In [None]:
# Pull a single mini-batch from the training loader for inspection.
train_examples, train_labels = next(iter(train_dataloader))

In [None]:
# Display the first three examples of the batch.
# NOTE(review): the recorded output shows dtype=torch.float64. nn.Linear
# parameters default to float32, so this batch presumably needs a cast
# (e.g. `.float()`) before being fed to LinearRegression — confirm.
train_examples[:3]

tensor([[ 2.2417e+00,  1.8000e+01,  5.1502e+00,  1.0120e+00,  1.1660e+03,
          3.5015e+00,  3.2770e+01, -1.1552e+02],
        [ 1.9028e+00,  3.6000e+01,  3.2949e+00,  9.7863e-01,  1.7110e+03,
          3.6560e+00,  3.3960e+01, -1.1822e+02],
        [ 5.5413e+00,  2.6000e+01,  6.5684e+00,  1.0711e+00,  1.0620e+03,
          2.7947e+00,  3.2810e+01, -1.1721e+02]], dtype=torch.float64)

In [None]:
# NOTE(review): scratch expression unrelated to the housing pipeline — candidate for removal.
1 + 1

2

In [None]:
class 