In [2]:
import pandas as pd

# Remote location of the raw Auto MPG table.
url = (
    'http://archive.ics.uci.edu/ml/'
    'machine-learning-databases/auto-mpg/auto-mpg.data'
)

# Column labels are supplied explicitly through `names=` below.
column_names = [
    'MPG',
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
    'Model Year',
    'Origin',
]

# Fields are separated by runs of spaces; "?" marks a missing value.
# Everything from a tab to the end of a line is treated as a comment and
# ignored (presumably the trailing free-text field -- confirm against the file).
df = pd.read_csv(
    url,
    sep=" ",
    skipinitialspace=True,
    comment='\t',
    na_values="?",
    names=column_names,
)

df.head()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1


In [3]:
# Discard every row containing a missing value, then renumber the index
# 0..n-1 so no gaps from the removed rows remain.
df = df.dropna().reset_index(drop=True)

In [4]:
import sklearn
import sklearn.model_selection

# Columns that are standardized later in the notebook.
numeric_column_names = [
    'Cylinders',
    'Displacement',
    'Horsepower',
    'Weight',
    'Acceleration',
]

# 80/20 split with a fixed seed so the partition is reproducible.
df_train, df_test = sklearn.model_selection.train_test_split(
    df,
    train_size=0.8,
    random_state=1,
)

# One row per column, with 'mean', 'std', ... as the column labels.
train_stats = df_train.describe().T

In [5]:
# Standardize the numeric feature columns to zero mean / unit variance.
# The statistics come from the TRAINING split only and are reused for the
# test split, so no information from the test data leaks into preprocessing.
df_train_norm, df_test_norm = df_train.copy(), df_test.copy()
for col_name in numeric_column_names:
    mean = train_stats.loc[col_name, 'mean']
    std = train_stats.loc[col_name, 'std']
    # Replace the whole column instead of writing through `.loc[:, col]`:
    # integer-valued columns (e.g. 'Cylinders', shown without a decimal point
    # in the head() output) cannot hold the float z-scores in place, and
    # recent pandas versions deprecate that implicit upcast on setitem.
    df_train_norm[col_name] = (
        df_train_norm[col_name].astype('float64') - mean) / std
    df_test_norm[col_name] = (
        df_test_norm[col_name].astype('float64') - mean) / std
df_train_norm.tail()

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
203,28.0,-0.824303,-0.90102,-0.736562,-0.950031,0.255202,76,3
255,19.4,0.351127,0.4138,-0.340982,0.29319,0.548737,78,1
72,13.0,1.526556,1.144256,0.713897,1.339617,-0.625403,72,1
235,30.5,-0.824303,-0.89128,-1.053025,-1.072585,0.475353,77,1
37,14.0,1.526556,1.563051,1.636916,1.47042,-1.35924,71,1


In [10]:
import torch

# Bucket edges for 'Model Year'. With right=True a value equal to an edge
# falls into the bucket above it: <73 -> 0, 73..75 -> 1, 76..78 -> 2, >=79 -> 3.
boundaries = torch.tensor([73, 76, 79])

# Apply the same bucketing to the training frame and then the test frame.
for frame in (df_train_norm, df_test_norm):
    v = torch.tensor(frame['Model Year'].values)
    frame['Model Year Bucketized'] = torch.bucketize(
        v, boundaries, right=True
    )

In [7]:

# Feed the bucket index to the model as one more numeric input column.
# Guarded so that re-running this cell does not append the name a second
# time, which would duplicate the feature column in the input matrix.
if 'Model Year Bucketized' not in numeric_column_names:
    numeric_column_names.append('Model Year Bucketized')

In [11]:
from torch.nn.functional import one_hot

# Number of distinct 'Origin' categories seen in the training split.
total_origin = len(set(df_train_norm['Origin']))

# --- training features ---------------------------------------------------
# `% total_origin` maps the raw origin codes into the range [0, total_origin)
# that one_hot requires. `num_classes` is passed explicitly so the encoded
# width never depends on which codes happen to be present in a given split.
origin_encoded = one_hot(
    torch.from_numpy(df_train_norm['Origin'].values) % total_origin,
    num_classes=total_origin)
x_train_numeric = torch.tensor(
    df_train_norm[numeric_column_names].values
)
# Numeric columns first, one-hot origin columns last; cast to float32.
x_train = torch.cat([x_train_numeric, origin_encoded], 1).float()


# --- test features -------------------------------------------------------
# Same encoding as above. Without an explicit `num_classes`, a test split
# that lacks the highest origin index would produce a narrower matrix than
# the one the model was built for.
origin_encoded = one_hot(
    torch.from_numpy(df_test_norm['Origin'].values) % total_origin,
    num_classes=total_origin)
x_test_numeric = torch.tensor(
    df_test_norm[numeric_column_names].values
)
x_test = torch.cat([x_test_numeric, origin_encoded], 1).float()

In [12]:
# Regression targets (the 'MPG' column) as float32 tensors.
y_train = torch.tensor(
    df_train_norm['MPG'].to_numpy(), dtype=torch.float32)
y_test = torch.tensor(
    df_test_norm['MPG'].to_numpy(), dtype=torch.float32)

In [14]:
from torch.utils.data import TensorDataset, DataLoader

# Mini-batch size used for training.
batch_size = 8

# Pair each feature row with its target value.
train_ds = TensorDataset(x_train, y_train)

# Seed the global RNG before building the shuffling loader so that the
# batch order is reproducible.
torch.manual_seed(1)
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)

In [31]:
# Number of input features = number of columns in the training matrix.
input_size = x_train.size(1)
# Number of values predicted per example.
output_shape = 1

import torch.nn as nn
class Model(nn.Module):
    """Small fully connected regressor: in -> 8 -> 4 -> out, with ReLU between layers.

    Parameters
    ----------
    in_features : int, optional
        Width of the input vectors. Defaults to the notebook-level
        ``input_size``.
    out_features : int, optional
        Number of values predicted per example. Defaults to the
        notebook-level ``output_shape`` (previously defined but unused,
        because the last layer hard-coded ``1``).
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # Fall back to the notebook globals so a bare `Model()` keeps working.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_shape
        self.l1 = nn.Linear(in_features, 8)
        self.a1 = nn.ReLU()
        self.l2 = nn.Linear(8, 4)
        self.a2 = nn.ReLU()
        self.l3 = nn.Linear(4, out_features)

    def forward(self, x):
        """Run ``x`` through the three linear layers; returns a tensor whose
        last dimension is ``out_features`` (no activation on the output)."""
        x = self.l1(x)
        x = self.a1(x)
        x = self.l2(x)
        x = self.a2(x)
        x = self.l3(x)

        return x
    
# Build the network; the bare name on the last line makes the notebook
# display its layer summary.
model = Model()
model

Model(
  (l1): Linear(in_features=9, out_features=8, bias=True)
  (a1): ReLU()
  (l2): Linear(in_features=8, out_features=4, bias=True)
  (a2): ReLU()
  (l3): Linear(in_features=4, out_features=1, bias=True)
)

In [32]:
# Mean-squared-error objective for the regression target.
loss_fn = nn.MSELoss()
# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

In [33]:
num_epochs = 200
log_epochs = 20

# loss_hist[e] accumulates the SUM of the per-batch losses of epoch e;
# divide by len(train_dl) (the number of batches) to get the epoch mean.
loss_hist = [0]*num_epochs

for epoch in range(num_epochs):
    for x_batch, y_batch in train_dl:
        # The model emits one column per example; take column 0 so the
        # predictions are 1-D like y_batch.
        pred = model(x_batch)[:, 0]
        loss = loss_fn(pred, y_batch)
        loss.backward()
        optimizer.step()
        # Clear gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()
        loss_hist[epoch] += loss.item()
    if epoch % log_epochs == 0:
        # '.4f' (four decimals) matches the evaluation cell; the previous
        # ':4f' only set a minimum field width and printed six decimals.
        print(f'Epoch {epoch} Loss : {loss_hist[epoch]/len(train_dl):.4f}')

Epoch 0 Loss : 316.284234
Epoch 20 Loss : 8.762207
Epoch 40 Loss : 7.380253
Epoch 60 Loss : 7.700083
Epoch 80 Loss : 7.119501
Epoch 100 Loss : 6.661405
Epoch 120 Loss : 6.529685
Epoch 140 Loss : 6.567150
Epoch 160 Loss : 6.618022
Epoch 180 Loss : 8.609287


In [34]:
# Mean absolute error, reported alongside the MSE used for training.
mae_fn = nn.L1Loss()

# Gradients are not needed for evaluation.
with torch.no_grad():
    pred = model(x_test.float())[:, 0]
    loss = loss_fn(pred, y_test)
    mae = mae_fn(pred, y_test)

print(f'Test MSE: {loss.item():.4f}')
print(f'Test MAE: {mae.item():.4f}')


Test MSE: 12.6813
Test MAE: 2.6612
