In [12]:
import pandas as pd

url = 'http://archive.ics.uci.edu/ml/' 'machine-learning-databases/auto-mpg/auto-mpg.data'

coloumn_names =  ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']

df = pd.read_csv(url, names=coloumn_names, na_values='?', comment='\t', sep=' ', skipinitialspace=True)

df = df.dropna()
df = df.reset_index(drop=True)



In [13]:
import sklearn
import sklearn.model_selection
import torch

df_train,df_test = sklearn.model_selection.train_test_split(df,train_size=0.8 , random_state=42)

train_stats = df_train.describe().transpose()

numeric_coloumn_names=['Cylinders','Displacement','Horsepower','Weight','Acceleration']

df_train_norm, df_test_norm = df_train.copy(), df_test.copy()


for col_name in numeric_coloumn_names:
    mean = train_stats.loc[col_name,'mean']
    std = train_stats.loc[col_name,'std']
    df_train_norm.loc[:, col_name] = \
    (df_train_norm.loc[:, col_name] - mean)/std
    df_test_norm.loc[:, col_name] = \
    (df_test_norm.loc[:, col_name] - mean)/std


boundaries = torch.tensor([73, 76, 79])
v = torch.tensor(df_train_norm['Model Year'].values)
df_train_norm['Model Year Bucketed'] = torch.bucketize(
    v, boundaries, right=True
)
v = torch.tensor(df_test_norm['Model Year'].values)
df_test_norm['Model Year Bucketed'] = torch.bucketize(
    v, boundaries, right=True
)

numeric_coloumn_names.append('Model Year Bucketed')



 -0.87178774  0.30437417  0.30437417  1.48053607  0.30437417 -0.87178774
  0.30437417 -0.87178774 -0.87178774  0.30437417  0.30437417 -0.87178774
  1.48053607 -0.87178774  0.30437417  1.48053607  0.30437417  1.48053607
 -0.87178774 -0.87178774 -0.87178774 -0.87178774 -0.87178774 -0.87178774
  1.48053607 -0.87178774 -0.87178774 -0.87178774 -0.87178774 -0.87178774
 -0.87178774  1.48053607  0.30437417 -0.87178774  0.30437417 -0.87178774
  0.30437417  0.30437417 -0.87178774 -0.87178774 -0.87178774 -0.87178774
 -0.87178774  0.30437417 -0.87178774 -0.87178774 -0.87178774 -0.87178774
 -0.87178774  1.48053607 -0.87178774 -0.87178774 -0.87178774  0.30437417
  1.48053607  1.48053607  0.30437417 -0.87178774 -0.87178774 -0.87178774
 -0.87178774 -0.87178774  1.48053607  0.30437417 -0.87178774  1.48053607
 -0.87178774  1.48053607 -0.87178774 -0.87178774 -0.87178774 -0.87178774
  1.48053607 -0.87178774 -0.87178774  1.48053607 -0.87178774 -0.87178774
  1.48053607  1.48053607  1.48053607  1.48053607  0

In [14]:
from torch.nn.functional import one_hot
total_origin = len(set(df_train_norm['Origin']))
origin_encoded = one_hot(
    torch.from_numpy(df_train_norm['Origin'].values) % total_origin
)

x_train_numeric = torch.tensor(
    df_train_norm[numeric_coloumn_names].values)
x_train = torch.cat([x_train_numeric, origin_encoded], 1).float()
#testset

origin_encoded = one_hot(torch.from_numpy(
    df_test_norm['Origin'].values) % total_origin)
x_test_numeric = torch.tensor(
    df_test_norm[numeric_coloumn_names].values)
x_test = torch.cat([x_test_numeric, origin_encoded], 1).float()

y_train = torch.tensor(df_train_norm['MPG'].values).float()
y_test = torch.tensor(df_test_norm['MPG'].values).float()


In [15]:
from torch.utils.data import TensorDataset, DataLoader



train_ds = TensorDataset(x_train,y_train)
batch_size= 8
torch.manual_seed(42)
train_dl = DataLoader(train_ds,batch_size,shuffle=True)


In [22]:
import torch.nn as nn
hidden_units =[8,4]
input_size = x_train.shape[1]
all_layers=[]

for input in hidden_units:
  layer = torch.nn.Linear(input_size,input)
  all_layers.append(layer)
  all_layers.append(torch.nn.ReLU())
  input_size = input
all_layers.append(torch.nn.Linear(input_size,1))
model = nn.Sequential(*all_layers)

loss_fn = nn.MSELoss()
optimizer =torch.optim.Adam(model.parameters(),lr=0.001)

torch.manual_seed(42)

num_epochs=200
log_epochs=20

for epoch in range(num_epochs):
   loss_hist_train = 0
   for x_batch,y_batch in train_dl:
    y_pred = model(x_batch)[:,0]
    loss = loss_fn(y_pred,y_batch)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    loss_hist_train += loss.item()

    if epoch % log_epochs==0:
      print(f'Epoch {epoch}  Loss '
f'{loss_hist_train/len(train_dl):.4f}')

Epoch 0  Loss 21.9548
Epoch 0  Loss 36.6743
Epoch 0  Loss 53.7708
Epoch 0  Loss 66.7633
Epoch 0  Loss 80.7989
Epoch 0  Loss 98.9014
Epoch 0  Loss 111.3136
Epoch 0  Loss 135.6047
Epoch 0  Loss 148.2592
Epoch 0  Loss 167.4387
Epoch 0  Loss 180.4427
Epoch 0  Loss 199.4265
Epoch 0  Loss 211.2907
Epoch 0  Loss 224.7955
Epoch 0  Loss 242.4589
Epoch 0  Loss 256.7277
Epoch 0  Loss 270.4195
Epoch 0  Loss 296.0091
Epoch 0  Loss 312.3442
Epoch 0  Loss 325.8993
Epoch 0  Loss 345.8016
Epoch 0  Loss 359.3058
Epoch 0  Loss 377.1546
Epoch 0  Loss 390.9140
Epoch 0  Loss 410.7626
Epoch 0  Loss 420.6164
Epoch 0  Loss 437.8029
Epoch 0  Loss 460.5438
Epoch 0  Loss 473.5818
Epoch 0  Loss 489.6910
Epoch 0  Loss 500.4035
Epoch 0  Loss 520.4336
Epoch 0  Loss 534.4771
Epoch 0  Loss 553.4636
Epoch 0  Loss 571.1034
Epoch 0  Loss 587.1389
Epoch 0  Loss 600.4144
Epoch 0  Loss 609.8326
Epoch 0  Loss 624.6244
Epoch 0  Loss 630.8022
Epoch 20  Loss 0.5570
Epoch 20  Loss 0.7238
Epoch 20  Loss 0.8120
Epoch 20  Loss 1.142