In [1]:
import numpy as np 
import pandas as pd 
import torch.nn as nn 
import matplotlib.pyplot as plt  
import seaborn as sns 
import torch.optim as optim
import torch

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
housing = pd.DataFrame(pd.read_csv("/content/drive/MyDrive/Colab Notebooks/Housing.csv")) 
housing.head() 

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [4]:
m = len(housing) 
m 

545

In [5]:
housing.shape

(545, 13)

In [6]:
housing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 545 entries, 0 to 544
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   price             545 non-null    int64 
 1   area              545 non-null    int64 
 2   bedrooms          545 non-null    int64 
 3   bathrooms         545 non-null    int64 
 4   stories           545 non-null    int64 
 5   mainroad          545 non-null    object
 6   guestroom         545 non-null    object
 7   basement          545 non-null    object
 8   hotwaterheating   545 non-null    object
 9   airconditioning   545 non-null    object
 10  parking           545 non-null    int64 
 11  prefarea          545 non-null    object
 12  furnishingstatus  545 non-null    object
dtypes: int64(6), object(7)
memory usage: 55.5+ KB


In [7]:
housing.describe()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,parking
count,545.0,545.0,545.0,545.0,545.0,545.0
mean,4766729.0,5150.541284,2.965138,1.286239,1.805505,0.693578
std,1870440.0,2170.141023,0.738064,0.50247,0.867492,0.861586
min,1750000.0,1650.0,1.0,1.0,1.0,0.0
25%,3430000.0,3600.0,2.0,1.0,1.0,0.0
50%,4340000.0,4600.0,3.0,1.0,2.0,0.0
75%,5740000.0,6360.0,3.0,2.0,2.0,1.0
max,13300000.0,16200.0,6.0,4.0,4.0,3.0


In [8]:
 varlist =  ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning',]  
def binary_map(x): 
    return x.map({'yes': 1, "no": 0}) 
housing[varlist] = housing[varlist].apply(binary_map) 
housing.head() 

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,1,0,0,0,1,2,yes,furnished
1,12250000,8960,4,4,4,1,0,0,0,1,3,no,furnished
2,12250000,9960,3,2,2,1,0,1,0,0,2,yes,semi-furnished
3,12215000,7500,4,2,2,1,0,1,0,1,3,yes,furnished
4,11410000,7420,4,1,2,1,1,1,0,1,2,no,furnished


In [9]:

from sklearn.model_selection import train_test_split 
np.random.seed(0) 
df_train, df_test = train_test_split(housing, train_size = 0.8, test_size = 0.2, random_state = 42)
df_train.shape 

(436, 13)

In [10]:
df_test.shape

(109, 13)

In [11]:
num_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking','price'] 
df_Newtrain = df_train[num_vars] 
df_Newtest = df_test[num_vars] 
df_Newtrain.head() 

Unnamed: 0,area,bedrooms,bathrooms,stories,parking,price
46,6000,3,2,4,1,7525000
93,7200,3,2,1,3,6300000
335,3816,2,1,1,2,3920000
412,2610,3,1,2,0,3430000
471,3750,3,1,2,0,3010000


In [12]:
df_Newtrain.shape

(436, 6)

In [13]:
import warnings 
warnings.filterwarnings('ignore') 
 
from sklearn.preprocessing import MinMaxScaler, StandardScaler  
scaler = MinMaxScaler() 
df_Newtrain[num_vars] = scaler.fit_transform(df_Newtrain[num_vars]) 
df_Newtrain.head(20) 
df_Newtest[num_vars] = scaler.fit_transform(df_Newtest[num_vars])
df_Newtest.head(20)

Unnamed: 0,area,bedrooms,bathrooms,stories,parking,price
316,0.365217,0.666667,0.5,0.333333,0.333333,0.2
77,0.417391,0.333333,0.5,0.666667,0.0,0.424242
360,0.203478,0.0,0.0,0.0,0.0,0.169697
90,0.286957,0.333333,0.0,0.333333,0.0,0.406061
493,0.196522,0.333333,0.0,0.0,0.0,0.090909
209,0.436522,0.333333,0.0,0.0,0.0,0.272727
176,0.593043,0.333333,0.0,0.0,0.666667,0.30303
249,0.286087,0.666667,0.5,0.333333,0.0,0.241818
516,0.133913,0.0,0.0,0.0,0.333333,0.060606
426,0.086957,0.333333,0.0,0.0,0.0,0.138788


In [14]:
y_Newtrain = df_Newtrain.pop('price') 
x_Newtrain = df_Newtrain 
y_Newtest = df_Newtest.pop('price')
x_Newtest = df_Newtest

In [15]:
y_Newtrain = torch.tensor(y_Newtrain.values).float()
y_Newtrain = y_Newtrain[:,None]
x_Newtrain = torch.tensor(x_Newtrain.values).float()
y_Newtest = torch.tensor(y_Newtest.values).float()
y_Newtest = y_Newtest[:,None]
x_Newtest = torch.tensor(x_Newtest.values).float()


In [16]:
def training_loop(n_epochs, optimizer, model, loss_fn, t_u_train, t_u_val,
                  t_c_train, t_c_val):
    for epoch in range(1, n_epochs + 1):
        t_p_train = model(t_u_train) # <1>
        loss_train = loss_fn(t_p_train, t_c_train)

        t_p_val = model(t_u_val) # <1>
        loss_val = loss_fn(t_p_val, t_c_val)
        
        optimizer.zero_grad()
        loss_train.backward() # <2>
        optimizer.step()

        if epoch == 1 or epoch % 10 == 0:
            print(f"Epoch {epoch}, Training loss {loss_train.item():.4f},"
                  f" Validation loss {loss_val.item():.4f}")

In [17]:
seq_model_0 = nn.Sequential(
            nn.Linear(5, 8), # <1>
            nn.Tanh(),
            nn.Linear(8, 5)) # <2>
seq_model_0

Sequential(
  (0): Linear(in_features=5, out_features=8, bias=True)
  (1): Tanh()
  (2): Linear(in_features=8, out_features=5, bias=True)
)

In [18]:
seq_model_1 = nn.Sequential(
            nn.Linear(5, 8), # <1>
            nn.Tanh(),
            nn.Linear(8, 4), # <2>
            nn.Tanh(),
            nn.Linear(4, 2), # <2>
            nn.Tanh(),
            nn.Linear(2, 1)) # <2>
seq_model_1

Sequential(
  (0): Linear(in_features=5, out_features=8, bias=True)
  (1): Tanh()
  (2): Linear(in_features=8, out_features=4, bias=True)
  (3): Tanh()
  (4): Linear(in_features=4, out_features=2, bias=True)
  (5): Tanh()
  (6): Linear(in_features=2, out_features=1, bias=True)
)

In [19]:
optimizer = optim.SGD(seq_model_0.parameters(), lr=1e-3)

training_loop(200,optimizer,seq_model_0,nn.MSELoss(), x_Newtrain, x_Newtest, y_Newtrain, y_Newtest)

Epoch 1, Training loss 0.0418, Validation loss 0.0474
Epoch 10, Training loss 0.0414, Validation loss 0.0471
Epoch 20, Training loss 0.0411, Validation loss 0.0467
Epoch 30, Training loss 0.0407, Validation loss 0.0464
Epoch 40, Training loss 0.0403, Validation loss 0.0461
Epoch 50, Training loss 0.0400, Validation loss 0.0458
Epoch 60, Training loss 0.0397, Validation loss 0.0455
Epoch 70, Training loss 0.0393, Validation loss 0.0452
Epoch 80, Training loss 0.0390, Validation loss 0.0449
Epoch 90, Training loss 0.0387, Validation loss 0.0446
Epoch 100, Training loss 0.0383, Validation loss 0.0443
Epoch 110, Training loss 0.0380, Validation loss 0.0440
Epoch 120, Training loss 0.0377, Validation loss 0.0438
Epoch 130, Training loss 0.0374, Validation loss 0.0435
Epoch 140, Training loss 0.0371, Validation loss 0.0432
Epoch 150, Training loss 0.0369, Validation loss 0.0430
Epoch 160, Training loss 0.0366, Validation loss 0.0427
Epoch 170, Training loss 0.0363, Validation loss 0.0425
Epo

In [20]:
optimizer = optim.SGD(seq_model_1.parameters(), lr=1e-3)

training_loop(200,optimizer,seq_model_1,nn.MSELoss(), x_Newtrain, x_Newtest, y_Newtrain, y_Newtest)

Epoch 1, Training loss 0.3811, Validation loss 0.3747
Epoch 10, Training loss 0.3526, Validation loss 0.3467
Epoch 20, Training loss 0.3235, Validation loss 0.3182
Epoch 30, Training loss 0.2969, Validation loss 0.2922
Epoch 40, Training loss 0.2726, Validation loss 0.2685
Epoch 50, Training loss 0.2504, Validation loss 0.2468
Epoch 60, Training loss 0.2301, Validation loss 0.2270
Epoch 70, Training loss 0.2115, Validation loss 0.2089
Epoch 80, Training loss 0.1946, Validation loss 0.1924
Epoch 90, Training loss 0.1791, Validation loss 0.1774
Epoch 100, Training loss 0.1650, Validation loss 0.1637
Epoch 110, Training loss 0.1521, Validation loss 0.1512
Epoch 120, Training loss 0.1404, Validation loss 0.1398
Epoch 130, Training loss 0.1297, Validation loss 0.1294
Epoch 140, Training loss 0.1199, Validation loss 0.1199
Epoch 150, Training loss 0.1109, Validation loss 0.1113
Epoch 160, Training loss 0.1028, Validation loss 0.1034
Epoch 170, Training loss 0.0954, Validation loss 0.0963
Epo