In [1]:
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn,optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,TensorDataset
from sklearn.metrics import r2_score

In [2]:
data = pd.read_csv('Real estate.csv')
data.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude,Y house price of unit area
0,1,2012.917,32.0,84.87882,10,24.98298,121.54024,37.9
1,2,2012.917,19.5,306.5947,9,24.98034,121.53951,42.2
2,3,2013.583,13.3,561.9845,5,24.98746,121.54391,47.3
3,4,2013.5,13.3,561.9845,5,24.98746,121.54391,54.8
4,5,2012.833,5.0,390.5684,5,24.97937,121.54245,43.1


In [3]:
x_train = data.drop('Y house price of unit area', axis=1).iloc[50:]
x_test = data.drop('Y house price of unit area', axis=1).iloc[:50]
x_train.head()

Unnamed: 0,No,X1 transaction date,X2 house age,X3 distance to the nearest MRT station,X4 number of convenience stores,X5 latitude,X6 longitude
50,51,2013.417,21.7,512.5487,4,24.974,121.53842
51,52,2013.083,31.3,1758.406,1,24.95402,121.55282
52,53,2013.583,32.1,1438.579,3,24.97419,121.5175
53,54,2013.083,13.3,492.2313,5,24.96515,121.53737
54,55,2013.083,16.1,289.3248,5,24.98203,121.54348


In [4]:
y_train= data['Y house price of unit area'].iloc[50:]
y_test= data['Y house price of unit area'].iloc[:50]
y_train

50     44.2
51     20.7
52     27.0
53     38.9
54     51.7
       ... 
409    15.4
410    50.0
411    40.6
412    52.5
413    63.9
Name: Y house price of unit area, Length: 364, dtype: float64

💠preprocessing

In [5]:
#convert to arrany
print(x_train.shape)
x_train.values[0]

(364, 7)


array([  51.     , 2013.417  ,   21.7    ,  512.5487 ,    4.     ,
         24.974  ,  121.53842])

In [6]:
x_train = torch.tensor(x_train.values , dtype=torch.float32)
y_train = torch.FloatTensor(y_train.values)

x_test = torch.tensor(x_test.values , dtype=torch.float32)
y_test = torch.FloatTensor(y_test.values)

In [7]:
x_train

tensor([[5.1000e+01, 2.0134e+03, 2.1700e+01,  ..., 4.0000e+00, 2.4974e+01,
         1.2154e+02],
        [5.2000e+01, 2.0131e+03, 3.1300e+01,  ..., 1.0000e+00, 2.4954e+01,
         1.2155e+02],
        [5.3000e+01, 2.0136e+03, 3.2100e+01,  ..., 3.0000e+00, 2.4974e+01,
         1.2152e+02],
        ...,
        [4.1200e+02, 2.0132e+03, 1.8800e+01,  ..., 7.0000e+00, 2.4979e+01,
         1.2154e+02],
        [4.1300e+02, 2.0130e+03, 8.1000e+00,  ..., 5.0000e+00, 2.4967e+01,
         1.2154e+02],
        [4.1400e+02, 2.0135e+03, 6.5000e+00,  ..., 9.0000e+00, 2.4974e+01,
         1.2154e+02]])

💠normalization

In [8]:
mu = x_train.mean(dim=0)
std = x_train.std(dim=0)
mu

tensor([ 232.5000, 2013.1508,   17.5654, 1064.2855,    4.0659,   24.9687,
         121.5335])

In [9]:
x_train = (x_train-mu) / std

In [10]:
mu = x_train.mean(dim=0)
std = x_train.std(dim=0)
print(mu)
print(std)

tensor([ 4.2575e-09, -3.8348e-04, -1.0938e-07, -1.3755e-08, -4.4540e-08,
         7.3157e-06,  1.0397e-04])
tensor([1., 1., 1., 1., 1., 1., 1.])


In [11]:
#notice
# we should use mu and std of train data for test too (not measure them again for test)
x_test = (x_test-mu) / std

💠dataloader

In [12]:
train_dataset = TensorDataset(x_train,y_train)
train_dataset.tensors[0].shape

torch.Size([364, 7])

In [13]:
train_loader = DataLoader(train_dataset, batch_size = 50 , shuffle= True)
for i , (x_batch , y_batch) in enumerate(train_loader):
    print(i , x_batch.shape , y_batch.shape)
    

0 torch.Size([50, 7]) torch.Size([50])
1 torch.Size([50, 7]) torch.Size([50])
2 torch.Size([50, 7]) torch.Size([50])
3 torch.Size([50, 7]) torch.Size([50])
4 torch.Size([50, 7]) torch.Size([50])
5 torch.Size([50, 7]) torch.Size([50])
6 torch.Size([50, 7]) torch.Size([50])
7 torch.Size([14, 7]) torch.Size([14])


In [14]:
len(train_loader)

8

In [15]:
#getting one sample batch
next(iter(train_loader))

[tensor([[ 1.1880e-01, -2.4516e-01, -1.1184e+00,  4.0153e-01, -3.5899e-01,
           2.8126e-01, -1.1393e+00],
         [-6.2249e-01,  9.6340e-01, -1.5389e+00, -6.2497e-01,  6.5136e-01,
           7.1748e-01,  7.3264e-01],
         [-1.5063e+00,  5.8749e-02, -3.2989e-01,  2.4452e+00, -1.3693e+00,
          -2.2309e+00, -1.9638e+00],
         [-1.6489e+00,  1.2638e+00,  1.1157e+00,  2.7923e+00, -1.0326e+00,
          -1.5984e+00, -2.5183e+00],
         [ 4.0391e-01,  5.8749e-02,  8.7038e-01, -5.4311e-01,  9.8814e-01,
           3.5678e-01,  4.2586e-01],
         [ 1.2022e+00,  5.8749e-02, -1.4426e+00, -7.0552e-01,  6.5136e-01,
          -2.4616e-01,  4.8893e-01],
         [-8.9810e-01,  1.2638e+00, -3.4741e-01, -6.0384e-01,  6.5136e-01,
          -3.0852e-01,  6.1306e-01],
         [-4.4192e-01, -1.4502e+00,  1.2121e+00,  3.1034e-01, -3.5899e-01,
           3.4048e-01, -1.0712e+00],
         [ 1.1642e+00,  9.6340e-01, -4.7007e-01,  2.3973e-01, -1.0326e+00,
          -1.3691e+00,  9.869

In [16]:
print(x_test.shape)
print(y_test.shape)

test_dataset = TensorDataset(x_test,y_test)
#test can have bigger batch cause it does not need gradiant so cpu is free and bigger batch can be solved faste 

test_loader = DataLoader(test_dataset, batch_size = 50 , shuffle= False)

torch.Size([50, 7])
torch.Size([50])


 💠define model

In [33]:
num_feats=7
out_feat=1
h1=68
h2=32

model = nn.Sequential(nn.Linear(num_feats,h1),
                      nn.ReLU(),
                      nn.Linear(h1,h2),
                      nn.ReLU(),
                      nn.Linear(h2,out_feat))

In [34]:
loss_fn= nn.MSELoss()
optimizer = optim.SGD(model.parameters(),lr=0.01)

💠train loop

In [84]:
#we add losses of each batch and divide to len in irder to find a mean of each epoch's loss wich is more reliable
num_epocks=50
for epoch in range(num_epocks):
    train_loss = 0
    test_loss = 0
    for x_batch,y_batch in train_loader:
        yp = model(x_batch)
        loss = loss_fn(yp.squeeze(),y_batch)
        train_loss += loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  
    print(f'Epock: {epoch} : loss train = {train_loss/len(train_loader):.4} ')
    # i use test instead of validation data
    for x_batch, y_batch in test_loader:
        yp = model(x_batch)
        loss_t = loss_fn(yp.squeeze() , y_batch)
        test_loss += loss_t
    print(f'Epock: {epoch} : loss test = {test_loss/len(test_loader):.4}\n')
    
print('done')

Epock: 0 : loss train = 74.17 
Epock: 0 : loss test = 889.4

Epock: 1 : loss train = 78.25 
Epock: 1 : loss test = 2.063e+04

Epock: 2 : loss train = 76.34 
Epock: 2 : loss test = 1.366e+05

Epock: 3 : loss train = 75.9 
Epock: 3 : loss test = 1.428e+03

Epock: 4 : loss train = 77.05 
Epock: 4 : loss test = 3.87e+05

Epock: 5 : loss train = 86.22 
Epock: 5 : loss test = 2.444e+04

Epock: 6 : loss train = 70.4 
Epock: 6 : loss test = 2.761e+04

Epock: 7 : loss train = 69.3 
Epock: 7 : loss test = 1.802e+05

Epock: 8 : loss train = 81.88 
Epock: 8 : loss test = 6.762e+03

Epock: 9 : loss train = 69.86 
Epock: 9 : loss test = 5.76e+04

Epock: 10 : loss train = 81.38 
Epock: 10 : loss test = 2.235e+05

Epock: 11 : loss train = 75.49 
Epock: 11 : loss test = 1.8e+03

Epock: 12 : loss train = 73.41 
Epock: 12 : loss test = 9.842e+04

Epock: 13 : loss train = 71.11 
Epock: 13 : loss test = 3.624e+04

Epock: 14 : loss train = 70.06 
Epock: 14 : loss test = 2.364e+03

Epock: 15 : loss train = 7

In [85]:
#استفاده از معیار ارزیابی r_score

In [91]:
yp_total=[]
yt_total=[]
# with torch.no_grad = detach : delete the autograd from tensor 
with torch.no_grad():
    for x, y in test_loader:
        yp = model(x)
        yp_total.append(yp.squeeze())
        yt_total.append(y)

yp_total= torch.cat(yp_total)
yt_total= torch.cat(yt_total)

r2_score(yp_total,yt_total)


-0.9127480999614395

In [92]:
yp_total

tensor([ 46.6728,  46.6728,  46.6728,  46.6728,  46.6728, 140.6326,  46.6728,
         46.6728, 201.2061,  96.4900,  46.6728,  46.6728,  46.6728, 155.5869,
         47.6224,  46.6728,  46.6728,  46.6728,  46.6728,  46.6728, 148.8643,
         46.6728,  47.0815,  46.6728,  46.6728,  62.0924,  46.6728,  46.6728,
         46.6728,  46.6728, 190.8156,  46.6728,  46.6728,  46.6728,  46.6728,
        184.6516, 118.2506,  47.0798,  46.6728,  46.6728, 184.8112, 184.7497,
         46.6728,  46.6728,  46.6728,  46.6728,  46.6728,  46.6728, 192.4851,
        191.3396])

In [90]:
yt_total

tensor([37.9000, 42.2000, 47.3000, 54.8000, 43.1000, 32.1000, 40.3000, 46.7000,
        18.8000, 22.1000, 41.4000, 58.1000, 39.3000, 23.8000, 34.3000, 50.5000,
        70.1000, 37.4000, 42.3000, 47.7000, 29.3000, 51.6000, 24.6000, 47.9000,
        38.8000, 27.0000, 56.2000, 33.6000, 47.0000, 57.1000, 22.1000, 25.0000,
        34.2000, 49.3000, 55.1000, 27.3000, 22.9000, 25.3000, 47.7000, 46.2000,
        15.9000, 18.2000, 34.7000, 34.1000, 53.9000, 38.3000, 42.0000, 61.5000,
        13.4000, 13.2000])