### **Import Library**

In [None]:
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy.stats import entropy
import matplotlib.pyplot as plt
from tqdm import tqdm

%matplotlib notebook
plt.style.use('../test/deeplearning.mplstyle')

### **Import Dataset**

In [42]:
ROOT_DIR = os.path.dirname(os.getcwd())
DATA_DIR = os.path.join(ROOT_DIR, "data")
DATASET_PATH = os.path.join(DATA_DIR, "housing.csv")

In [43]:
housing_dataset = pd.read_csv(DATASET_PATH)
housing_dataset.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [44]:
housing_dataset.columns

Index(['price', 'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad',
       'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
       'parking', 'prefarea', 'furnishingstatus'],
      dtype='object')

In [45]:
housing_dataset = housing_dataset[['area', 'price']]
housing_dataset

Unnamed: 0,area,price
0,7420,13300000
1,8960,12250000
2,9960,12250000
3,7500,12215000
4,7420,11410000
...,...,...
540,3000,1820000
541,2400,1767150
542,3620,1750000
543,2910,1750000


### **Normalize the Dataset**

In [46]:
mean = np.array(housing_dataset.mean())
std = np.array(housing_dataset.std())
print(mean)
print(std)

housing_dataset = (housing_dataset - mean)/std
housing_dataset.head()

[   5150.5412844  4766729.24770642]
[   2170.14102251 1870439.61565739]


Unnamed: 0,area,price
0,1.045766,4.562174
1,1.755397,4.000809
2,2.216196,4.000809
3,1.08263,3.982096
4,1.045766,3.551716


In [47]:
sns.relplot(
    data=housing_dataset,
    x="price",
    y="area",
)

<IPython.core.display.Javascript object>

<seaborn.axisgrid.FacetGrid at 0x1647b505e50>

### **Simple Linear Regression**
#### Equation of a line: y = mx + b
#### m is a slop
#### b is bias

#### Lets define our problem: y   = w * x + b
#### w is called a weight
#### b is called a bias
#### x is an input variable
#### y is an output variable
#### w, b are parameters

### **Random House Price Predictor**

In [48]:
"""
w is weight, a real number
b is a bias, a real number
w, b are called parameters.
X is feature
y_pred = X * w  + b
"""
def get_house_price(X, w, b):
    y_pred = X * w + b
    return y_pred

In [49]:
w = np.random.randint(100,200)
b = np.random.randint(100,200)

X = housing_dataset.iloc[0]['area']
y_true = housing_dataset.iloc[0]['price']
y_pred = get_house_price(X, w, b)

print(f'Weight: {w}, Bias: {b}, Actual Price: {y_true}, Predicted Price: {y_pred}')

Weight: 150, Bias: 115, Actual Price: 4.5621738765912685, Predicted Price: 271.86483219689865


In [50]:
housing_dataset['price_pred_rand'] = get_house_price(housing_dataset['area'], w, b)
housing_dataset.head()

Unnamed: 0,area,price,price_pred_rand
0,1.045766,4.562174,271.864832
1,1.755397,4.000809,378.309528
2,2.216196,4.000809,447.42946
3,1.08263,3.982096,277.394427
4,1.045766,3.551716,271.864832


### **Visualize PDFs**

In [51]:
__ = housing_dataset.melt(
    value_vars=["price", "price_pred_rand"], 
    var_name="Type", 
    value_name="Price"
)

__['Price'] = __.groupby('Type')['Price'].transform(
    lambda x: (x - x.min()) / (x.max() - x.min())
)

__.head()

Unnamed: 0,Type,Price
0,price,1.0
1,price,0.909091
2,price,0.909091
3,price,0.906061
4,price,0.836364


In [52]:
sns.displot(
    data=__,
    x="Price", 
    hue="Type", 
    kind="hist",
    stat='probability',
    kde=True,
    col='Type',
    fill=True,
)

<IPython.core.display.Javascript object>

  with pd.option_context('mode.use_inf_as_na', True):
  data_subset = grouped_data.get_group(pd_key)
  data_subset = grouped_data.get_group(pd_key)


<seaborn.axisgrid.FacetGrid at 0x1647a6e9950>

### **Calculate KL Divergence**

In [53]:
price_hist, __ = np.histogram(housing_dataset['price'], bins=50, density=True)
price_pred_hist, __ = np.histogram(housing_dataset['price_pred_rand'], bins=50, density=True)
kl_divergence = entropy(price_hist+1e-10, price_pred_hist+1e-10)
print(f'KL divergence: {kl_divergence}')

KL divergence: 0.36437184375001674


### **Cost Function**


In [54]:
def cost_function(x, y_true, w, b):
    y_pred = get_house_price(x, w, b)
    mse = np.mean((y_true - y_pred)**2)/2
    return mse

X = housing_dataset['area']
y_true = housing_dataset['price']
y_pred = housing_dataset['price_pred_rand']
mse = cost_function(X, y_true, w, b)
print(f'MSE is: {mse:0.2f} (Random House Price Prediction)')


MSE is: 17762.10 (Random House Price Prediction)


In [55]:
mse_1 = cost_function(X, y_true, w=100, b=125)
mse_2 = cost_function(X, y_true, w=200, b=200)
mse_3 = cost_function(X, y_true, w=142, b=145)
mse_4 = cost_function(X, y_true, w=115, b=245)
print(f'MSE for w=100 and b=125 is: {mse_1:0.2f}')
print(f'MSE for w=200 and b=200 is: {mse_2:0.2f}')
print(f'MSE for w=142 and b=145 is: {mse_3:0.2f}')
print(f'MSE for w=115 and b=245 is: {mse_4:0.2f}')

MSE for w=100 and b=125 is: 12750.32
MSE for w=200 and b=200 is: 39856.80
MSE for w=142 and b=145 is: 20500.53
MSE for w=115 and b=245 is: 36551.84


In [56]:
weights = np.linspace(start=-10, stop=10, num=100)
biases = np.linspace(start=-10, stop=10, num=100)
weights_mesh, biases_mesh = np.meshgrid(weights, biases)
losses_mesh = []
for w in tqdm(np.ravel(weights_mesh[0])):
    for b in tqdm(np.ravel(biases_mesh[0])):
        loss = cost_function(X, y_true, w=w, b=b)
        losses_mesh.append(loss)

losses_mesh = np.array(losses_mesh)
losses_mesh = losses_mesh.reshape(weights_mesh.shape)

  0%|          | 0/100 [00:00<?, ?it/s]
100%|██████████| 100/100 [00:00<00:00, 1633.76it/s]

100%|██████████| 100/100 [00:00<00:00, 1572.70it/s]
  2%|▏         | 2/100 [00:00<00:06, 14.10it/s]
100%|██████████| 100/100 [00:00<00:00, 1518.45it/s]

100%|██████████| 100/100 [00:00<00:00, 1559.61it/s]
  4%|▍         | 4/100 [00:00<00:07, 13.67it/s]
100%|██████████| 100/100 [00:00<00:00, 1753.88it/s]

100%|██████████| 100/100 [00:00<00:00, 2138.61it/s]
  6%|▌         | 6/100 [00:00<00:06, 14.88it/s]
100%|██████████| 100/100 [00:00<00:00, 2234.14it/s]

100%|██████████| 100/100 [00:00<00:00, 2297.90it/s]
  8%|▊         | 8/100 [00:00<00:05, 16.49it/s]
100%|██████████| 100/100 [00:00<00:00, 2347.23it/s]

100%|██████████| 100/100 [00:00<00:00, 2268.64it/s]

100%|██████████| 100/100 [00:00<00:00, 2386.34it/s]
 11%|█         | 11/100 [00:00<00:04, 18.10it/s]
100%|██████████| 100/100 [00:00<00:00, 2197.54it/s]

100%|██████████| 100/100 [00:00<00:00, 1755.22it/s]
 13%|█▎        | 13/100 [00:00<00:04

In [57]:
fig = plt.figure(figsize=(9, 6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(weights_mesh, biases_mesh, losses_mesh)
ax.set_xlabel('w')
ax.set_ylabel('b')
ax.set_zlabel('Cost')
plt.show()

<IPython.core.display.Javascript object>

In [58]:
def compute_gradient(X, y_true, w, b):
    delta = 1e-9
    cost_1 = cost_function(X, y_true, w=w, b=b)
    cost_2 = cost_function(X, y_true, w=w+delta, b=b)
    cost_3 = cost_function(X, y_true, w=w, b=b+delta)
    dw = (cost_2-cost_1) / delta
    db = (cost_3-cost_1) / delta
    return dw,db

In [59]:
loss = cost_function(X, y_true, w, b)
print("Loss is: ", loss, "Weight is: ", w, "Bias is: ", b)

Loss is:  95.0572008054445 Weight is:  10.0 Bias is:  -10.0


In [60]:
dw, db = compute_gradient(X, y_true, w, b)
print("dw is: ", dw, "db is: ", db)

dw is:  9.446651461075817 db is:  -9.999993721976352


In [61]:
lr = 0.001 #Learning rate
w = w - lr * dw
b = b - lr * db

In [62]:
for i in range(10000):
    less = cost_function(X, y_true, w=w, b=b)
    if i% 1000:
        print("Loss is: ", loss, "Weight is: ", w, "Bias is: ", b)
    dw, db = compute_gradient(X, y_true, w, b)
    w = w - lr*dw
    b = b - lr*db

Loss is:  95.0572008054445 Weight is:  9.98111613308538 Bias is:  -9.980009988576057
Loss is:  95.0572008054445 Weight is:  9.97168836785022 Bias is:  -9.97002997531581
Loss is:  95.0572008054445 Weight is:  9.962269995990027 Bias is:  -9.960059952286429
Loss is:  95.0572008054445 Weight is:  9.952861017504802 Bias is:  -9.950099905277057
Loss is:  95.0572008054445 Weight is:  9.943461403972833 Bias is:  -9.940149834287695
Loss is:  95.0572008054445 Weight is:  9.934071198026686 Bias is:  -9.930209696685779
Loss is:  95.0572008054445 Weight is:  9.924690357033796 Bias is:  -9.920279506682164
Loss is:  95.0572008054445 Weight is:  9.915318895205019 Bias is:  -9.910359221644285
Loss is:  95.0572008054445 Weight is:  9.905956784118644 Bias is:  -9.900448855782997
Loss is:  95.0572008054445 Weight is:  9.896604009563816 Bias is:  -9.890548394887446
Loss is:  95.0572008054445 Weight is:  9.887260585751392 Bias is:  -9.880657838957632
Loss is:  95.0572008054445 Weight is:  9.877926470048806 