# 1.5. Implementing a simple Linear Regression Algorithm

## Imports

In [None]:
import numpy as np
import tensorflow as tf 
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import seaborn as sb

# NOTE(review): this silences every Python warning for the rest of the session,
# including ones that would point at real problems; consider narrowing the
# filter to specific categories.
warnings.filterwarnings('ignore')

# Record the TensorFlow version the notebook was run with.
print(tf.__version__)

2.4.1


## Load data and take a look at it

In [None]:
# Fixed seed for the shuffle: the train/test split later in the notebook is taken
# by row position, so an unseeded shuffle would give different splits (and
# different results) on every Restart & Run All.
SEED = 42

# Read the CSV and shuffle all rows (frac=1 keeps every row, in random order).
data = pd.read_csv('https://storage.googleapis.com/activation-function/csv/airbnb_new_york.csv').sample(frac=1, random_state=SEED)
data.describe()

Unnamed: 0,id,host_id,latitude,longitude,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
count,48895.0,48895.0,48895.0,48895.0,48895.0,48895.0,48895.0,38843.0,48895.0,48895.0
mean,19017140.0,67620010.0,40.728949,-73.95217,152.720687,7.029962,23.274466,1.373221,7.143982,112.781327
std,10983110.0,78610970.0,0.05453,0.046157,240.15417,20.51055,44.550582,1.680442,32.952519,131.622289
min,2539.0,2438.0,40.49979,-74.24442,0.0,1.0,0.0,0.01,1.0,0.0
25%,9471945.0,7822033.0,40.6901,-73.98307,69.0,1.0,1.0,0.19,1.0,0.0
50%,19677280.0,30793820.0,40.72307,-73.95568,106.0,3.0,5.0,0.72,1.0,45.0
75%,29152180.0,107434400.0,40.763115,-73.936275,175.0,5.0,24.0,2.02,2.0,227.0
max,36487240.0,274321300.0,40.91306,-73.71299,10000.0,1250.0,629.0,58.5,327.0,365.0


## Data preprocessing

In [None]:
# Select the predictor columns. .copy() makes `features` an independent frame:
# without it the fillna assignment below writes into a slice of `data`, which is
# a chained assignment (pandas SettingWithCopyWarning) whose effect is not guaranteed.
features = data[['neighbourhood_group', 'room_type', 'minimum_nights', 'number_of_reviews',
                 'reviews_per_month', 'calculated_host_listings_count', 'availability_365']].copy()

# reviews_per_month is the only numeric column with missing values (its count in
# data.describe() is lower than the others); replace them with 0.
features['reviews_per_month'] = features['reviews_per_month'].fillna(0)

# One-hot encode the two categorical columns. dtype=float keeps the final feature
# matrix a plain float array whatever the installed pandas uses as its default
# dummy dtype (an integer or boolean default can otherwise leak into .values).
onehot_neighborhood_group = pd.get_dummies(features['neighbourhood_group'], dtype=float)
onehot_room_type = pd.get_dummies(features['room_type'], dtype=float)

# Swap the raw categorical columns for their one-hot encodings.
features = features.drop(columns=['neighbourhood_group', 'room_type'])
features = pd.concat([features, onehot_neighborhood_group, onehot_room_type], axis=1)

# The model must not be fed missing values.
assert not features.isna().values.any(), 'features still contain NaN values'

targets = data['price']

# 70/30 train/test split by row position (rows were shuffled when `data` was loaded).
train_size = int(0.7*len(data))
X_train, X_test = features.values[:train_size, :], features.values[train_size:, :]
y_train, y_test = targets.values[:train_size], targets.values[train_size:]

# Sanity checks: features and targets stay aligned and no row is lost.
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)
assert len(X_train) + len(X_test) == len(features)

print('X_train[0] length:', len(X_train[0]))
print('X_train shape:    ', X_train.shape, '\n')

X_train[0] length: 13
X_train shape:     (34226, 13) 



In [None]:
# Show the first training row (numeric + one-hot features) next to its target price.
X_train[0], y_train[0]

(array([ 1.  , 15.  ,  0.44,  1.  ,  0.  ,  0.  ,  1.  ,  0.  ,  0.  ,
         0.  ,  0.  ,  1.  ,  0.  ]), 50)

## Data visualization and analysis

To be completed in the next section.

## The TensorFlow 2 Machine Learning Approaches

### Linear Regression

In [None]:
class LinearModel:
    """Single-output linear regression, ``y_pred = X @ W + b``.

    The model is fitted with full-batch gradient descent on the mean absolute
    error. ``W`` and ``b`` are created inside :meth:`train`, so :meth:`predict`
    can only be used after ``train`` has been called; every call to ``train``
    re-initialises both parameters.
    """

    def __init__(self):
        # y_pred = W*X + b
        self.initializer = tf.keras.initializers.GlorotUniform()

    def loss(self, y, y_pred):
        """Return the mean absolute error between targets and predictions.

        Args:
            y: target values, either shape (n,) or (n, 1).
            y_pred: predicted values, either shape (n,) or (n, 1).

        Returns:
            Scalar float32 tensor holding mean(|y - y_pred|).
        """
        # Force both operands to float32 columns. Without this, a 1-D target
        # vector and an (n, 1) prediction broadcast to an (n, n) matrix and the
        # mean is silently taken over all pairs instead of matching rows.
        y = tf.reshape(tf.cast(y, tf.float32), (-1, 1))
        y_pred = tf.reshape(tf.cast(y_pred, tf.float32), (-1, 1))
        return tf.reduce_mean(tf.abs(y - y_pred))

    def train(self, X, y, lr=0.00001, epochs=20, verbose=True):
        """Fit ``W`` and ``b`` with full-batch gradient descent.

        Args:
            X: 2-D array-like of shape (n_instances, n_features).
            y: array-like with one target per instance.
            lr: learning rate applied to both gradients.
            epochs: number of full passes over the data.
            verbose: when true, print the loss computed at each epoch
                (before that epoch's parameter update).

        Raises:
            ValueError: if X is not a non-empty 2-D array or if X and y do not
                contain the same number of instances.
        """
        X = np.asarray(X, dtype=np.float32)
        y = np.asarray(y, dtype=np.float32).reshape((-1, 1)) # [1,2,3,4] -> [[1],[2],[3],[4]]

        if X.ndim != 2 or X.shape[0] == 0:
            raise ValueError(f'X must be a non-empty 2-D array, got shape {X.shape}')
        if X.shape[0] != y.shape[0]:
            # A single-element y would otherwise broadcast against every row.
            raise ValueError(
                f'X and y must have the same number of instances, got {X.shape[0]} and {y.shape[0]}')

        n_features = X.shape[1]

        # Convert once so the arrays are not re-wrapped as tensors on every epoch.
        X = tf.constant(X)
        y = tf.constant(y)

        self.W = tf.Variable(
            initial_value=self.initializer(shape=(n_features, 1), dtype='float32'))
        self.b = tf.Variable(
            initial_value=self.initializer(shape=(1,), dtype='float32'))

        for epoch in range(epochs):
            # Record the forward pass so the tape can differentiate the loss.
            with tf.GradientTape() as t:
                current_loss = self.loss(y, self.predict(X))

            dW, db = t.gradient(current_loss, [self.W, self.b])
            self.W.assign_sub(lr * dW) # W -= lr * dW
            self.b.assign_sub(lr * db)

            if verbose:
                print(f'Epoch {epoch}: Loss: {current_loss.numpy()}') # <3 eager execution

    def predict(self, X):
        """Return predictions of shape (n_instances, 1) for the rows of X.

        X may be any array-like or tensor of shape (n_instances, n_features);
        it is cast to float32 so that float64 or integer input (e.g. the output
        of ``DataFrame.values``) does not hit a dtype mismatch against the
        float32 weights in ``tf.matmul``.
        """
        if not tf.is_tensor(X):
            X = np.asarray(X, dtype=np.float32)
        X = tf.cast(X, tf.float32)
        # [a, b] x [b, c]
        # X->[n_instances, n_features] dot W->[n_features, 1]
        return tf.matmul(X, self.W) + self.b

In [None]:
# Fit on the training split. The learning rate keeps train()'s default (1e-5);
# epochs is raised from the default 20 to 100. The loss is printed every epoch.
model = LinearModel()
model.train(X_train, y_train, epochs=100)

Epoch 0: Loss: 209.1935577392578
Epoch 1: Loss: 209.0630645751953
Epoch 2: Loss: 208.93255615234375
Epoch 3: Loss: 208.8020782470703
Epoch 4: Loss: 208.67161560058594
Epoch 5: Loss: 208.5411376953125
Epoch 6: Loss: 208.4106903076172
Epoch 7: Loss: 208.2802734375
Epoch 8: Loss: 208.1498260498047
Epoch 9: Loss: 208.01942443847656
Epoch 10: Loss: 207.88905334472656
Epoch 11: Loss: 207.75865173339844
Epoch 12: Loss: 207.62828063964844
Epoch 13: Loss: 207.49789428710938
Epoch 14: Loss: 207.36752319335938
Epoch 15: Loss: 207.23716735839844
Epoch 16: Loss: 207.10679626464844
Epoch 17: Loss: 206.97642517089844
Epoch 18: Loss: 206.84605407714844
Epoch 19: Loss: 206.71568298339844
Epoch 20: Loss: 206.58534240722656
Epoch 21: Loss: 206.45497131347656
Epoch 22: Loss: 206.3246307373047
Epoch 23: Loss: 206.1942596435547
Epoch 24: Loss: 206.06393432617188
Epoch 25: Loss: 205.93359375
Epoch 26: Loss: 205.80325317382812
Epoch 27: Loss: 205.67295837402344
Epoch 28: Loss: 205.54266357421875
Epoch 29: Los

## Conclusions

To be completed in the next section.