# Gradient Descent

In [1]:
import pickle
import numpy as np
import time

## 1. Load data

In [2]:

X_train = np.array([[1, 1], [2, 2], [2, 9], [1, 14]])
X_train.shape #m, n

y_train = np.array([3, 4, 10, 13])
y_train.shape #m, 

X_val = np.array([[1.5, 2], [2, 3], [1, 9], [1, 13]])
X_val.shape #m, n

y_val = np.array([3, 4, 9.5, 13.5])
y_val.shape #m, 

X_test = np.array([[2, 1], [1, 2], [2, 8], [2, 13]])
X_test.shape #m, n

y_test = np.array([2, 5, 9, 14])
y_test.shape #m, 

# assert X_train.shape[1] == X_test.shape[1]


(4,)

## 2. Modeling

Imagine you do:
1. imputation - cleaning
2. scaling

### 2.1 Definition

In [3]:
# for definitions
m_train, n_train = X_train.shape
m_val, n_val     = X_val.shape
m_test, n_test   = X_test.shape
num_epochs       = 50
theta            = np.zeros(  (n_train,   )  )
lr               = 0.001
old_val_loss     = np.infty
tolerance        = 0.06
batch_size       = 2

### 2.2 Model

In [4]:
def mse(yhat, ytrue):
    return ( (yhat - ytrue) ** 2  ).sum() / yhat.shape[0]

In [5]:
def predict(input, theta):
    return input @ theta

In [6]:
def gradient(input, yhat, ytrue):
    return (input.T @ (yhat - ytrue) ) / input.shape[0]

### 2.3 Running the loop

In [7]:
#1. loop according to epoch
for i in range(num_epochs):  #0, 1, 2, 3, 4
    
    total_train_loss = 0
    
    #mini-batch sampling:  take only a portion of X, and do everything else
    #without replacement
    for idx in range(0, X_train.shape[0], batch_size):
        
        X_train_mini = X_train[idx:idx+batch_size]
        y_train_mini = y_train[idx:idx+batch_size]
    
        #2. predict
        yhat_train = predict(X_train_mini, theta) # (m, n) x (n, 1) = (m, 1)  #<---same shape as y 
        
        #3. gradient
        grad = gradient(X_train_mini, yhat_train, y_train_mini)  #(n, m) @ (m, 1) = (n, 1)
    
        #4. update
        theta = theta - lr * grad  # (n, 1) - (1) (n, 1) = (n, 1)
        
        total_train_loss +=  mse(yhat_train, y_train_mini)
        
    train_loss = total_train_loss / (X_train.shape[0] / batch_size)

    #validation loss
    #1. take the current theta, and do prediction with the validation set
    yhat_val = X_val @ theta
    #2. calculate the loss with y_val
    val_loss = mse(yhat_val, y_val)
    #3. finish
    
    #########early stopping###################
    #1. if new val_loss is very close to old val_loss by 0.001, you stop everything ok!
    diff = np.abs(old_val_loss - val_loss)
    
    # if diff < tolerance:
    #     print(f'Stopped at epoch {i} - :-)')
    #     #save your model right here!!!
    #     filename = f"chaky_model_{i}_{time.time():.0f}_lr.sav"
    #     pickle.dump(theta, open(filename, 'wb'))
    #     break
    # #2. otherwise continue
    # old_val_loss = val_loss
    
    if (i+1) % 10 == 0:
        print(f"Epoch: {i+1:5.0f}  : {train_loss =:7.3f} : {val_loss=:.3f}")

Epoch:    10  : train_loss =  6.381 : val_loss=5.147
Epoch:    20  : train_loss =  2.105 : val_loss=0.913
Epoch:    30  : train_loss =  1.829 : val_loss=0.474
Epoch:    40  : train_loss =  1.741 : val_loss=0.388
Epoch:    50  : train_loss =  1.664 : val_loss=0.358


## 3. Testing

In [8]:
load_theta = pickle.load(open(filename, 'rb'))

NameError: name 'filename' is not defined

In [None]:
load_theta

In [None]:
yhat_test = X_test @ load_theta
mse_      = mse(  yhat_test , y_test  )
print(f"{mse_= :.2f}")

assert mse_ < 100

## 4. Inference

In [None]:
X_train[0], y_train[0], X_train[1], y_train[1]

In [None]:
#1. randomly create a sample
chaky_test_case = np.array([ [1.5, 0.5] ])
assert len(chaky_test_case.shape) == 2  #m, n

#2. predict
predict_chaky_test_case = chaky_test_case @ theta
predict_chaky_test_case_int = float(predict_chaky_test_case)
print(f"Answer:  {predict_chaky_test_case_int: .2f}.  Does it satisfy you?")

#3. check with your instinct / ask expert whether is correct

## 5. Feature importance

In [None]:
theta

In [None]:
#assume you have standardize your feature
#feature 2 is more important
    # 0.30 / (0.04 + 0.30)
    
#feature 1 is less important
    # 0.04 / (0.04 + 0.030)