### 1. Load data from csv file

In [0]:
import numpy as np

TRAIN_DATA_PATH = './data_train.csv'
TEST_DATA_PATH = './data_test.csv'


def _load_xy(path):
  """Read a comma-separated file and split it into features and target.

  Every field is read as text, surrounding double quotes are stripped,
  and the result is converted to float64. The last column is returned as
  the target (as a column vector); all preceding columns are the features.

  Args:
    path: location of the CSV file to read.

  Returns:
    (x, y) where x holds all but the last column and y is the last
    column reshaped to (-1, 1).
  """
  # The builtin `str` is used instead of `np.str`: that alias was deprecated
  # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
  raw = np.genfromtxt(path, delimiter=',', dtype=str)
  values = np.char.strip(raw, '"').astype(np.float64)
  return values[:, 0:-1], values[:, -1].reshape(-1, 1)


# load train data
x_train, y_train = _load_xy(TRAIN_DATA_PATH)

# load test data
x_test, y_test = _load_xy(TEST_DATA_PATH)

### 2. Define functions for linear regression



In [0]:
def linear(theta, x):
  """Evaluate the linear model on every row of x.

  A column of ones is prepended to x so that the first column of theta
  acts as the intercept term.

  Args:
    theta: parameter array with one more column than x (the notebook
      uses shape (1, n + 1)).
    x: 2-D feature array with one sample per row.

  Returns:
    The product of the augmented feature matrix with theta transposed.
  """
  sample_cnt = len(x)
  bias_column = np.ones((sample_cnt, 1))
  design = np.hstack((bias_column, x))
  return design @ theta.T

def objective(y_hat, y):
  """Halved mean squared error between predictions and targets.

  Args:
    y_hat: predicted values.
    y: target values; its length is used as the sample count.

  Returns:
    The squared residuals summed along axis 0 and scaled by 1 / (2 * m),
    where m = len(y).
  """
  sample_cnt = len(y)
  residual = y_hat - y
  scale = 1 / (2 * sample_cnt)
  squared_total = np.sum(np.square(residual), axis=0)
  return scale * squared_total

def gradient(x, y_hat, y):
  """Gradient of the halved mean squared error with respect to theta.

  Args:
    x: 2-D feature array with one sample per row.
    y_hat: predictions for x.
    y: targets for x.

  Returns:
    The transposed residuals multiplied by the ones-augmented feature
    matrix, scaled by 1 / m where m = len(x).
  """
  sample_cnt = len(x)
  # Same augmentation as the model: a leading column of ones for the bias.
  design = np.hstack((np.ones((sample_cnt, 1)), x))
  residual = y_hat - y
  scale = 1 / sample_cnt
  return scale * (residual.T @ design)

### 3. Learning with the gradient descent algorithm


In [0]:
# initialize model parameters and learning rate
feature_cnt = x_train.shape[1]
# one weight per feature plus one for the ones column that linear() prepends
theta = np.zeros((1, feature_cnt + 1))

lr = 0.00001
epoch_count = 10000

# per-epoch record of the parameters and errors, measured before each update
history = {
    'theta': [],
    'train_err': [],
    'test_err': []
}

for epoch in range(epoch_count):
  # calculate training error
  y_hat_train = linear(theta, x_train)
  train_err = objective(y_hat_train, y_train)

  # calculate testing error
  y_hat_test = linear(theta, x_test)
  test_err = objective(y_hat_test, y_test)

  # logging history
  # np.squeeze returns a view of theta, and theta is updated in place below;
  # without the copy every stored entry would alias the same buffer and end
  # up showing the final parameters instead of the per-epoch values.
  history['theta'].append(np.squeeze(theta).copy())
  history['train_err'].append(train_err)
  history['test_err'].append(test_err)

  # gradient descent using the training dataset
  grad_theta = gradient(x_train, y_hat_train, y_train)
  theta -= lr * grad_theta