In [125]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [126]:
# Grab dataset from file
car_dataframe = pd.read_csv("data/car details v4.csv")
data = (car_dataframe.loc[:, ["Fuel Tank Capacity", "Year", "Price"]]).to_numpy().astype('float64')

# Use simple data cleaning of dropping all null data
data_clean = data[~np.isnan(data).any(axis=1)]

# Suffle data insure equal distabution between train, validation, and test sets (Also use seed to insure determinisum)
rng = np.random.default_rng(seed=42)
rng.shuffle(data_clean)

# Split up data set into train, validation, and test
train_set_end_index = int(data_clean.shape[0]/2)
validation_set_end_index = int(3*data_clean.shape[0]/4)
train_set = data_clean[:train_set_end_index]
validation_set = data_clean[train_set_end_index:validation_set_end_index]
test_set = data_clean[validation_set_end_index:]

x = train_set[:, :2]
y = train_set[:, 2:]

normalization_vars = np.empty((x.shape[1], 2))

for i in range(x.shape[1]):
    mean = x[:, i:i+1].mean()
    std_dev = x[:, i:i+1].std()
    x[:, i:i+1] = (x[:, i:i+1] - mean) / std_dev
    normalization_vars[i] = [mean, std_dev]

# Normalize targets to prevent overflow error.
target_mean = y.mean()
y = y / target_mean

In [127]:
def make_pred(x, w, b):
    return np.matmul(x, w) + b

def compute_cost(x, y, w, b):
    sqr_diff = (make_pred(x, w, b) - y)**2
    return sqr_diff.sum() / (2*x.shape[1])

def run_grad_decent(x, y, w, b, alpha):
    cost_sum = ((make_pred(x, w, b)-y).sum() / x.shape[1])
    new_b = b - alpha * cost_sum

    j = x.shape[1]
    i = x.shape[0]
    new_w = np.empty(j)
    for k in range(i):
        pred = w.reshape(-1).dot(x[k]) + b - y[k]
        for l in range(j):
            new_w[l] += (pred*x[k][l])[0]
    new_w /= j
    new_w = w - (alpha * new_w.reshape([j, 1]))
    return new_w, new_b

def run_linear_regression(x, y, alpha = .0001, iter = 500):
    j = x.shape[1]
    w = np.empty([j, 1])
    b = 0
    print(compute_cost(x, y, w, b))
    for _ in range(iter):
        w, b = run_grad_decent(x, y, w, b, alpha)
        print(compute_cost(x, y, w, b))
    return w, b

In [128]:
run_linear_regression(x, y)

polynomial_architectures = []

for architecture in polynomial_architectures:
    # Get train set x and y
    # Train on train set
    # Save cost of traing set
    # Save cost of validation set
    pass

683.6017554640707
639.5809586524996
599.9218990248204
564.2089340931029
532.047939706297
503.0842988295291
476.99892820808105
453.50470510546734
432.34325460404716
413.2820609644383
396.1118702474587
380.64435473212995
366.7100126539651
354.15627947397047
342.8458293016127
332.6550472623386
323.4726555471424
315.1984776306358
307.7423267152791
301.0230058709977
294.96740860760514
289.5097097568913
284.59063756501916
280.15681881578956
276.1601896319422
272.5574653444652
269.3096634873986
266.381674575507
263.7418758613179
261.3617837525751
259.2157410066528
257.28063520992436
255.53564540193207
253.96201402048854
252.54284162815594
251.26290213614922
250.10847647148745
249.06720283980903
248.12794192200306
247.2806555098118
246.516297235724
245.82671418750311
245.20455831911454
244.64320667900998
244.13668957491737
243.6796258826055
243.26716478551364
242.89493330358005
242.55898903385977
242.25577758332156
241.98209422620815
241.7350493651162
241.51203741702906
241.31070878339244
241.