In [5]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [6]:
# Grab dataset from file
car_dataframe = pd.read_csv("data/car details v4.csv")
data = (car_dataframe.loc[:, ["Fuel Tank Capacity", "Year", "Price"]]).to_numpy().astype('float64')

# Use simple data cleaning of dropping all null data
data_clean = data[~np.isnan(data).any(axis=1)]

# Suffle data insure equal distabution between train, validation, and test sets (Also use seed to insure determinisum)
rng = np.random.default_rng(seed=42)
rng.shuffle(data_clean)

# Split up data set into train, validation, and test
train_set_end_index = int(data_clean.shape[0]/2)
validation_set_end_index = int(3*data_clean.shape[0]/4)
train_set = data_clean[:train_set_end_index]
validation_set = data_clean[train_set_end_index:validation_set_end_index]
test_set = data_clean[validation_set_end_index:]

x = train_set[:, :2]
y = train_set[:, 2:]

normalization_vars = np.empty((x.shape[1], 2))

for i in range(x.shape[1]):
    mean = x[:, i:i+1].mean()
    std_dev = x[:, i:i+1].std()
    x[:, i:i+1] = (x[:, i:i+1] - mean) / std_dev
    normalization_vars[i] = [mean, std_dev]

# Normalize targets to prevent overflow error.
target_mean = y.mean()
y = y / target_mean

In [7]:
def make_pred(x, w, b):
    return np.matmul(x, w) + b

def compute_cost(x, y, w, b):
    sqr_diff = (make_pred(x, w, b) - y)**2
    return sqr_diff.sum() / (2*x.shape[1])

def run_grad_decent(x, y, w, b, alpha):
    cost_sum = ((make_pred(x, w, b)-y).sum() / x.shape[1])
    new_b = b - alpha * cost_sum

    j = x.shape[1]
    i = x.shape[0]
    new_w = np.empty(j)
    for k in range(i):
        pred = w.reshape(-1).dot(x[k]) + b - y[k]
        for l in range(j):
            new_w[l] += (pred*x[k][l])[0]
    new_w /= j
    new_w = w - (alpha * new_w.reshape([j, 1]))
    return new_w, new_b

def run_linear_regression(x, y, alpha = .00001, iter = 1000):
    j = x.shape[1]
    w = np.zeros([j, 1])
    b = 0
    #print("-------------")
    # print(w,b)
    print(compute_cost(x, y, w, b))
    for _ in range(iter):
        w, b = run_grad_decent(x, y, w, b, alpha)
        print(w,b)
        print(compute_cost(x, y, w, b))
    return w, b

In [8]:
# run_linear_regression(x, y)

polynomial_architectures = [x]
validation_set_architectures = [x]

for _i in range(3):
    last_polygon = polynomial_architectures[-1]
    polynomal_set = np.empty([last_polygon.shape[0], last_polygon.shape[1]**2])

    for i in range(last_polygon.shape[0]):
        index = 0
        for j in range(last_polygon.shape[1]):
            for k in range(last_polygon.shape[1]):
                polynomal_set[i, index] = last_polygon[i, j] * last_polygon[i, k]
                index += 1

    polynomial_architectures.append(polynomal_set)

for _i in range(3):
    last_polygon = validation_set_architectures[-1]
    polynomal_set = np.empty([last_polygon.shape[0], last_polygon.shape[1]**2])

    for i in range(last_polygon.shape[0]):
        index = 0
        for j in range(last_polygon.shape[1]):
            for k in range(last_polygon.shape[1]):
                polynomal_set[i, index] = last_polygon[i, j] * last_polygon[i, k]
                index += 1

    validation_set_architectures.append(polynomal_set)

print(polynomial_architectures[1])
run_linear_regression(polynomial_architectures[1], y)

for architecture in polynomial_architectures:
    # run_linear_regression(architecture, y)
    # Get train set x and y
    # Get validation set x and y
    # Train on train set
    # Save cost of traing set
    # Save cost of validation set
    pass

[[ 1.32674914  1.99188823  1.99188823  2.99048148]
 [ 0.46673582  0.34616501  0.34616501  0.25674099]
 [ 0.85316963  0.09662181  0.09662181  0.01094246]
 ...
 [ 0.035043    0.30566813  0.30566813  2.66623866]
 [ 0.85316963 -0.46802095 -0.46802095  0.25674099]
 [ 1.41962593  0.12463627  0.12463627  0.01094246]]
341.80087773203536
[[ 4.50547688e-003]
 [ 1.36918860e+098]
 [-1.92913777e+289]
 [ 2.44755502e-003]] 0.0024325
inf
[[ 9.86837114e+285]
 [ 5.78102781e+286]
 [-1.92336156e+289]
 [-4.17451363e+286]] 5.563852670434533e+285
inf
[[ 1.97109999e+286]
 [ 1.15177091e+287]
 [-1.91762968e+289]
 [-8.24264673e+286]] 1.1158379127041552e+286
inf
[[ 2.95252812e+286]
 [ 1.72105403e+287]
 [-1.91194161e+289]
 [-1.22067744e+287]] 1.678123538554041e+286
inf
[[ 3.93087026e+286]
 [ 2.28600100e+287]
 [-1.90629688e+289]
 [-1.60692194e+287]] 2.2430144410855135e+286
inf
[[ 4.90588415e+286]
 [ 2.84665989e+287]
 [-1.90069500e+289]
 [-1.98322536e+287]] 2.8102894504134932e+286
inf
[[ 5.87733631e+286]
 [ 3.403078

  sqr_diff = (make_pred(x, w, b) - y)**2


[[ 2.16590021e+287]
 [ 1.22557951e+288]
 [-1.80668381e+289]
 [-7.28794377e+287]] 1.3211996618300907e+287
inf
[[ 2.25351635e+287]
 [ 1.27431852e+288]
 [-1.80181411e+289]
 [-7.51249517e+287]] 1.3790173191088925e+287
inf
[[ 2.34047218e+287]
 [ 1.32271068e+288]
 [-1.79697908e+289]
 [-7.73073240e+287]] 1.43674641368697e+287
inf
[[ 2.42675775e+287]
 [ 1.37075948e+288]
 [-1.79217837e+289]
 [-7.94279786e+287]] 1.4943754079972785e+287
inf
[[ 2.51236364e+287]
 [ 1.41846834e+288]
 [-1.78741162e+289]
 [-8.14883084e+287]] 1.551893143021632e+287
inf
[[ 2.59728090e+287]
 [ 1.46584064e+288]
 [-1.78267850e+289]
 [-8.34896755e+287]] 1.6092888288348417e+287
inf
[[ 2.68150107e+287]
 [ 1.51287972e+288]
 [-1.77797868e+289]
 [-8.54334121e+287]] 1.666552035367362e+287
inf
[[ 2.76501611e+287]
 [ 1.55958887e+288]
 [-1.77331184e+289]
 [-8.73208212e+287]] 1.7236726833815287e+287
inf
[[ 2.84781847e+287]
 [ 1.60597131e+288]
 [-1.76867764e+289]
 [-8.91531769e+287]] 1.7806410356565888e+287
inf
[[ 2.92990100e+287]
 [ 