In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import math
import random
import time

import seaborn as sns
from IPython import display

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
sns.set()

In [None]:
# feature scaling utils
# arguments: x = value, m = mean, s = spread (max - min), a = additive offset

def scale_down(x, m, s, a):
    """Map a raw value into feature-scaled space.

    Computes ``(x - m) / s + a``: centre ``x`` on ``m``, divide by the spread
    ``s`` and shift the result by the offset ``a``. Only arithmetic operators
    are used, so NumPy arrays are handled element-wise.
    """
    return (x - m) / s + a


def scale_up(x, m, s, a):
    """Invert ``scale_down``: map a scaled value back to raw units.

    Solving ``y = (x - m) / s + a`` for ``x`` gives ``(y - a) * s + m``.
    The offset ``a`` must be removed exactly once; subtracting it a second
    time would shift every recovered value down by ``a``.
    """
    return (x - a) * s + m

In [None]:
# input data

# which dataset to build: "default", "random" or "generate"
choice = "default"
use_feature_scaling = True
# additive offset handed to scale_down / scale_up; with 1 the scaled values
# stay non-negative, which the sqrt(x) term of the model requires
fs_factor = 1

# features (X): sizes
# outputs (Y): prices

if choice == "default":
    # small fixed dataset
    sizes = [1400, 1600, 1700, 1875, 1100, 1550, 3200, 2350, 2450, 1425, 1700, 1515]
    prices = [245, 312, 279, 308, 199, 212, 501, 219, 405, 324, 319, 255]

elif choice == "random":
    # 100 random sizes; price = random slope * size + random integer noise
    sizes = [random.randint(1200, 3500) for _ in range(100)]

    q = random.uniform(0, 1)
    prices = [q * s + random.randint(10, 500) for s in sizes]

elif choice == "generate":
    # noisy samples of a known square-root curve
    f = lambda x: 1.2 * np.sqrt(x)
    lst = [(x, f(x) + random.uniform(-1, 1)) for x in np.arange(0.2, 100.0, 0.2)]

    # alternative noise-free generator:
    #     f = lambda x : 1 + x + math.sqrt(x)
    #     lst = [(x, f(x)) for x in np.arange(0.2, 10.0, 0.2)]

    sizes = [x for (x, _) in lst]
    prices = [y for (_, y) in lst]

else:
    # fail loudly: otherwise sizes/prices would be undefined, or silently keep
    # the values left over from a previous run of this cell
    raise ValueError(
        f"unknown choice {choice!r}; expected 'default', 'random' or 'generate'"
    )

In [None]:
# Dataset statistics and (optional) feature scaling.

m = len(sizes)  # number of training examples

sizes_mean, prices_mean = np.mean(sizes), np.mean(prices)

# spread of each variable (max - min), used as the scaling divisor
sizes_mm = max(sizes) - min(sizes)
prices_mm = max(prices) - min(prices)

# *_fs hold the values the optimiser actually sees: scaled copies when
# feature scaling is on, the raw lists themselves otherwise
if not use_feature_scaling:
    sizes_fs, prices_fs = sizes, prices
else:
    sizes_fs = [scale_down(v, sizes_mean, sizes_mm, fs_factor) for v in sizes]
    prices_fs = [scale_down(v, prices_mean, prices_mm, fs_factor) for v in prices]

# raw and scaled columns side by side for inspection
data = {
    "size": sizes,
    "price": prices,
    "size_fs": sizes_fs,
    "price_fs": prices_fs,
}

table = pd.DataFrame(data)
table.head(10)

In [None]:
# Scatter plot of the training data as the optimiser sees it
# (feature-scaled when use_feature_scaling is on).
plotX, plotY = sizes_fs, prices_fs

plt.xlabel("size")
plt.ylabel("price")
plt.scatter(plotX, plotY, color="black")
# optional: clamp the view to the data with
# plt.axis((min(plotX), max(plotX), min(plotY), max(plotY)))
plt.show()

In [None]:
# GRADIENT DESCENT

# hypothesis: h_w(x) = w0 + w1 * x + w2 * sqrt(x)
# sizes_fs / prices_fs are the raw sizes / prices when feature scaling is off


def dJ(w):
    """Gradient of the cost for the model ``h_w(x) = w0 + w1*x + w2*sqrt(x)``.

    The training inputs and targets are read from the notebook-level
    ``sizes_fs`` and ``prices_fs``. For each weight the result is the mean of
    ``residual * basis`` with ``residual = h_w(x) - y`` and basis ``1``, ``x``
    and ``sqrt(x)`` respectively, i.e. the gradient of
    ``J(w) = 1/(2n) * sum(residual ** 2)``.

    Parameters
    ----------
    w : sequence of three numbers
        The weights ``(w0, w1, w2)``.

    Returns
    -------
    tuple
        ``(dw0, dw1, dw2)``.

    Raises
    ------
    ValueError
        If ``sizes_fs`` contains a negative value (sqrt is undefined there).
    """
    w0, w1, w2 = w

    arr_x = np.asarray(sizes_fs, dtype=float)
    arr_y = np.asarray(prices_fs, dtype=float)

    # np.sqrt would silently yield NaN for negatives; keep the hard failure
    # that math.sqrt used to give so a bad scaling setup is noticed at once
    if np.any(arr_x < 0):
        raise ValueError("math domain error: sizes_fs contains negative values")

    # vectorised sqrt; the np.float alias used here before no longer exists
    # in current NumPy releases
    arr_sqrtx = np.sqrt(arr_x)

    # take the sample count from the data itself instead of a separate global
    n = arr_x.size

    # w0 broadcasts over the array, no explicit ones-vector needed
    residual = w0 + w1 * arr_x + w2 * arr_sqrtx - arr_y

    dw0 = np.sum(residual) / n
    dw1 = np.sum(residual * arr_x) / n
    dw2 = np.sum(residual * arr_sqrtx) / n

    return dw0, dw1, dw2


def gradient_descent(weights, learning_rate, num_iterations):
    """Run a fixed number of gradient-descent steps.

    Starting from the three initial ``weights``, each step asks ``dJ`` for the
    gradient at the current point and moves every weight against its partial
    derivative, scaled by ``learning_rate``. All three weights are updated
    from the same gradient evaluation (simultaneous update).

    Returns the final weights as a ``(w0, w1, w2)`` tuple.
    """
    w0, w1, w2 = weights
    current = (w0, w1, w2)

    for _ in range(num_iterations):
        g0, g1, g2 = dJ(list(current))
        current = (
            current[0] - learning_rate * g0,
            current[1] - learning_rate * g1,
            current[2] - learning_rate * g2,
        )

    return current

In [None]:
def graph(formula, xinf, xsup, step):
    """Plot a curve sampled at ``np.arange(xinf, xsup, step)``.

    Parameters
    ----------
    formula : callable or str
        Preferred: a callable that receives the sample array ``X`` and returns
        the matching ``Y`` values. Legacy: a string holding a Python expression
        written in terms of ``X`` (e.g. ``"predict(weights, X)"``), which is
        evaluated with ``eval``.
    xinf, xsup, step
        Start, (exclusive) stop and spacing of the sample points.
    """
    X = np.arange(xinf, xsup, step)

    if callable(formula):
        Y = formula(X)
    else:
        # NOTE: eval runs arbitrary code - only pass trusted, hard-coded
        # strings here. The expression sees this function's local ``X``.
        Y = eval(formula)

    plt.plot(X, Y)


def scatter():
    """Draw the raw training data as black points and show the figure.

    Reads the notebook-level ``sizes`` (x axis) and ``prices`` (y axis).
    Drawing goes through pyplot's current axes, so a curve plotted just
    before this call ends up in the same figure.
    """
    plt.xlabel("size")
    plt.ylabel("price")
    plt.scatter(sizes, prices, color="black")
    plt.show()


def predict(weights, x):
    """Evaluate the fitted model ``w0 + w1*x + w2*sqrt(x)`` at raw size(s) ``x``.

    When ``use_feature_scaling`` is on, ``x`` is first mapped into scaled space
    with ``scale_down`` (using the size statistics), the model is evaluated
    there, and the result is mapped back to price units with ``scale_up``
    (using the price statistics). Otherwise the model is applied to ``x``
    directly.

    Parameters
    ----------
    weights : sequence of three numbers
        ``(w0, w1, w2)``.
    x : scalar, list or array
        Size(s) to predict for; converted to a float NumPy array.

    Returns
    -------
    NumPy array (or NumPy scalar for scalar ``x``) of predicted prices.

    Raises
    ------
    ValueError
        If the value passed to the square root is negative.
    """
    w0, w1, w2 = weights

    features = np.asarray(x, dtype=float)
    if use_feature_scaling:
        features = scale_down(features, sizes_mean, sizes_mm, fs_factor)

    # np.sqrt would silently return NaN; keep the hard failure math.sqrt gave
    if np.any(features < 0):
        raise ValueError("math domain error: cannot take sqrt of a negative size")

    # vectorised sqrt works for scalars and arrays alike and avoids the
    # np.float alias, which no longer exists in current NumPy releases
    predicted = w0 + w1 * features + w2 * np.sqrt(features)

    if use_feature_scaling:
        return scale_up(predicted, prices_mean, prices_mm, fs_factor)
    return predicted

In [None]:
# Single training run: small step size, many iterations, random start.
learning_rate, num_iterations = 0.001, 100_000

weights = gradient_descent(
    weights=np.random.random(3),
    learning_rate=learning_rate,
    num_iterations=num_iterations,
)
print(weights)

# Overlay the fitted curve on the raw data. The formula string is evaluated
# inside graph() with X bound to the sampled sizes, and looks up the
# notebook-level `weights` assigned above.
graph("predict(weights, X)", xinf=min(sizes), xsup=max(sizes), step=0.5)
scatter()

In [None]:
# see fits: retrain from a fresh random start with 10**0 ... 10**5 iterations
# and redraw the fitted curve each time
learning_rate = 0.01

# clear the cell output after every REFRESH_INTERVAL-th fit
REFRESH_INTERVAL = 1

for i, num_iterations in enumerate(10**exponent for exponent in range(6)):
    weights = gradient_descent(
        weights=np.random.random(3),
        learning_rate=learning_rate,
        num_iterations=num_iterations,
    )
    print(weights)

    graph("predict(weights, X)", xinf=min(sizes), xsup=max(sizes), step=1)
    scatter()

    if (i + 1) % REFRESH_INTERVAL == 0:
        # wait=True defers the clear until the next output arrives, so the
        # current figure stays visible during the pause
        display.clear_output(wait=True)
        time.sleep(1)