In [None]:
import pandas as pd
import plotly.express as px
from sklearn.linear_model import LinearRegression


In [None]:
# Gradient Descent
def mse(xs, ys, w, b):
  """Return the mean squared error of the line ``w*x + b`` against ``ys``.

  Args:
    xs: Indexable sequence of inputs; its length sets the sample count.
    ys: Indexable sequence of targets, read at the same positions as ``xs``.
    w: Slope of the line.
    b: Intercept of the line.

  Returns:
    The sum of squared residuals divided by ``len(xs)``.

  Raises:
    ZeroDivisionError: if ``xs`` is empty.
  """
  count = len(xs)
  squared_error_sum = 0.0

  for idx in range(count):
    # Residual between the observed target and the line's prediction.
    residual = ys[idx] - (w*xs[idx] + b)
    squared_error_sum += residual**2

  return squared_error_sum / float(count)

def update_w_and_b(xs, ys, w, b, a):
  """Take one gradient-descent step on the mean squared error of ``w*x + b``.

  Args:
    xs: Indexable sequence of inputs; its length sets the sample count.
    ys: Indexable sequence of targets, read at the same positions as ``xs``.
    w: Current slope.
    b: Current intercept.
    a: Learning rate (step size applied to each averaged gradient).

  Returns:
    Tuple ``(w, b)`` holding the updated slope and intercept.

  Raises:
    ZeroDivisionError: if ``xs`` is empty.
  """
  count = len(xs)
  grad_w = 0.0
  grad_b = 0.0

  for idx in range(count):
    # Both partial derivatives share the same residual term.
    residual = ys[idx] - (w*xs[idx] + b)
    grad_w += -2 * xs[idx] * residual
    grad_b += -2 * residual

  # Average the accumulated gradients over the samples.
  grad_w /= float(count)
  grad_b /= float(count)

  # Step against the gradient.
  new_w = w - a * grad_w
  new_b = b - a * grad_b
  return new_w, new_b

def train(xs, ys, w, b, a, epochs, log_every=400):
  """Fit ``w`` and ``b`` by running ``epochs`` gradient-descent updates.

  Args:
    xs: Indexable sequence of inputs.
    ys: Indexable sequence of targets, aligned with ``xs``.
    w: Starting slope.
    b: Starting intercept.
    a: Learning rate passed to ``update_w_and_b``.
    epochs: Number of update steps to perform.
    log_every: Print the loss on every ``log_every``-th epoch (epoch 0
      included). Defaults to 400, the interval that was previously
      hard-coded; pass 0 or ``None`` to train without printing.

  Returns:
    Tuple ``(w, b)`` after the final update. When ``epochs`` is 0 the
    starting values are returned unchanged.
  """
  for i in range(epochs):
    w, b = update_w_and_b(xs, ys, w, b, a)

    # The loss is evaluated only on logging epochs because mse() makes a
    # full pass over the data. The truthiness check also guards the modulo
    # against a zero interval.
    if log_every and i % log_every == 0:
      print("epoch: ", i, "; loss: ", mse(xs, ys, w, b))

  return w, b

def predict(x, w, b):
  """Evaluate the line at ``x``: slope ``w`` times ``x`` plus intercept ``b``."""
  scaled = w * x
  return scaled + b


In [None]:
# Load the vehicle listings, keep only the four columns used below, and
# drop rows with year before 2012 or price of 5000 or less.
# NOTE(review): the CSV path is absolute and machine-specific; anyone else
# running this notebook has to adjust it.
df = (
  pd.read_csv("/Users/akapralov/tmp/vehicles.csv")
  .filter(items=["manufacturer", "model", "year", "price"])
  .query("year >= 2012 & price > 5000")
)
print(df)


In [None]:
# Count listings per (manufacturer, model) pair, most frequent first.
stats = (
  df.groupby(["manufacturer", "model"])
  .size()
  .reset_index(name='counts')
  .sort_values(by="counts", ascending=False)
)
print(stats)


In [None]:
# Narrow the listings to Ford F-150 rows and derive the two modelling
# columns: price divided by 1000 and age measured back from 2022.
# Both columns are computed with vectorised arithmetic inside a single
# chain rather than row-wise apply() calls and attribute-style column
# assignment, so no intermediate frame is mutated and the cell gives the
# same result every time it is re-run.
# price and year are already numeric here: the upstream query compares them
# against numbers.
prices = (
  df.query("manufacturer == 'ford' & model == 'f-150'")
  .filter(items=["year", "price"])
  .assign(
    price=lambda frame: frame["price"].astype(float) / 1000,
    age=lambda frame: 2022 - frame["year"],
  )
)
print(prices)

In [None]:
# Scatter of price against age for the selected listings.
fig = px.scatter(prices, x="age", y="price")
fig.show()


In [None]:
# Fit the line with gradient descent, starting from w = b = 0.
w, b = train(
  xs=prices.age.values,
  ys=prices.price.values,
  w=0.0,
  b=0.0,
  a=0.001,
  epochs=15000,
)
print("w: ", w, "; b: ", b)


In [None]:
# Scatter of price against age with the fitted line drawn from age 1 to
# age 10; both endpoints are evaluated with predict().
fig = px.scatter(prices, x="age", y="price")
fig.add_shape(
  type="line",
  x0=1, x1=10,
  y0=predict(1, w, b), y1=predict(10, w, b),
)
fig.show()


In [None]:
# Cross-check: fit scikit-learn's LinearRegression (ordinary least squares)
# on the same single feature and print its predictions for a few ages.
model = LinearRegression().fit(X=prices[['age']], y=prices.price)
print(model.predict(pd.DataFrame({'age': [2, 4, 6, 8, 10]})))
