In [None]:
import json

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Load the auto-mpg dataset from the local parquet file and display the frame.
df = pd.read_parquet(path="datasets/auto-mpg.parquet")
df

In [None]:
# Pairwise relationship grid over the columns of the raw frame.
sns.pairplot(data=df)

In [None]:
# Fuel economy against vehicle weight, straight from the raw columns.
# `scatterplot` returns the Axes it drew on, so the grid is enabled on it directly.
sns.scatterplot(x="weight", y="mpg", data=df).grid(True)
plt.gca()

In [None]:
# Fuel consumption in litres per 100 km, derived from `mpg`
# (assumes `mpg` is miles per US gallon — confirm against the dataset description):
#   gallons/mile * 3.78541 L/gallon / 1.60934 km/mile * 100 km
gpm = 1.0 / df.mpg
lp100 = gpm.mul(3.78541).div(1.60934).mul(100).rename("lp100")

In [None]:
# Vehicle weight converted with the pound-to-kilogram factor 0.453592
# (assumes `weight` is recorded in pounds — confirm against the dataset description).
weight_kg = df.weight.mul(0.453592).rename("weight_kg")

In [None]:
# Put the two derived series side by side; each column is labelled with its series' name.
dfr = pd.concat([lp100, weight_kg], axis=1)
dfr

In [None]:
# Pairwise relationship grid over the derived columns (lp100, weight_kg).
sns.pairplot(data=dfr)

In [None]:
# Ordinary least-squares fit of lp100 ~= slope * weight_kg + intercept.
# Design matrix: column 0 holds the weights, column 1 is all ones (the intercept term),
# so the solution vector comes back ordered as [slope, intercept].
# rcond=None selects NumPy's machine-precision cutoff explicitly; omitting it emits a
# FutureWarning (and uses the legacy cutoff) on NumPy < 2.0.
# Only the solution vector (index 0) is kept; indexing instead of unpacking into `_`
# also avoids shadowing IPython's last-output variable `_`.
coeffs = np.linalg.lstsq(
    a=np.stack([dfr.weight_kg, np.ones_like(dfr.weight_kg)], axis=1),
    b=dfr.lp100,
    rcond=None,
)[0]
coeffs

In [None]:
# Name the fitted coefficients. Their order follows the design-matrix columns used
# for the fit: the weight column first, then the constant (ones) column.
slope = coeffs[0]
intercept = coeffs[1]

In [None]:
# Persist the fitted line as JSON.
# The values are cast to built-in floats first: `json` only encodes NumPy scalars that
# subclass `float` (float64), so a float32 fit would otherwise raise TypeError here.
with open("models/coeffs.json", mode="w", encoding="utf-8") as json_output:
    json.dump({"slope": float(slope), "intercept": float(intercept)}, json_output)

In [None]:
# NOTE(review): this import sits below the coefficient dump rather than with the other
# imports — presumably models.consumption reads models/coeffs.json; confirm before moving it.
from models.consumption import lp100_pred

In [None]:
# Observed consumption (faded points) with the model's prediction drawn on top.
sns.scatterplot(x=dfr.weight_kg, y=dfr.lp100, color="C0", alpha=0.25)
# Draw the prediction over weights sorted ascending: plt.plot joins points in the order
# given, so unsorted x values make the line retrace itself back and forth (and would
# zigzag for any non-linear model).
# Assumes lp100_pred is applied element-wise to its input — confirm.
weight_sorted = dfr.weight_kg.sort_values()
plt.plot(weight_sorted, lp100_pred(weight_sorted), color="C0")
plt.grid(True)
plt.savefig("images/prediction.png")
plt.gca()

In [None]:
# Signed prediction error per row: predicted minus observed consumption.
pred_error = (lp100_pred(dfr.weight_kg) - dfr.lp100).rename("pred_error")
pred_error

In [None]:
# Summary statistics of the prediction errors.
pred_error.describe()

In [None]:
# Distribution of the prediction errors: histogram with a KDE curve overlaid,
# written to disk as well as shown inline.
# `histplot` returns the Axes it drew on, so the grid is enabled on it directly.
sns.histplot(kde=True, data=pred_error).grid(True)
plt.savefig("images/error.png")
plt.gca()