In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from scipy.optimize import curve_fit

### Read in x-abc data

In [None]:
# Load the comma-separated table as a NumPy structured array; names=True makes
# genfromtxt take the field names from the file's header row.
filename = "xabc.csv"
data = np.genfromtxt(filename, delimiter=",", names=True)

# Show which fields were found in the file.
columns = list(data.dtype.names)
print(columns)

# Pull out the individual series by field name.
xdata = data["x"]
adata = data["a"]
bdata = data["b"]
cdata = data["c"]

In [None]:
# Overlay the three series against x, markers joined by lines.
for series, series_label in ((adata, "a"), (bdata, "b"), (cdata, "c")):
    plt.plot(xdata, series, "x-", label=series_label)

plt.xlabel("x")
plt.ylabel("data")
plt.legend()
plt.show()

In [None]:
# Degree-1 polynomial fit of each series against x, wrapped as a callable.
ya, yb, yc = (
    np.poly1d(np.polyfit(xdata, series, 1)) for series in (adata, bdata, cdata)
)

# Dense grid over the measured x range so the fitted lines are drawn smoothly.
x = np.linspace(min(xdata), max(xdata), 100)

# One colour per series: crosses for the data, a solid line for its fit.
for points, fit, marker_fmt, line_fmt, series_label in (
    (adata, ya, "bx", "b-", "a"),
    (bdata, yb, "rx", "r-", "b"),
    (cdata, yc, "gx", "g-", "c"),
):
    plt.plot(xdata, points, marker_fmt)
    plt.plot(x, fit(x), line_fmt, label=series_label)

plt.title("Linear fit")
plt.xlabel("x")
plt.ylabel("data")
plt.legend()
plt.show()

### Fit, using curve_fit, to get uncertainties and confidence intervals

In [None]:
def f_lin(x, a, b):
    """Straight-line model: intercept ``a`` plus slope ``b`` times ``x``."""
    slope_term = b * x
    return a + slope_term


# Least-squares fit of the straight-line model to the b series.
popt, pcov = curve_fit(f_lin, xdata, bdata)

# popt = optimised parameters; unpack them in the order f_lin declares them
# after x, i.e. (a, b).
a, b = popt

# Approximate 1-sigma (standard) uncertainties: square roots of the diagonal
# of pcov, the parameter covariance matrix.
da, db = np.sqrt(pcov.diagonal())

for pair in ((a, b), (da, db)):
    print(*pair)


def df(x, da, db, cov_ab=0.0):
    """Return the 1-sigma uncertainty of the line ``a + b * x`` at ``x``.

    Propagates the parameter uncertainties to the fitted value:

        sigma(x)**2 = da**2 + (db * x)**2 + 2 * x * cov_ab

    Parameters
    ----------
    x : float or array-like
        Point(s) at which to evaluate the uncertainty.
    da : float
        Standard uncertainty of the intercept ``a``.
    db : float
        Standard uncertainty of the slope ``b``.
    cov_ab : float, optional
        Covariance between intercept and slope (the off-diagonal element of
        the parameter covariance matrix). The default of 0.0 treats the two
        parameters as uncorrelated, i.e. a plain quadrature sum.

    Returns
    -------
    float or ndarray
        Uncertainty of the fitted value, with the same shape as ``x``.
    """
    return np.sqrt(da**2 + (db * x) ** 2 + 2.0 * x * cov_ab)

In [None]:
# Dense x grid over the measured range, the best-fit line on it, and the
# 1-sigma half-width of the band around that line.
# NOTE: df(x, da, db) combines the two parameter uncertainties in quadrature;
# it is not given the off-diagonal (covariance) element of pcov.
x = np.linspace(min(xdata), max(xdata), 100)
best = f_lin(x, a, b)
err = df(x, da, db)

plt.plot(xdata, bdata, "bx")
plt.plot(x, best, "r-", label="best fit")

# Dotted outlines at +/- 1, 2 and 3 sigma.
for n_sigma in (1.0, 2.0, 3.0):
    plt.plot(x, best + n_sigma * err, ls="dotted", c="lightgrey")
    plt.plot(x, best - n_sigma * err, ls="dotted", c="lightgrey")

# Shaded bands, more transparent as they widen. Raw strings keep the LaTeX
# backslashes (including the one before %) literal, so Python does not warn
# about an invalid escape sequence.
for n_sigma, alpha, band_label in (
    (1.0, 0.4, r"1$\sigma\approx68\%$ C.L."),
    (2.0, 0.25, r"2$\sigma\approx95\%$ C.L."),
    (3.0, 0.1, r"3$\sigma\approx99.7\%$ C.L."),
):
    plt.fill_between(
        x,
        best + n_sigma * err,
        best - n_sigma * err,
        color="red",
        alpha=alpha,
        label=band_label,
    )
plt.legend()

plt.title("Linear fit (to b data)")
plt.ylabel("b")
plt.xlabel("x")
plt.show()

### Car data

In [None]:
# Read the car listings into a DataFrame and report which columns it has.
filename = "CarsRetailPrice.csv"
car_data = pd.read_csv(filename)

car_columns = car_data.columns.tolist()
print(car_columns)

In [None]:
# Columns used throughout the rest of the analysis.
price, mileage = car_data["Price"], car_data["Mileage"]

# Scatter of every listing: price against mileage.
plt.plot(mileage, price, "x")
plt.ylabel("Price")
plt.xlabel("Mileage")
plt.show()

In [None]:
# Straight-line (degree-1) least-squares fit of price against mileage.
coefs1 = np.polyfit(mileage, price, 1)

# Wrap the fitted coefficients as a callable polynomial.
y = np.poly1d(coefs1)

# Evenly spaced mileage grid for drawing smooth fit lines.
x = np.linspace(min(mileage), max(mileage), 100)

plt.plot(mileage, price, "x")
plt.plot(x, y(x), label="fit")
plt.xlabel("Mileage")
plt.ylabel("Price")
plt.legend()  # without a legend the "fit" label is never displayed
plt.show()

In [None]:
# Distinct values of the "Make" column.
makes = pd.unique(car_data["Make"])
print(makes)

In [None]:
# One colour per make: scatter that make's listings and overlay its own
# straight-line fit, evaluated on the mileage grid x.
for i, make in enumerate(makes):
    colour = f"C{i}"
    make_rows = car_data.loc[car_data["Make"] == make]
    make_mileage, make_price = make_rows["Mileage"], make_rows["Price"]
    fit = np.poly1d(np.polyfit(make_mileage, make_price, 1))
    plt.plot(make_mileage, make_price, "x", color=colour)
    plt.plot(x, fit(x), "-", label=make, color=colour)

plt.legend()
plt.ylabel("Price")
plt.xlabel("Mileage")
plt.show()

In [None]:
# Restrict to Chevrolet listings and find the models sold under that make.
chev_data = car_data[car_data["Make"] == "Chevrolet"]
chev_models = chev_data["Model"].unique()
print(chev_models)

plt.title("Chevrolet")
for i, model in enumerate(chev_models):
    # Select from chev_data (not car_data) so that rows of another make that
    # happens to share the model name are not mixed into the Chevrolet fit.
    tmp_data = chev_data[chev_data["Model"] == model]
    x_data = tmp_data["Mileage"]
    y_data = tmp_data["Price"]
    # Straight-line fit for this model, drawn over the mileage grid x.
    coefs1 = np.polyfit(x_data, y_data, 1)
    y = np.poly1d(coefs1)
    plt.plot(x_data, y_data, "x", color=f"C{i}")
    plt.plot(x, y(x), "-", label=model, color=f"C{i}")
plt.legend()
plt.xlabel("Mileage")
plt.ylabel("Price")
plt.show()

In [None]:
# Small multiples: one panel per make, two panels per row, with shared axes so
# prices and mileages are directly comparable between panels. The number of
# rows follows the number of makes instead of being fixed at three.
n_cols = 2
n_rows = max(1, -(-len(makes) // n_cols))  # ceiling division
fig, axs2D = plt.subplots(n_rows, n_cols, sharex=True, sharey=True, squeeze=False)

axs = axs2D.flatten()

for i, make in enumerate(makes):
    make_data = car_data[car_data["Make"] == make]
    models = make_data["Model"].unique()

    axs[i].set_title(make)
    # Per-model data (crosses) and per-model fit (dashed), one colour each.
    for j, model in enumerate(models):
        tmp_data = make_data[make_data["Model"] == model]
        x_data = tmp_data["Mileage"]
        y_data = tmp_data["Price"]
        coefs1 = np.polyfit(x_data, y_data, 1)
        y = np.poly1d(coefs1)
        axs[i].plot(x_data, y_data, "x", color=f"C{j}")
        axs[i].plot(x, y(x), "--", color=f"C{j}")
    # Fit over all of this make's listings, drawn as a thick black line.
    coefs1 = np.polyfit(make_data["Mileage"], make_data["Price"], 1)
    y = np.poly1d(coefs1)
    axs[i].plot(x, y(x), "k-", linewidth=3)

# Label only the outer axes: the bottom row and the left column.
for ax in axs2D[-1, :]:
    ax.set_xlabel("Mileage")
for ax in axs2D[:, 0]:
    ax.set_ylabel("Price")

# Run the layout pass once titles and labels exist so they are accounted for.
fig.tight_layout(pad=2.0)  # add some space
plt.show()