# Analysis of car parameters

The data come from [otomoto.pl](https://www.otomoto.pl/osobowe); the access date and time are encoded in the file name (`carsDD_MM_YYYY_hh_mm.csv`).

The main goal of the analysis is to find driver profiles for the model.

In [None]:
import os
import datetime
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from utils import get_path_with, save_in

# Labels of the car categories stored in the derived "type" column.
CV, EV, PHEV = "CV", "BEV", "PHEV"

# Category order used when iterating over the types and when ordering plots.
car_types = [CV, PHEV, EV]

### Set plot parameters

In [None]:
# Alternative global styles, kept here for quick switching:
# plt.style.use("grayscale")
# plt.style.use("dark_background")

# Faint grid lines, and all text rendered through LaTeX (usetex), which needs
# a working LaTeX installation on the machine that draws the figures.
plt.rcParams.update({"grid.alpha": 0.3, "text.usetex": True})
# Keyword arguments shared by the KDE plots.
kde_params = dict(fill=True, linewidth=1, common_norm=False)

# Keyword arguments shared by the histograms: each hue level is normalised and
# binned on its own, bars show probabilities, and a KDE curve is overlaid.
hist_params = dict(
    fill=True,
    linewidth=0,
    common_norm=False,
    common_bins=False,
    kde=True,
    stat="probability",
)

### Set data

In [None]:
# Load the scraped offers; the snapshot date and time are part of the file name.
data_file = os.path.join(get_path_with("data"), "data", "cars02_05_2023_01_21.csv")
df = pd.read_csv(data_file)

# Collapse the raw fuel labels into the three car categories.
aliases = {
    "petrol": CV,
    "diesel": CV,
    "petrol-lpg": CV,
    "petrol-cng": CV,
    "electric": EV,
    "hybrid": PHEV,
}

# Fail early, naming every fuel label that has no category, instead of
# surfacing a bare KeyError for the first offending row.
unknown_fuels = set(df["fuel_type"].unique()) - set(aliases)
if unknown_fuels:
    raise KeyError(f"fuel types without an alias: {sorted(map(str, unknown_fuels))}")

df["type"] = df["fuel_type"].map(aliases)

# Ages are measured against the year of the data snapshot (see the file name),
# not against the year in which the notebook happens to be re-run.
today_year = 2023
df["age"] = today_year - df["year"]

# Mean yearly mileage (floor division); undefined (NaN) for cars of age 0.
df["mean_year_miliage"] = (df["mileage"] // df["age"]).where(df["age"] != 0)

# Car age analysis

In [None]:
# For every car type, store the 90th percentile of the age of its offers.
# Offers with a non-positive age are left out of the estimate.
positive_age = df[df["age"] > 0]

suggest_shelf_life = {}
for c_type in car_types:
    ages_of_type = positive_age.loc[positive_age["type"] == c_type, "age"]
    suggest_shelf_life[c_type] = ages_of_type.quantile(0.9)

In [None]:
# One histogram row per car type, with the 90th-percentile age of that type
# marked by a vertical line.
fig, ax = plt.subplots(nrows=len(car_types), figsize=(10, 5))

for axis, c_type in zip(ax, car_types):
    sns.histplot(
        df[(df["age"] > 0) & (df["type"] == c_type)],
        ax=axis,
        x="age",
        hue="type",
        **hist_params,
    )
    axis.axvline(x=suggest_shelf_life[c_type])
    # plt.xlim() only affects the current (last) axes, so set the limits on
    # every row to keep the three histograms comparable.
    axis.set_xlim(0, 25)

# Figure-level title: plt.title() would label only the last subplot.
fig.suptitle("Mean age of cars")
fig.tight_layout()

In [None]:
# Violin plot of the offer age per car type, with the 90th-percentile age of
# each type drawn as a horizontal segment over its own category.
fig, ax = plt.subplots(figsize=(5, 3))

# axhline takes xmin/xmax as fractions of the axes width. Every category gets
# an equal share, so derive it from the number of types instead of a literal 3.
n_types = len(car_types)
lifetime_lines = []
for i, c_type in enumerate(car_types):
    lifetime_lines.append(
        ax.axhline(
            y=suggest_shelf_life[c_type],
            xmin=i / n_types,
            xmax=(i + 1) / n_types,
        )
    )

sns.violinplot(
    df[(df["age"] < 30) & (df["age"] > 0)],
    ax=ax,
    y="age",
    x="type",
    order=car_types,
)
ax.set_title("Car age in otomoto offers")
# Bind the label to one marker line explicitly instead of relying on the
# order in which artists were added to the axes.
ax.legend(handles=lifetime_lines[:1], labels=["lifetime"])

In [None]:
# Box plot of the offer age per car type, with the 90th-percentile age of
# each type drawn as a horizontal segment over its own category. The figure
# is saved through the project helper save_in.
fig, ax = plt.subplots(figsize=(5, 3))

# axhline takes xmin/xmax as fractions of the axes width. Every category gets
# an equal share, so derive it from the number of types instead of a literal 3.
n_types = len(car_types)
lifetime_lines = []
for i, c_type in enumerate(car_types):
    lifetime_lines.append(
        ax.axhline(
            y=suggest_shelf_life[c_type],
            xmin=i / n_types,
            xmax=(i + 1) / n_types,
        )
    )

sns.boxplot(
    df[(df["age"] < 30) & (df["age"] > 0)],
    ax=ax,
    y="age",
    x="type",
    order=car_types,
)
ax.set_title("Car age in otomoto offers")
# Bind the label to one marker line explicitly instead of relying on the
# order in which artists were added to the axes.
ax.legend(handles=lifetime_lines[:1], labels=["lifetime"])
save_in("carAge", "pictures", fig)

In [None]:
# Display the per-type values stored in suggest_shelf_life.
suggest_shelf_life