## Price Versus Distance

In [118]:
import warnings

import pandas as pd
import plotly.express as px

# Scatter plot of asking price against driven distance, one colour per search
# term, with a LOWESS trend line per group.

# Listings with a larger 'distance' value are left out of the plot.
# NOTE(review): the unit is whatever data.CSV uses (presumably km) - confirm.
max_distance = 180000

df = pd.read_csv('data.CSV', header=0)

# Age is measured from the first-registration date up to today. Dates that do
# not match YYYY-MM-DD are coerced to NaT and therefore yield a NaN age.
registered = pd.to_datetime(df['first_registration'], format="%Y-%m-%d", errors='coerce')
df['age_days'] = (pd.to_datetime('today') - registered).dt.days
# Previously this column was inserted as a constant 0 and never filled in;
# compute it the same way the other cells do (365-day years).
df.insert(0, 'age_years', df['age_days'] / 365)
df = df.set_index('first_registration')

try:
    df = df[df['distance'] <= max_distance]
except TypeError as exc:
    # A non-numeric 'distance' column cannot be compared with a number. Keep
    # the best-effort behaviour (plot unfiltered data) but say so instead of
    # hiding the problem. A missing column is left to raise: the sort below
    # needs it anyway.
    warnings.warn(f"distance filter (<= {max_distance}) skipped: {exc}")

df = df.sort_values(by=['search', 'distance'], ascending=[True, True])

# Not used by this cell; kept in case another cell refers to it.
scale = px.colors.sequential.Rainbow

fig = px.scatter(df, x='distance',
                 y='price',
                 color='search',
                 title="finn.no car models",
                 trendline="lowess",
                 trendline_options=dict(frac=0.4),
                 hover_name='title',
                 hover_data=['price', 'distance', 'year', 'age_days', 'url', 'area'],
                 color_continuous_scale='Viridis')
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)
fig.update_yaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)


fig.show(renderer="browser")

## Price Versus Age

In [120]:
import warnings

import pandas as pd
import plotly.express as px

# Scatter plot of asking price against vehicle age in years, one colour per
# search term, with a LOWESS trend line per group.

# Listings with a larger 'distance' value are left out of the plot.
# NOTE(review): the unit is whatever data.CSV uses (presumably km) - confirm.
max_distance = 180000

df = pd.read_csv('data.CSV', header=0)

# Age is measured from the first-registration date up to today. Dates that do
# not match YYYY-MM-DD are coerced to NaT and therefore yield a NaN age.
registered = pd.to_datetime(df['first_registration'], format="%Y-%m-%d", errors='coerce')
df['age_days'] = (pd.to_datetime('today') - registered).dt.days
# Insert the real value directly instead of a zero placeholder that is
# overwritten afterwards; a year is approximated as 365 days.
df.insert(0, 'age_years', df['age_days'] / 365)
df = df.set_index('first_registration')

try:
    df = df[df['distance'] <= max_distance]
except TypeError as exc:
    # A non-numeric 'distance' column cannot be compared with a number. Keep
    # the best-effort behaviour (plot unfiltered data) but say so instead of
    # hiding the problem. A missing column is left to raise: the sort below
    # needs it anyway.
    warnings.warn(f"distance filter (<= {max_distance}) skipped: {exc}")

df = df.sort_values(by=['search', 'distance'], ascending=[True, True])

# Not used by this cell; kept in case another cell refers to it.
scale = px.colors.sequential.Rainbow

fig = px.scatter(df, x='age_years',
                 y='price',
                 color='search',
                 title="finn.no car models",
                 trendline="lowess",
                 trendline_options=dict(frac=0.4),
                 hover_name='title',
                 hover_data=['price', 'distance', 'year', 'url', 'area', 'age_days'],
                 color_continuous_scale='Viridis')
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)
fig.update_yaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)


fig.show(renderer="browser")

# Price Versus Age and Distance

In [121]:
import pandas as pd
import plotly.express as px
import numpy as np

# Scatter plot of price against a single combined "wear" measure: age and
# distance are each scaled by their maximum in the data set and merged with
# np.hypot (Euclidean norm of the two scaled values).
df = pd.read_csv('data.CSV', header=0)

# Age runs from the first-registration date up to today; dates that do not
# match YYYY-MM-DD are coerced to NaT and give a NaN age.
today = pd.to_datetime('today')
registered_on = pd.to_datetime(df['first_registration'], format="%Y-%m-%d", errors='coerce')
df.insert(0, 'age_days', (today - registered_on).dt.days)
df.insert(0, 'age_years', df['age_days'] / 365)
df = df.set_index('first_registration')

# Scale both quantities by their largest observed value before combining them.
max_days = df['age_days'].max()
max_distance = df['distance'].max()
scaled_age = df['age_days'] / max_days
scaled_distance = df['distance'] / max_distance
df['hypotenuse of days and distance'] = np.hypot(scaled_age, scaled_distance)

# Both axes share the same line and grid styling.
axis_style = dict(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)

fig = px.scatter(
    df,
    x='hypotenuse of days and distance',
    y='price',
    color='search',
    title="cars by age and distance combined",
    trendline="lowess",
    trendline_options=dict(frac=0.7),
    hover_name='title',
    hover_data=['age_days', 'price', 'distance', 'year', 'url', 'area'],
    color_continuous_scale='Viridis',
)
fig.update_xaxes(**axis_style)
fig.update_yaxes(**axis_style)

fig.show(renderer="browser")

# Daily Value loss
What is the average price loss per day of ownership?
This graph estimates it for each car model from the slope of the smoothed price-versus-age curve.

In [122]:
import numpy as np  # was missing: the cell only ran when an earlier cell had already imported it
import pandas as pd
import plotly.express as px
import statsmodels.api as sm

# For every search term: smooth price against age with LOWESS, resample the
# smoothed curve once per day, and plot the absolute day-to-day change.

# Fraction of the points used for each local LOWESS fit.
frac = 0.7

df = pd.read_csv('data.CSV', header=0)

# Age runs from the first-registration date up to today; dates that do not
# match YYYY-MM-DD are coerced to NaT and give a NaN age.
registered = pd.to_datetime(df['first_registration'], format="%Y-%m-%d", errors='coerce')
df.insert(0, 'age_days', (pd.to_datetime('today') - registered).dt.days)
df.insert(0, 'age_years', df['age_days'] / 365)
df = df.set_index('first_registration')

# One frame per search term, concatenated once after the loop. Concatenating
# onto an empty frame inside the loop is quadratic and triggers the pandas
# "empty or all-NA entries" FutureWarning.
per_search_frames = []

for search in df['search'].unique():
    df_search = df[df['search'] == search]

    # Only rows with both an age and a price can contribute to the fit.
    valid = df_search[['age_days', 'price']].dropna()
    if valid.empty:
        print(f"skipping {search}: no rows with both age_days and price")
        continue

    result = sm.nonparametric.lowess(exog=valid['age_days'], endog=valid['price'], frac=frac, it=3, return_sorted=True)

    # Resample on a one-day grid; negative ages (registration date in the
    # future) are clamped to day 0.
    start_x = max(int(np.floor(valid['age_days'].min())), 0)
    end_x = int(np.ceil(valid['age_days'].max()))
    num_samples = end_x - start_x + 1
    print(f"start_x: {start_x}, end_x: {end_x}, num_samples: {num_samples} search: {search}")
    if num_samples < 2:
        # np.gradient needs at least two samples to form a difference.
        print(f"skipping {search}: fewer than two daily samples")
        continue

    new_x = np.linspace(start=start_x, stop=end_x, num=num_samples, dtype=np.float32)
    new_y = np.interp(x=new_x, xp=result[:, 0], fp=result[:, 1])

    # Change of the smoothed price per one-day step. np.abs drops the sign,
    # so a rising price shows up as a positive value as well.
    new_y = np.abs(np.gradient(new_y))
    new = pd.DataFrame(np.column_stack((new_x, new_y)), columns=['age_days', 'price'])
    new.insert(2, 'search', search)
    per_search_frames.append(new)

if per_search_frames:
    result_df = pd.concat(per_search_frames, axis=0)
else:
    result_df = pd.DataFrame([], columns=['age_days', 'price', 'search'])

# Note: the 'price' column of result_df holds the daily change, not a price.
fig = px.scatter(result_df, x='age_days',
                y='price',
                color='search',
                title="Daily value loss (NOK) over time",
                hover_name='price',
                hover_data=['age_days', 'price', 'search'],
                color_continuous_scale='Viridis')
fig.update_xaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)
fig.update_yaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)

fig.show(renderer="browser")

#frac = 0.7
#result_df = pd.DataFrame([], columns=['age_days', 'price'])
#trendline_options=dict(frac=frac),
#result = sm.nonparametric.lowess(exog=df['age_days'], endog=df['price'], frac=frac, it=3, return_sorted=True)
##result.gradient()
#new = pd.DataFrame(result, columns=['age_days', 'price'])
#result_df = pd.concat([result_df, new], axis=0)
##result_df = result_df.pct_change(periods=365)
#fig = px.scatter(result_df, x='age_days',
#                 y='price',
#                 title="price change over time",
#                 color_continuous_scale='Viridis')
#fig.update_xaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)
#fig.update_yaxes(showline=True, linewidth=2, linecolor='black', gridcolor='grey', nticks=20)
#
#fig.show(renderer="browser")

start_x: 107, end_x: 2728, num_samples: 2622 search: Hyundai Ioniq
start_x: 0, end_x: 6857, num_samples: 6858 search: Mitsubishi Outlander
start_x: 165, end_x: 10967, num_samples: 10803 search: Passat
start_x: 387, end_x: 7828, num_samples: 7442 search: Skoda Octavia
start_x: 107, end_x: 6295, num_samples: 6189 search: Skoda Superb
start_x: 11, end_x: 3825, num_samples: 3815 search: Suzuki S-Cross
start_x: 79, end_x: 1879, num_samples: 1801 search: Tesla Model 3
start_x: 230, end_x: 1824, num_samples: 1595 search: Toyota Corolla Touring Sports
start_x: 139, end_x: 8392, num_samples: 8254 search: Toyota RAV4
start_x: 17, end_x: 4511, num_samples: 4495 search: Volkswagen ID.4



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.

