In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the dataset; the CSV's first column becomes the row index.
df = pd.read_csv("data_new.csv", index_col=0)

In [3]:
# Quick look at the `hp` column to confirm the data loaded as expected.
df.loc[:, "hp"]

0        96
1       100
2       155
3       100
4       192
       ... 
6467    114
6468     90
6469    114
6470    101
6471    100
Name: hp, Length: 6453, dtype: int64

- ## General Brand Stats

In [19]:
brand = "Mercedes-benz"

# A model is flagged as "ignored" (and folded into the 'Other' row) when its
# listing count is below a share of the top model's count: 10% when the top
# model has at least 50 listings, 30% otherwise.
LARGE_SAMPLE_MIN_SIZE = 50
LARGE_SAMPLE_SHARE = 0.1
SMALL_SAMPLE_SHARE = 0.3

# Listings per model for the selected brand, most common model first.
# (Pre-filter `df` before this step to restrict the population.)
dummy = (
    df[df['brand'] == brand]
    .groupby(['model'], as_index=False)
    .size()
    .sort_values(by='size', ascending=False)
    .reset_index(drop=True)
)

# The reference ("top") model is the most common one that is not the
# 'Unknown' placeholder. Fail with a clear message instead of a bare KeyError
# when the brand has no rows or only 'Unknown' models.
named_models = dummy.index[dummy['model'] != 'Unknown']
if len(named_models) == 0:
    raise ValueError(f'No named models found for brand {brand!r}')
top_car = int(named_models[0])
top_size = dummy.loc[top_car, 'size']

share = LARGE_SAMPLE_SHARE if top_size >= LARGE_SAMPLE_MIN_SIZE else SMALL_SAMPLE_SHARE
dummy['ignored'] = dummy['size'] < np.round(share * top_size)

# 'Unknown' is never shown as its own category. A single .loc assignment is
# used because chained indexing (dummy['ignored'][mask] = ...) may write to a
# temporary copy and is a no-op under pandas Copy-on-Write.
dummy.loc[dummy['model'] == 'Unknown', 'ignored'] = True

# Append an aggregate row holding the combined count of all ignored models.
dummy.loc[len(dummy)] = ['Other', dummy.loc[dummy['ignored'], 'size'].sum(), False]

In [20]:
# Bar chart of listing counts, restricted to rows not flagged as ignored.
shown = dummy['ignored'] == False
shown_models = dummy['model'][shown]
shown_counts = dummy['size'][shown]

fig = px.bar(
    dummy,
    x=shown_models,
    y=shown_counts,
    color=shown_models,
    title=f'{brand} Models Popularity',
    width=600,
    height=500,
)
# Centre the title and treat model names as categories, not numbers/dates.
fig.update_layout(
    title={'x': 0.5},
    xaxis_type='category',
    xaxis_title=f"{brand} Models",
    yaxis_title="Number of Vehicles",
    legend_title="Models",
)
fig.show()

In [21]:
# Pie chart of each displayed model's share (rows flagged as ignored are left out).
shown = dummy['ignored'] == False
shown_counts = dummy['size'][shown]
shown_models = dummy['model'][shown]

fig = px.pie(
    dummy,
    values=shown_counts,
    names=shown_models,
    title=f'Population Share of {brand} Models',
    width=600,
    height=500,
)
# Put the percentage and model name inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(title={'x': 0.5}, legend_title="Models")
fig.show()

- ## Model-Specific Stats

In [28]:
model = "A 180"

# Keep the listings of the chosen brand/model whose price is not the
# "Ask price" placeholder. Boolean masks replace the f-string-built query so
# that brand or model names containing quote characters cannot break the
# expression.
is_selected = (
    (df['brand'] == brand)
    & (df['model'] == model)
    & (df['price'] != 'Ask price')
)

# .copy() makes `dummy` an independent frame, so the column assignment below
# is not a write into a slice of `df`.
dummy = df.loc[is_selected].copy()
dummy['price'] = dummy['price'].values.astype('int32')

# Optional further restriction, e.g. on the `hp` column:
# dummy = dummy.query('hp < 150')

In [29]:
# Price histogram for the selected model, with bars coloured by fuel type.
price_axis_label = f'{brand} {model} Price Distribution (\N{euro sign})'

fig = px.histogram(
    dummy,
    x='price',
    color='fuel',
    nbins=20,
    width=800,
    height=400,
)
fig.update_layout(
    xaxis_title=price_axis_label,
    yaxis_title="Number of Vehicles",
    legend_title='Fuel',
)
fig.show()

In [30]:
# Mean price per fuel type, rounded to zero decimals.
dummy.groupby(['fuel'])['price'].mean().round()

fuel
Diesel    21021.0
Petrol    20726.0
Name: price, dtype: float64

In [31]:
# Per-fuel aggregates, computed once: listing counts drive the slice sizes and
# the rounded mean price is attached as extra hover information.
fuel_counts = dummy.groupby(['fuel']).size()
fuel_mean_price = np.round(dummy.groupby(['fuel'])['price'].mean())

fig = px.pie(
    dummy,
    values=fuel_counts,
    names=fuel_counts.index,
    hover_data={'Mean Price (\N{euro sign}) ': fuel_mean_price},
    title=f'{brand} {model} Population Share w.r.t. Fuel',
    width=600,
    height=500,
)
# Put the percentage and fuel label inside each slice.
fig.update_traces(textinfo='percent+label', textposition='inside')
fig.update_layout(title={'x': 0.5}, legend_title="Fuel")
fig.show()

In [69]:
# Every row gets weight 1, so sector sizes are row counts, nested by fuel
# (inner ring) and then transmission (outer ring).
unit_weights = np.ones(len(dummy))

fig = px.sunburst(
    dummy,
    path=['fuel', 'transmission'],
    values=unit_weights,
    title=f'{brand} {model} Population Share w.r.t. Fuel/Transmission',
    width=600,
    height=500,
)
fig.update_layout(title={'x': 0.5})
fig.show()

In [70]:
# Number of rows for each fuel / transmission combination.
fuel_transmission_keys = ['fuel', 'transmission']
dummy.groupby(fuel_transmission_keys).size()

fuel    transmission
Diesel  Automatic        7
        Manual          24
Petrol  Automatic        4
        Manual          38
dtype: int64

In [65]:
# Stacked count of listings per fuel type, split by transmission.
# The title interpolates `brand` (it previously hard-coded "Renault", which
# mislabelled the chart for any other selected brand).
fig = px.histogram(dummy, x = 'fuel', width = 600, height = 500,
                   title = f'{brand} {model} Population Share w.r.t. Fuel/Transmission',
                   color = 'transmission', text_auto = True)
fig.update_layout(xaxis_title = "Fuel",
                  yaxis_title = "Number of Vehicles",
                  legend_title = "Transmission Type", title = {'x': 0.5})
fig.show()