In [None]:
# Import required libraries
import pandas as pd
import plotly.graph_objects as go

### 1. Dataset overview

In [None]:
# Get US vehicles dataframe (path is relative to this notebook: one directory up)
vehicles_df = pd.read_csv('../vehicles_us.csv')

# Preview five random rows. The fixed seed keeps the preview identical on every
# "Restart & Run All", so re-running the notebook does not churn the saved output.
vehicles_df.sample(5, random_state=42)

In [None]:
# Get general info: prints a per-column summary (non-null counts and dtypes)
# plus the frame's memory usage; useful for spotting columns with missing values.
vehicles_df.info()

In [None]:
# Check for duplicate records: counts rows that repeat an earlier row in every column
# (the first occurrence of each row is not counted as a duplicate).
vehicles_df.duplicated().sum()

In [None]:
# Get descriptive statistics from dataset
# (with default arguments, describe() summarizes only the numeric columns of a mixed-type frame).
vehicles_df.describe()

### 2. Basic data cleansing

In [None]:
# Turn the date_posted column into pandas datetime values
vehicles_df['date_posted'] = vehicles_df['date_posted'].pipe(pd.to_datetime)

# Fill the gaps in is_4wd with 0 so the flag is always set:
# 1.0 means the vehicle has 4WD, 0.0 means it does not
vehicles_df['is_4wd'] = vehicles_df['is_4wd'].fillna(value=0)


### 3. Exploratory graphics

In [None]:
# 1. Odometer histogram
fig_odometer = go.Figure()
fig_odometer.add_trace(go.Histogram(
    x=vehicles_df['odometer'],
    # Explicit binning: 20,000-wide bins starting at 0, with the bin range ending at 400,000
    xbins={'start': 0, 'end': 400000, 'size': 20000},
    # 1px black outline so adjacent bars are visually separated
    marker={'line': {'color': 'black', 'width': 1}},
))
fig_odometer.update_layout(
    title='Odometer distribution',
    xaxis_title='Odometer',
    yaxis_title='Frequency',
    width=800,   # figure width in pixels
    height=500,  # figure height in pixels
)
fig_odometer.show()

In [None]:
# Static plotly chart for github: export the odometer histogram as a PNG next to the notebook
# so it can be embedded as a plain image.
# NOTE(review): write_image needs a static-export engine (e.g. kaleido) installed - confirm the environment.
fig_odometer.write_image('fig_odometer.png')

[comment]: # (Display odometer distribution image for GitHub)
![Odometer distribution](fig_odometer.png)

In [None]:
# 2. Price histogram
fig_price = go.Figure(data=[go.Histogram(
    x=vehicles_df['price'],
    # Explicit binning: 3,000-wide bins starting at 0, with the bin range ending at 60,000
    xbins_start=0,
    xbins_end=60000,
    xbins_size=3000,
    # 1px black outline around every bar
    marker_line_color='black',
    marker_line_width=1,
)])
fig_price.update_layout(
    title='Price distribution',
    xaxis_title='Price',
    yaxis_title='Frequency',
    width=800,   # figure width in pixels
    height=500,  # figure height in pixels
)
fig_price.show()

In [None]:
# Static plotly chart for github: export the price histogram as a PNG next to the notebook
# so it can be embedded as a plain image.
# NOTE(review): write_image needs a static-export engine (e.g. kaleido) installed - confirm the environment.
fig_price.write_image('fig_price.png')

[comment]: # (Display price distribution image for GitHub)
![Price distribution](fig_price.png)

In [None]:
# 3. Model year histogram
fig_year = go.Figure(
    data=[
        go.Histogram(
            x=vehicles_df['model_year'],
            # Only the first bin edge is pinned (1980); bin width and end are not set here
            xbins={'start': 1980},
            # 1px black outline around every bar
            marker={'line': {'color': 'black', 'width': 1}},
        )
    ],
    layout=go.Layout(
        title='Model year distribution',
        xaxis_title='Model year',
        yaxis_title='Frequency',
        width=800,   # figure width in pixels
        height=500,  # figure height in pixels
    ),
)
fig_year.show()

In [None]:
# Static plotly chart for github: export the model year histogram as a PNG next to the notebook
# so it can be embedded as a plain image.
# NOTE(review): write_image needs a static-export engine (e.g. kaleido) installed - confirm the environment.
fig_year.write_image('fig_year.png')

[comment]: # (Display model year distribution image for GitHub)
![Model year distribution](fig_year.png)

In [None]:
# Get 'condition' column values: the distinct non-missing labels, in order of first appearance.
# These are the labels the scatter plot below can filter on.
vehicles_df['condition'].dropna().unique()

In [None]:
# 4. Scatter plot: Price vs. Odometer for 'like new' condition
fig_scatter = go.Figure()

# Condition labels to plot, one trace per label
condition = ['like new']

for cond in condition:
    # Listings whose condition matches the current label
    group = vehicles_df[vehicles_df['condition'] == cond]
    fig_scatter.add_trace(go.Scatter(
        x=group['odometer'],
        y=group['price'],
        mode='markers',
        name=cond,
        opacity=0.6,  # semi-transparent so overlapping points stay distinguishable
        # Price shown with thousands separators, same format as the bar chart hover
        hovertemplate="(%{x}, $%{y:,.0f})<extra></extra>"
    ))

fig_scatter.update_layout(
    title='Price vs. Odometer for "Like New" condition',
    xaxis_title='Odometer',
    yaxis_title='Price',
    xaxis_range=[0, 400000],  # same odometer span as the odometer histogram bins
    width=800,   # same pixel size as the other figures, so the exported PNGs match
    height=500,
)
fig_scatter.show()

In [None]:
# Static plotly chart for github: export the price-vs-odometer scatter as a PNG next to the notebook
# so it can be embedded as a plain image.
# NOTE(review): write_image needs a static-export engine (e.g. kaleido) installed - confirm the environment.
fig_scatter.write_image('fig_condition.png')

[comment]: # (Display odometer-vs-price image for GitHub)
![Price vs. Odometer for "Like New" condition](fig_condition.png)

In [None]:
# 5. Bar chart: Average price by car type
# One row per vehicle type, holding the mean price of that type
avg_price_by_type = vehicles_df.groupby('type', as_index=False)['price'].mean()

fig_bar = go.Figure()
fig_bar.add_trace(go.Bar(
    x=avg_price_by_type['type'],
    y=avg_price_by_type['price'],
    # Hover shows the type and the average price as whole dollars with thousands separators
    hovertemplate="(%{x}, $%{y:,.0f})<extra></extra>",
))
fig_bar.update_layout(
    title='Average price by vehicle type',
    xaxis_title='Type',
    yaxis_title='Average price',
    width=800,   # figure width in pixels
    height=500,  # figure height in pixels
)
fig_bar.show()

In [None]:
# Static plotly chart for github: export the average-price bar chart as a PNG next to the notebook
# so it can be embedded as a plain image.
# NOTE(review): write_image needs a static-export engine (e.g. kaleido) installed - confirm the environment.
fig_bar.write_image('fig_vehicle_type.png')

[comment]: # (Display avg-price image for GitHub)
![Average price by vehicle type](fig_vehicle_type.png)