In [7]:
import pandas as pd
import os
import sys
sys.path.insert(1, "../landing")
sys.path.insert(2, "../scripts")
from constants import *
import plotly.graph_objects as go
import plotly.express as px
from ipywidgets import widgets

## Reading in External Dataset

This external dataset covers US vehicle sales from 2013 to 2022. Each row is one car model, labelled with its brand (maker), and holds the number of units sold in each year.

In [12]:
# Load the raw sales CSV and normalise it in a single chain:
#   1. lower-case every column header,
#   2. map the two brand/model headers onto the shared MAKER / MODEL names,
#   3. replace missing values with 0.
us_vehicle_sales = (
    pd.read_csv("../landing/us_car_model_sales_2013_2022.csv")
    .rename(columns=str.lower)
    .rename(columns={"maker/brand": MAKER, "maker_brand": MODEL})
    .fillna(0)
)
us_vehicle_sales

Unnamed: 0,maker,model,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,Acura,Acura ILX,20430,17854,18531,14597,11757,11273.0,14685.0,13414.0,13900.0,6296
1,Acura,Acura Integra,0,0,0,0,0,0.0,0.0,0.0,0.0,13027
2,Acura,Acura TL,24318,10616,88,0,0,0.0,0.0,0.0,0.0,0
3,Acura,Acura TLX,0,19127,47080,37156,34846,30468.0,26548.0,21785.0,26100.0,11508
4,Acura,Acura TSX,17484,6287,35,4,0,0.0,0.0,0.0,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
440,VW,VW ID.4,0,0,0,0,0,0.0,0.0,0.0,16742.0,20511
441,VW,VW Taos,0,0,0,0,0,0.0,0.0,0.0,31682.0,59103
442,VW,VW Tiguan,30002,25121,35843,43638,46983,103022.0,109963.0,100705.0,109747.0,88577
443,VW,VW Atlas,0,0,0,0,27119,59677.0,81508.0,87361.0,115687.0,82025


Summary statistics

In [13]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for every
# numeric column of the cleaned frame, i.e. one column per sales year.
summary_table = us_vehicle_sales.describe(percentiles=[0.25, 0.5, 0.75])
summary_table

Unnamed: 0,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
count,445.0,445.0,445.0,445.0,445.0,445.0,445.0,445.0,445.0,445.0
mean,35962.296629,38057.640449,40295.346067,40367.975281,39691.044944,39993.946067,39380.991011,33840.773034,34285.930337,31760.348315
std,76098.751835,80182.216509,83851.419242,84005.870987,86244.747803,87015.776324,86291.273687,76269.01066,74452.272772,68338.632533
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4802.0,4990.0,6276.0,5707.0,5595.0,4704.0,5369.0,4512.0,4257.0,4224.0
75%,35017.0,35368.0,40359.0,42120.0,35727.0,38178.0,37728.0,33608.0,34791.0,30533.0
max,763402.0,753851.0,780354.0,820799.0,896764.0,909330.0,896526.0,787422.0,726004.0,653957.0


Sales by car brand

In [14]:
# Collapse the per-model rows into one row per brand.
# MODEL is dropped before aggregating so only the yearly sales columns are
# summed; as_index=False keeps MAKER as an ordinary column.
sales_by_brand = (
    us_vehicle_sales
    .drop(columns=[MODEL])
    .groupby(MAKER, as_index=False)
    .sum()
)

In [19]:
# Line chart of total units sold per brand, one line per sales year.
#
# The chart is built from the brand-level totals in `sales_by_brand`, not from
# the raw per-model rows. With the raw frame every brand appears on the x-axis
# once per model, so the lines zig-zag over repeated categories and the
# y-values are model-level sales rather than the "Total Car Units Sold" per
# brand that the title and axis label describe.
x_values = sales_by_brand[MAKER]

# Create figure
fig = go.Figure()

# Add one trace per year column (2013..2022)
for sales_year in range(2013, 2023):
    year_column = str(sales_year)
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=sales_by_brand[year_column].tolist(),
            mode='lines',
            name=year_column,
        )
    )

fig.update_layout(
    title='US Vehicle Sales By Car Brand From 2013 to 2022',
    xaxis_title='Car Brand',
    yaxis_title='Total Car Units Sold',
)

# Show plot
fig.show()

# Make sure the output folder exists so the export also works on a fresh checkout.
os.makedirs("../plots", exist_ok=True)
fig.write_html("../plots/vehiclesales.html")



### Sales by Car Brand

In [20]:
# Horizontal bar chart of brand totals: brands on the y-axis, units sold on the
# x-axis, with each sales year plotted as its own series.
year_columns = list(map(str, range(2013, 2023)))

fig2 = px.bar(sales_by_brand, x=year_columns, y=MAKER, orientation="h")
fig2.update_layout(
    title='US Vehicle Sales By Car Brand From 2013 to 2022',
    xaxis_title='Total Car Units Sold',
    yaxis_title='Car Brand',
)

fig2.write_html("../plots/vehiclesales2.html")
fig2.show()

## Top 10 Car Brands by Sales per Year

In [21]:
# Export one standalone HTML bar chart per year showing the ten brands with
# the highest sales in that year. Figures are written to disk only, not shown.
years = list(map(str, range(2013, 2023)))

for year in years:
    ranked = sales_by_brand[[MAKER, year]].sort_values(by=year, ascending=False)
    sales = ranked.head(10)

    fig1 = px.bar(sales, x=MAKER, y=year)
    fig1.update_layout(
        title=f'Top 10 US Vehicles Sales By Car Brand in {year}',
        xaxis_title='Car Brand',
        yaxis_title='Total Car Units Sold',
    )
    fig1.write_html(f"../plots/top10salesbybrand{year}.html")


In [22]:
def show_top_brands(brand_sales, year, top_n=10):
    """Display a bar chart of the best-selling brands for a single year.

    Parameters
    ----------
    brand_sales : pandas.DataFrame
        Brand-level totals with a MAKER column and one sales column per year
        (the layout of `sales_by_brand`).
    year : str
        Name of the year column to rank by, e.g. "2013".
    top_n : int, default 10
        Number of brands to keep, counted from the highest sales downwards.

    Returns
    -------
    tuple
        ``(top_sales, fig)``: the ranked rows that were plotted and the Plotly
        figure that was displayed.
    """
    top_sales = (
        brand_sales[[MAKER, year]]
        .sort_values(by=[year], ascending=False)
        .head(top_n)
    )
    fig = px.bar(top_sales, x=MAKER, y=year)
    fig.update_layout(
        title=f'Top {top_n} US Vehicles Sales By Car Brand in {year}',
        xaxis_title='Car Brand',
        yaxis_title='Total Car Units Sold',
    )
    fig.show()
    return top_sales, fig


# One call per year instead of ten copy-pasted cells. The per-year frame names
# (and `fig`, which ends up holding the 2022 chart) are kept so that any later
# cell referring to them still works.
sales_2013, fig = show_top_brands(sales_by_brand, "2013")
sales_2014, fig = show_top_brands(sales_by_brand, "2014")
sales_2015, fig = show_top_brands(sales_by_brand, "2015")
sales_2016, fig = show_top_brands(sales_by_brand, "2016")
sales_2017, fig = show_top_brands(sales_by_brand, "2017")
sales_2018, fig = show_top_brands(sales_by_brand, "2018")
sales_2019, fig = show_top_brands(sales_by_brand, "2019")
sales_2020, fig = show_top_brands(sales_by_brand, "2020")
sales_2021, fig = show_top_brands(sales_by_brand, "2021")
sales_2022, fig = show_top_brands(sales_by_brand, "2022")