In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import streamlit as st

In [None]:
# Title: The Number of Vehicle Ads
# In this project, we look at data about vehicles in the US, see the number of ads for each vehicle, and analyze which variables affect the number of days a vehicle is on sale.

# To run this project, you first have to import the packages it relies on:
# pandas, numpy, plotly.express and streamlit.
# After the imports have run, we read the data set itself. The data describes vehicle ads posted in the US.
# Once the data has been loaded, we create a selectbox so the data can be filtered by each model it contains.

# Load the vehicle listings from the CSV that sits next to this notebook.
raw_listings = pd.read_csv('vehicles_us.csv')

# Discard the first column by position and keep every other column.
# NOTE(review): the drop is positional, so whatever the CSV's first column is
# gets removed -- confirm it is an unwanted column and not data needed later.
leading_column = raw_listings.columns[0]
df = raw_listings.drop(columns=[leading_column])

df.head()


Unnamed: 0,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed
0,2011.0,bmw x5,good,6.0,gas,145000.0,automatic,SUV,,1.0,2018-06-23,19
1,,ford f-150,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50
2,2013.0,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,,2019-02-07,79
3,2003.0,ford f-150,fair,8.0,gas,,automatic,pickup,,,2019-03-22,9
4,2017.0,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,,2019-04-02,28


In [None]:

# Distinct model names feed the model picker.
model_choice = df['model'].unique()
selected_manu = st.selectbox('Select a model', model_choice)

# Shortest and longest listing durations. Plain-int copies are kept in
# min_days / max_days, while the raw aggregate values are printed as-is.
days_listed = df['days_listed']
shortest_listing = days_listed.min()
longest_listing = days_listed.max()
min_days, max_days = int(shortest_listing), int(longest_listing)
print(shortest_listing)
print(longest_listing)

# The filter created below will show the data by days listed. The min and max are computed to find the range of listing durations; the longest is 271 days.

0
271


In [None]:
# Two-handled slider over the listing duration; it yields a (low, high) pair
# and starts out spanning the full min_days..max_days range.
days_range = st.slider(
    "Choose Number of Days Posted",
    value=(min_days, max_days),
    min_value=min_days,
    max_value=max_days,
)

# Every whole number of days inside the selected window (both ends included).
# Retained so any other cell that reads `actual_range` keeps working.
actual_range = list(range(days_range[0], days_range[1] + 1))

# Keep the rows for the chosen model whose listing duration falls inside the
# slider window. The slider is expressed in days listed, so the comparison must
# be made against `days_listed`; the previous comparison against `model_year`
# could not match, because model years such as 2011 lie outside a 0-271 day
# window, which left the filtered frame empty.
# `between` is inclusive on both ends, matching `actual_range`.
df_filtered = df[
    (df['model'] == selected_manu)
    & df['days_listed'].between(days_range[0], days_range[1])
]

# Data-quality checks: number of fully duplicated rows, then missing values
# per column.
df.duplicated().sum()

df.isna().sum()


In [None]:
st.header('Days Posted analysis')

# Columns that can be used to colour the histogram bars.
list_for_hist = ['condition', 'model_year']
selected_var = st.selectbox('Filter for Days Posted Difference', list_for_hist)

# Distribution of listing durations over the whole data set, split by the
# chosen column.
hist_title = f"<b> Split of Days Posted by {selected_var}</b>"
fig1 = px.histogram(df, x="days_listed", color=selected_var)
fig1.update_layout(title=hist_title)
st.plotly_chart(fig1)

# We then create a histogram and a scatterplot to show different ways of analyzing the days posted, based on the condition and the model year listed.

DeltaGenerator()

In [None]:
# Vehicle age in years, measured against a fixed reference year.
# Rows without a model year end up with a missing age.
# NOTE(review): the sample rows are dated 2018-2019 in `date_posted`, so this is
# the age as of the reference year, not the age when the ad was posted --
# confirm that is the intent.
REFERENCE_YEAR = 2024
df['age'] = REFERENCE_YEAR - df['model_year']
df.head()

Unnamed: 0,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed,age
0,2011.0,bmw x5,good,6.0,gas,145000.0,automatic,SUV,,1.0,2018-06-23,19,13.0
1,,ford f-150,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50,
2,2013.0,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,,2019-02-07,79,11.0
3,2003.0,ford f-150,fair,8.0,gas,,automatic,pickup,,,2019-03-22,9,21.0
4,2017.0,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,,2019-04-02,28,7.0


In [None]:
def age_category(x):
    """Bucket a vehicle age (in years) into a coarse label.

    Parameters
    ----------
    x : number
        Vehicle age. May be missing (NaN / None) when the model year is
        unknown.

    Returns
    -------
    str
        'unknown' for a missing age; otherwise '<5' for ages below 5,
        '5-10' for 5 up to (not including) 10, '10-20' for 10 up to
        (not including) 20, and '>20' for 20 and above.
    """
    # A missing age fails every comparison below, so without this guard it
    # would fall through to the final branch and be mislabelled as '>20'.
    if pd.isna(x):
        return 'unknown'
    if x < 5:
        return '<5'
    if x < 10:
        return '5-10'
    if x < 20:
        return '10-20'
    return '>20'

# Label every row with the age bucket that its `age` value falls into.
df['age_category'] = df['age'].map(age_category)

In [None]:
# Columns offered for the vertical axis of the scatter plot.
list_for_scatter = [
    "paint_color",
    "is_4wd",
    "type",
]

In [None]:
# The scatter plot puts `days_listed` on the x axis and the chosen column on
# the y axis, so the picker label refers to days listed rather than price
# (which is not plotted).
choice_for_scatter = st.selectbox('Days listed depending on', list_for_scatter)

In [None]:
# Listing duration against the chosen column, coloured by age bucket; the
# model year is added to the hover tooltip.
scatter_title = f"<b> Days Listed vs {choice_for_scatter}</b>"
fig2 = px.scatter(
    df,
    x='days_listed',
    y=choice_for_scatter,
    color="age_category",
    hover_data=["model_year"],
)
fig2.update_layout(title=scatter_title)
st.plotly_chart(fig2)

DeltaGenerator()

In [None]:
# Written conclusions of the analysis, printed as plain text.
# NOTE(review): neither chart visible in this section plots `odometer`, so the
# first statement is not backed by anything shown here -- confirm the claim or
# add the supporting analysis.
print("""After viewing the data, we can tell that the odometer number influences the number of days listed. """)


print(""" Overall, even though other factors come into play about how long a vehicle is listed, the condition has the greatest impact on the amount of days listed for vehicles in the US.  """)