I imported the libraries I am going to use to build my web app and
also imported the data set for the vehicles.

In [22]:
import pandas as pd
import streamlit as st
import plotly.express as px
import numpy as np

vehicle_data = pd.read_csv('vehicles_us.csv')


This cache stores the function, allowing the app to run more smoothly. 

In [23]:
@st.cache_data
def median_value(group):
    if group.notnull().any():
        return group.fillna(group.median())
    else:
        return group.fillna(np.nan)

2023-09-16 13:48:16.355 No runtime found, using MemoryCacheStorageManager


This first grouping of code adds the median value to each column where there are missing values. 

Right below is where I changed the names of the columns to more presentable names.

In [24]:
vehicle_data['model_year'] = vehicle_data.groupby('model')['model_year'].transform(median_value)
vehicle_data['cylinders'] = vehicle_data.groupby('model')['cylinders'].transform(median_value)
vehicle_data['odometer'] = vehicle_data.groupby('model_year')['odometer'].transform(lambda x: x.fillna(x.median()))

new_column_names = {'price': 'Price', 'model_year': 'Model Year', 'model': 'Model', 'condition': 'Condition', 'cylinders': 'Cylinders', 'fuel': 'Fuel',
                    'odometer': 'Odometer', 'transmission': 'Transmission', 'type': 'Type', 'paint_color': 'Paint Color', 'is_4wd': 'Has 4WD', 'date_posted': 'Date Posted',
                    'days_listed': 'Days Listed'}
vehicle_data.rename(columns=new_column_names, inplace=True)


Mean of empty slice



Here we create the title of the web page and the code for the histogram and scatterplots.

In [25]:
st.title('Vehicle Data Analysis Tool')

st.set_option('deprecation.showPyplotGlobalUse', False)

select_model = st.selectbox('Select a car model', vehicle_data['Model'].unique())
filtered_data = vehicle_data[vehicle_data['Model'] == select_model]

st.write('Filtered Data:')
st.write(filtered_data)

st.subheader('Scatterplot for selected car model')
x_column = st.selectbox('Select x-axis column', vehicle_data.columns)
y_column = st.selectbox('Select y-axis column', vehicle_data.columns)

fig = px.scatter(filtered_data, x=x_column, y=y_column, title=f"Scatterplot: {x_column} vs {y_column}")
st.plotly_chart(fig)

st.subheader('Histogram for selected car model')
fig = px.histogram(filtered_data, x=x_column, nbins=20, title=f"Histogram: {x_column}")
st.plotly_chart(fig)

st.write(f"Statistics for {x_column}:")
st.write(filtered_data[x_column].describe())

This will display the statistics for each column selected in the dropdown menu.

In [26]:
st.write(f"Statistics for {x_column}:")

st.write(vehicle_data[x_column].describe())