In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
from scipy.stats import pearsonr
from scipy import stats
import ipywidgets as widgets
from IPython.display import display, HTML

# Load the data
try:
    car = pd.read_excel(r'data\\carbitrage-data.xlsx')
except Exception as e:
    print(f"Error loading data: {e}")
    raise

# Drop rows with missing price or odometer values, as they are essential for the plot
car = car.dropna(subset=['price', 'odometer'])

# Remove outliers in mileage using the z-score method (statistical practice)
car['z_score'] = np.abs(stats.zscore(car['odometer']))
car = car[car['z_score'] < 3]  # Keeping only rows where the z-score is less than 3
car = car.drop(columns=['z_score'])  # Dropping the z_score column after filtering

# Error check: Ensure there are still valid makes and models after filtering
if car.empty:
    raise ValueError("The dataset is empty after filtering. Please check the data.")

# Create a list of unique makes and models
makes = car['make'].unique()
if len(makes) == 0:
    raise ValueError("No valid makes available after filtering. Please check the data.")

# Dropdown widgets for user to select make and model
make_dropdown = widgets.Dropdown(options=makes, description='Make:', layout=widgets.Layout(width='50%'))
model_dropdown = widgets.Dropdown(description='Model:', layout=widgets.Layout(width='50%'))

# Update the models based on the selected make
def update_models(*args):
    selected_make = make_dropdown.value
    filtered_models = car[car['make'] == selected_make]['model'].unique()
    if len(filtered_models) == 0:
        model_dropdown.options = ['No valid models available']
    else:
        model_dropdown.options = filtered_models

make_dropdown.observe(update_models, 'value')

# Explanation widget
explanation = widgets.HTML(
    value=(
        "<h3>Instructions</h3>"
        "<p>Select a car make and model from the dropdown menus to see a scatter plot of price vs. mileage.</p>"
        "<p><b>R<sup>2</sup></b>: The coefficient of determination, which indicates the proportion of the variance "
        "in the dependent variable (price) that is predictable from the independent variable (mileage). "
        "An R<sup>2</sup> close to 1 indicates a strong linear relationship.</p>"
        "<p><b>Number of Observations</b>: The total number of data points (cars) considered for the selected make and model.</p>"
    )
)

# Function to create the scatter plot
def plot_graph(make, model):
    if not make or not model or (isinstance(model, str) and 'No valid models available' in model):
        explanation.value = "Please select both a make and a model."
        return  # Return if either make or model is not selected
    
    # Filter data based on user selection
    filtered_data = car[(car['make'] == make) & (car['model'] == model)]
    
    if filtered_data.empty:
        explanation.value = "No data available for the selected make and model."
        return
    
    # Calculate correlation coefficient and number of observations
    if len(filtered_data) > 1:
        correlation, _ = pearsonr(filtered_data['odometer'], filtered_data['price'])
        correlation_text = f"R<sup>2</sup>: {correlation**2:.2f}"  # Using R^2
    else:
        correlation_text = "R<sup>2</sup>: N/A"
    
    observations_text = f"Observations: {len(filtered_data)}"
    
    # Create the scatter plot
    fig = px.scatter(
        filtered_data,
        x='odometer',
        y='price',
        trendline='ols',
        title=f'{make} {model} Price vs Mileage',
    )
    
    # Customize the layout to make it minimalistic
    fig.update_layout(
        title=dict(text=f'{make} {model} Price vs Mileage', x=0.01, xanchor='left'),
        showlegend=False,
        margin=dict(l=0, r=0, t=30, b=0),
        xaxis_title="Mileage",
        yaxis_title="Price",
        paper_bgcolor="white",
        plot_bgcolor="white",
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
    )
    
    # Remove the top and right spines
    fig.update_xaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
    fig.update_yaxes(showline=True, linewidth=1, linecolor='black', mirror=True)
    
    # Add text annotations for R^2 and number of observations
    fig.add_trace(go.Scatter(
        x=[filtered_data['odometer'].max()],
        y=[filtered_data['price'].min()],
        text=[f"{correlation_text}<br>{observations_text}"],
        mode="text",
        showlegend=False
    ))
    
    fig.show()

# Connect the plot function to widget changes
def on_dropdown_change(change):
    plot_graph(make_dropdown.value, model_dropdown.value)

make_dropdown.observe(on_dropdown_change, names='value')
model_dropdown.observe(on_dropdown_change, names='value')

# Display the dropdowns and explanation
display(HTML("<h2>Car Price vs Mileage Analysis</h2>"))
display(explanation)
display(make_dropdown, model_dropdown)

# Initial plot (if needed)
plot_graph(make_dropdown.value, model_dropdown.value)


HTML(value='<h3>Instructions</h3><p>Select a car make and model from the dropdown menus to see a scatter plot …

Dropdown(description='Make:', layout=Layout(width='50%'), options=('ford', 'jeep', 'toyota', 'chevrolet', 'cad…

Dropdown(description='Model:', layout=Layout(width='50%'), options=(), value=None)

TraitError: Invalid selection: value not found

TraitError: Invalid selection: value not found