In [21]:
import pandas as pd
import plotly.express as px
# Load the per-model evaluation metrics. Header names are stripped so that
# stray whitespace around a CSV column name cannot break the 'model' /
# 'r_squared' lookups below.
df = pd.read_csv('modelComparisons.csv')
df.columns = df.columns.str.strip()

# Order the rows by ascending R² so the bars rise from left to right.
df = df.sort_values('r_squared')

# Bar chart: one bar per model, coloured by model, labelled with its R² value.
fig = px.bar(
    df,
    x='model',
    y='r_squared',
    color='model',
    text='r_squared',
    title='Model Comparison: R² Score',
    labels={'r_squared': 'R² Score', 'model': 'Model'}
)

# Draw each label above its bar, formatted to four decimals.
fig.update_traces(texttemplate='%{text:.4f}', textposition='outside')
fig.update_layout(
    yaxis=dict(range=[0, 1]),   # pin the y-axis to the interval [0, 1]
    uniformtext_minsize=8,
    uniformtext_mode='hide',    # drop labels that would fall below the minimum size
    height=500,  # in pixels
    width=500    # in pixels
)

fig.show()


# Last expression of the cell, so this is the table that gets displayed.
# reset_index() turns the pre-sort row labels into an 'index' column.
df.reset_index().head()

Unnamed: 0,index,model,r_squared,rmse,mae
0,0,linear_regression,0.4016,26594.62,13241.94
1,4,knn,0.5445,23623.58,13569.55
2,1,neural_network,0.861249,11797.698885,6508.321579
3,3,stacked,0.943235,7546.074026,4698.246602
4,2,random_forest,0.946979,7292.927187,4492.447617


In [15]:
import pandas as pd
import plotly.express as px

# Load the data and strip stray whitespace from the header names so the
# 'model' / 'rmse' / 'mae' lookups below are robust to CSV formatting.
df = pd.read_csv('modelComparisons.csv')
df.columns = df.columns.str.strip()

# Largest RMSE first.
df = df.sort_values('rmse', ascending=False)

# Reshape to long format (excluding r_squared): one row per (model, metric)
# pair, with the metric name in 'metric' and its number in 'value'.
df_long = df.melt(
    id_vars='model',
    value_vars=['rmse', 'mae'],
    var_name='metric',
    value_name='value',
)

# Create grouped bar chart: one colour per metric, bars grouped per model.
fig = px.bar(
    df_long,
    x='model',
    y='value',
    color='metric',
    barmode='group',
    text='value',
    title='Model Error Comparison: RMSE and MAE',
    labels={'value': 'Error Value', 'model': 'Model', 'metric': 'Metric'}
)

# Draw each label above its bar, formatted to two decimals.
fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig.update_layout(
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    height=800,  # in pixels
    width=800    # in pixels
)
fig.show()


In [16]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Read the metrics table and trim whitespace from its header names
df = pd.read_csv('modelComparisons.csv')
df.columns = df.columns.str.strip()

# Ascending-R² view of the table, used by the left-hand panel
df_r2 = df.sort_values('r_squared')

# Figure with two panels side by side
fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=["R² Score by Model", "Error Comparison: RMSE vs MAE"],
)

# --- Left panel: R² per model ---
r2_bar = go.Bar(
    name='R² Score',
    x=df_r2['model'],
    y=df_r2['r_squared'],
    text=df_r2['r_squared'].round(4),
    textposition='outside',
    marker_color='blue',
)
fig.add_trace(r2_bar, row=1, col=1)

# --- Right panel: RMSE and MAE per model ---
# Long format: one row per (model, metric) pair
df_long = df.melt(
    id_vars='model',
    value_vars=['rmse', 'mae'],
    var_name='metric',
    value_name='value',
)

# One bar trace per metric, added in the order listed here
metrics = ['rmse', 'mae']
for metric in metrics:
    metric_data = df_long.loc[df_long['metric'].eq(metric)]
    error_bar = go.Bar(
        name=metric.upper(),
        x=metric_data['model'],
        y=metric_data['value'],
        text=metric_data['value'].round(2),
        textposition='outside',
    )
    fig.add_trace(error_bar, row=1, col=2)

# Figure-wide layout settings
layout_options = dict(
    title_text='Model Performance Overview',
    barmode='group',
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    height=500,
    width=700,
    showlegend=True,
)
fig.update_layout(**layout_options)

fig.show()

# Also save the figure as a standalone HTML file
fig.write_html('test.html')


In [17]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.colors as pc

# Load and clean data: strip whitespace from header names, then sort by R²
df = pd.read_csv('modelComparisons.csv')
df.columns = df.columns.str.strip()
df_r2 = df.sort_values('r_squared')

# Normalize R² values to [0, 1] for color mapping (min-max scaling).
# When every model has the same R² (or there is only one model) the span is
# zero and the division would produce NaN sample points, so in that case all
# bars are mapped to the midpoint of the colorscale instead.
r2_vals = df_r2['r_squared']
r2_min = r2_vals.min()
r2_span = r2_vals.max() - r2_min
if r2_span > 0:
    norm_r2 = (r2_vals - r2_min) / r2_span
else:
    norm_r2 = pd.Series(0.5, index=r2_vals.index)

# Use a Plotly colorscale (e.g., Viridis): one sampled color per bar,
# passed as plain Python floats.
colorscale = pc.sample_colorscale('Viridis', norm_r2.tolist())

# Create subplot layout: 1 row, 2 columns
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=["R² Score by Model", "Error Comparison: RMSE vs MAE"]
)

# --- R² Bar Chart with Gradient Colors ---
fig.add_trace(
    go.Bar(
        x=df_r2['model'],
        y=df_r2['r_squared'],
        text=df_r2['r_squared'].round(4),
        textposition='outside',
        marker=dict(color=colorscale),  # per-bar colors, same order as df_r2
        name='R² Score',
    ),
    row=1, col=1
)

# --- RMSE / MAE Grouped Bar Chart ---
# Long format: one row per (model, metric) pair
df_long = df.melt(id_vars='model', value_vars=['rmse', 'mae'],
                  var_name='metric', value_name='value')

# One bar trace per metric, both placed in the right-hand panel
for metric in ['rmse', 'mae']:
    metric_data = df_long[df_long['metric'] == metric]
    fig.add_trace(
        go.Bar(
            x=metric_data['model'],
            y=metric_data['value'],
            name=metric.upper(),
            text=metric_data['value'].round(2),
            textposition='outside'
        ),
        row=1, col=2
    )

# Final layout
fig.update_layout(
    title_text='Model Performance Overview',
    barmode='group',
    height=500,
    width=1000,
    showlegend=True,
    uniformtext_minsize=8,
    uniformtext_mode='hide'
)

fig.show()


In [18]:
import pandas as pd
# Read the feature-importance table and preview its first five rows
# (five is the default row count for head()).
importance = pd.read_csv('feature_importance.csv')
importance.head()

Unnamed: 0,Feature,Importance
0,model_encoded,0.767073
1,engine_encoded,0.195193
2,milage_binned,0.015735
3,model_year,0.00726
4,transmission_encoded,0.002982
