In [2]:
!pip install pandas
!pip install plotly
!pip install dash




In [3]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from dash import Dash, dcc, html, Input, Output, State

def select_best_model(df, sizes, alpha, beta):
    results = []
    max_size = df['Size (Mb)'].max()
    min_size = df['Size (Mb)'].min()

    for beta_value in beta:
        if beta_value < 0 or beta_value > 1:
            raise ValueError("Beta should be between 0 and 1.")
        
        for size in sizes:
            filtered_df = df[df['Size (Mb)'] <= size]
            if filtered_df.empty:
                continue
            
            norm_size = (filtered_df['Size (Mb)'] - min_size) / (max_size - min_size)
            filtered_df['Utility'] = ((alpha * filtered_df['Recall'] + (1 - alpha) * filtered_df['Precision']) + beta_value * filtered_df['AUC'] - (1 - beta_value) * norm_size) / 2
            best_model = filtered_df.loc[filtered_df['Utility'].idxmax()]
            
            results.append({
                'Size (Mb)': str(size),
                'Alpha': alpha,
                'Beta': beta_value,
                'Best Model': best_model['Model name'],
                'Utility': best_model['Utility'],
            })
    
    result_df = pd.DataFrame(results)
    unique_values = result_df[['Alpha', 'Beta']].drop_duplicates().values.tolist()
    
    fig = make_subplots(rows=len(unique_values), cols=1, shared_xaxes=True, 
                        subplot_titles=[f'Alpha = {x[0]}, Beta = {x[1]}' for x in unique_values])
    
    for i, x in enumerate(unique_values):
        beta_df = result_df[(result_df['Alpha'] == x[0]) & (result_df['Beta'] == x[1])]
        fig.add_trace(
            go.Bar(x=beta_df['Size (Mb)'], 
                   y=beta_df['Utility'], 
                   text=beta_df['Best Model'],
                   textposition='auto',
                   name=f'Alpha = {x[0]}, Beta = {x[1]}'),
            row=i + 1, col=1
        )
    
    fig.update_layout(
        title='Model Utility by Size, Alpha, and Beta',
        xaxis_title='Model Size (Mb)',
        yaxis_title='Model Utility',
        height=600,
        showlegend=True
    )
    
    return fig, result_df

app = Dash(__name__)
df = pd.read_csv('results.csv')
df = df.dropna()
distinct_sizes = sorted(df['Size (Mb)'].unique())
alphas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
betas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
image_path = 'assets/utility_function.png'
image_path2 = 'assets/minmax.png'
app.layout = html.Div([
    html.H1('Model Selection Dashboard', style={'textAlign': 'center', 'color': '#2c3e50'}),

    
    # Explanation paragraph with LaTeX formulas
    html.Div([
            html.P("This dashboard allows you to explore the utility of various models based on their size, "
           "recall, precision, and AUC. The utility is calculated using the following formula:"),
    
    html.Img(src='/assets/utility_function.png', style={'display': 'block', 'margin': '10px auto'}),
    
    html.P("Alpha (α) Tradeoff:"),
    html.P("When α is set to 0, the formula heavily favors precision over recall. In this case, the model’s ability to "
           "correctly identify true positives (recall) becomes less significant, and the focus is on reducing false positives. "
           "This might be important in scenarios where false positives are costly, such as in spam detection, where a legitimate "
           "email being classified as spam could have serious consequences."),
    html.P("Conversely, when α is set to 1, recall takes precedence over precision. Here, the model is optimized to catch as many "
           "true positives as possible, even at the expense of increasing false positives. This is crucial in situations where missing "
           "a true positive is more costly, like in medical diagnoses, where failing to identify a disease can have serious implications."),
    html.P("The choice of α reflects the importance of recall versus precision in your specific use case, and the utility will adjust accordingly, "
           "always yielding a value between 0 and 1, representing the trade-off between these metrics."),

    html.P("Beta (β) Tradeoff:"),
    html.P("When β is set to 0, the model size becomes a critical factor in the utility calculation. This might be necessary in environments with "
           "limited computational resources or when deploying models on edge devices where smaller model sizes are essential. In this scenario, AUC "
           "(a measure of the model’s overall performance) is less emphasized, and the focus is on achieving a balance between performance and model size."),
    html.P("On the other hand, when β is set to 1, AUC is prioritized over model size. This might be preferred when computational resources are abundant, "
           "and the primary goal is to maximize model performance, even if it means using a larger, more complex model."),
    html.P("By adjusting β, you can control the trade-off between model size and performance, allowing you to optimize for the best possible utility in a given context. "
           "The utility formula, designed to yield a value between 0 and 1, encapsulates these trade-offs, providing a holistic view of model suitability for a specific task. "
           "Whether your priority is minimizing false positives, maximizing true positives, optimizing for smaller models, or enhancing overall performance, this dashboard helps "
           "you make informed decisions by visualizing these trade-offs."),
        
        # Utility formula in LaTeX
        
       # html.Img(src=image_path, style={'display': 'block', 'margin': 'auto', 'marginTop': '20px'}),
        
        html.P("The graph displays the utilities of models for different sizes, where only models with sizes "
               "smaller or equal to the selected size are considered."),
        html.P(
    "Min-max normalization is a technique used to scale data within a specific range, usually between 0 and 1. For model size normalization, this involves adjusting the size of a model in megabytes (MB) so that it falls within this range. The formula used for min-max normalization is:\n\n" 
        ),
        html.Img(src=image_path2, style={'display': 'block', 'margin': 'auto', 'marginTop': '20px'}),
        html.P(
    "Where 'Size_model_MB' is the model size to be normalized, 'Min_model_size' is the smallest model size in the dataset, and 'Max_model_size' is the largest model size in the dataset. This ensures that the size of the model is represented as a value between 0 and 1, where 0 corresponds to the smallest size and 1 corresponds to the largest size. This normalization helps in comparing models on a consistent scale, making it easier to evaluate and visualize their relative sizes."
),
        # Min-Max normalization formula in LaTeX
       
    ], style={'backgroundColor': '#ecf0f1', 'padding': '20px', 'borderRadius': '10px', 'marginBottom': '20px'}),

    # Rest of the layout...

    html.Div([
        dcc.Graph(id='model-graph', style={'width': '80%', 'display': 'inline-block'}),
        
        html.Div([
            html.Label('Alpha', style={'color': '#ecf0f1'}),
            dcc.Slider(
                id='alpha-slider',
                min=0,
                max=1,
                step=0.1,
                marks={i: str(i) for i in alphas},
                value=0.5,
                vertical=False
            ),
            
            html.Label('Beta', style={'marginTop': '20px', 'color': '#ecf0f1'}),
            dcc.Slider(
                id='beta-slider',
                min=0,
                max=1,
                step=0.1,
                marks={i: str(i) for i in betas},
                value=0.5,
                vertical=False
            )
        ], style={'width': '15%', 'display': 'inline-block', 'paddingLeft': '20px'})
    ], style={'backgroundColor': '#34495e', 'padding': '20px'}),
    
    html.H2('Manual Input for Model Selection', style={'textAlign': 'center', 'color': '#2c3e50', 'marginTop': '40px'}),
    
    html.Div([
        html.Div([
            html.Label('Alpha:', style={'color': '#2c3e50'}),
            dcc.Input(id='manual-alpha', type='number', min=0, max=1, step=0.01, value=0.5, style={'width': '70px'}),
        ], style={'width': '10%', 'display': 'inline-block', 'paddingRight': '10px'}),
        
        html.Div([
            html.Label('Beta:', style={'color': '#2c3e50'}),
            dcc.Input(id='manual-beta', type='number', min=0, max=1, step=0.01, value=0.5, style={'width': '70px'}),
        ], style={'width': '10%', 'display': 'inline-block', 'paddingRight': '10px'}),
        
        html.Div([
        html.Label('Model Size (Mb):', style={'color': '#2c3e50', 'width': '30%', 'display': 'inline-block'}),
        dcc.Dropdown(id='manual-size', options=[{'label': str(size), 'value': size} for size in distinct_sizes], value=distinct_sizes[0], style={'width': '65%', 'display': 'inline-block'}),
    ], style={'width': '30%', 'display': 'inline-block'}),
        html.Div([
            html.Button('Find Best Model', id='find-best-model', n_clicks=0, style={'marginTop': '20px', 'width': '100%', 'backgroundColor': '#2980b9', 'color': '#ecf0f1', 'border': 'none', 'padding': '10px', 'fontSize': '16px'})
        ], style={'width': '25%', 'display': 'inline-block', 'paddingTop': '30px'})
    ], style={'textAlign': 'center', 'padding': '20px', 'backgroundColor': '#ecf0f1', 'borderRadius': '10px'}),
    
    html.Div(id='best-model-output', style={'textAlign': 'center', 'color': '#27ae60', 'marginTop': '20px', 'fontSize': '18px'})
])

@app.callback(
    Output('model-graph', 'figure'),
    [Input('alpha-slider', 'value'),
     Input('beta-slider', 'value')]
)
def update_graph(selected_alpha, selected_beta):
    fig, _ = select_best_model(df, distinct_sizes, selected_alpha, [selected_beta])
    return fig

@app.callback(
    Output('best-model-output', 'children'),
    Input('find-best-model', 'n_clicks'),
    State('manual-alpha', 'value'),
    State('manual-beta', 'value'),
    State('manual-size', 'value')
)
def find_best_model(n_clicks, manual_alpha, manual_beta, manual_size):
    if n_clicks > 0:
        _, result_df = select_best_model(df, [manual_size], manual_alpha, [manual_beta])
        if not result_df.empty:
            best_model_row = result_df.iloc[0]
            best_model = best_model_row['Best Model']
            utility = best_model_row['Utility']
            return html.Div([
                html.H3(f'Best Model: {best_model}', style={'color': '#2c3e50'}),
                html.P(f'Utility: {utility:.4f}', style={'color': '#2c3e50'})
            ])
        else:
            return 'No suitable model found.'
    return ''

if __name__ == '__main__':
    app.run_server(debug=True, port=8080)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/