In [1]:
import pandas as pd
# Load the per-ticker GRU evaluation metrics and preview the first five rows.
results_df = pd.read_csv(filepath_or_buffer='gru_evaluation_results.csv')
results_df.head(n=5)

Unnamed: 0,Ticker,Accuracy,Precision,Recall,F1-Score,RMSE
0,FANG,0.422131,0.426357,0.45082,0.438247,0.018188
1,BKR,0.446721,0.429752,0.440678,0.435146,0.01806
2,CDNS,0.540984,0.530769,0.575,0.552,0.022079
3,CSCO,0.491803,0.470085,0.470085,0.470085,0.011839
4,MSTR,0.5,0.469027,0.46087,0.464912,0.068998


In [2]:
# Summary statistics for every numeric column, rounded to 6 decimal places.
results_df.describe().round(decimals=6)

Unnamed: 0,Accuracy,Precision,Recall,F1-Score,RMSE
count,101.0,101.0,101.0,101.0,101.0
mean,0.496673,0.491819,0.495038,0.49304,0.023009
std,0.032724,0.042587,0.043925,0.040876,0.011694
min,0.422131,0.4,0.4,0.410714,0.00966
25%,0.47541,0.467213,0.46087,0.466667,0.014467
50%,0.495902,0.487603,0.491935,0.491525,0.020373
75%,0.520492,0.52,0.533333,0.528,0.028592
max,0.577869,0.619835,0.583333,0.592885,0.076168


In [3]:
import plotly.express as px

# Reshape the four classification metrics of results_df into long format:
# one row per (Ticker, Metric) pair, with the score stored in 'Value'.
df_long = results_df.melt(
    id_vars='Ticker',
    value_vars=['Accuracy', 'Precision', 'Recall', 'F1-Score'],
    var_name='Metric',
    value_name='Value',
)

# One violin per metric (each in its own color), with an embedded box plot and
# every ticker drawn as an individual point; hovering a point shows its ticker.
fig = px.violin(
    df_long,
    x='Metric',
    y='Value',
    color='Metric',
    box=True,
    points='all',
    hover_data=['Ticker'],
).update_layout(
    title='Distribution of Classification Metrics across Tickers',
    xaxis_title='Metric',
    yaxis_title='Value',
    legend_title='Metric',
    width=900,
    height=500,
)

fig.show()

In [4]:
# Long-format view of the single regression metric: one row per ticker,
# with 'Metric' fixed to 'RMSE' and the error stored in 'Value'.
df_long = results_df.melt(
    id_vars='Ticker',
    value_vars=['RMSE'],
    var_name='Metric',
    value_name='Value',
)

# Violin of the RMSE values with an embedded box plot and every ticker drawn
# as an individual point; hovering a point shows its ticker.
fig = px.violin(
    df_long,
    x='Metric',
    y='Value',
    color='Metric',
    box=True,
    points='all',
    hover_data=['Ticker'],
).update_layout(
    title='Distribution of Regression Metric across Tickers',
    xaxis_title='Metric',
    yaxis_title='Value',
    legend_title='Metric',
    width=900,
    height=500,
)

fig.show()

In [5]:
import pandas as pd
import plotly.express as px

# RMSE distribution with outliers removed by the 1.5 * IQR rule.
#
# The long-format RMSE frame is rebuilt from results_df here rather than read
# from `df_long`: `df_long` is assigned by more than one cell in this notebook
# (classification metrics and RMSE), so reusing it would make this plot, whose
# title says "Regression Metric", depend on which cell happened to run last.
IQR_MULTIPLIER = 1.5  # fence width, in IQRs, beyond Q1 / Q3

rmse_long = results_df.melt(
    id_vars='Ticker',
    value_vars=['RMSE'],
    var_name='Metric',
    value_name='Value',
)

# Per-metric quartiles, broadcast back to one entry per row so that they
# align index-wise with rmse_long for the row filter below.
values_by_metric = rmse_long.groupby('Metric')['Value']
Q1 = values_by_metric.transform(lambda x: x.quantile(0.25))
Q3 = values_by_metric.transform(lambda x: x.quantile(0.75))
IQR = Q3 - Q1

# Keep only values inside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] (bounds inclusive).
df_long_no_outliers = rmse_long[
    (rmse_long['Value'] >= (Q1 - IQR_MULTIPLIER * IQR)) &
    (rmse_long['Value'] <= (Q3 + IQR_MULTIPLIER * IQR))
]

# Plot the filtered values: violin with embedded box plot and all points shown.
fig = px.violin(df_long_no_outliers,
                x='Metric',
                y='Value',
                box=True,
                points='all',
                color='Metric',
                hover_data=['Ticker'])

fig.update_layout(title='Distribution of Regression Metric across Tickers (No Outliers)',
                  yaxis_title='Value',
                  xaxis_title='Metric',
                  legend_title='Metric',
                  width=900,
                  height=500)

fig.show()

In [6]:
metrics = ["Accuracy", "Precision", "Recall", "F1-Score", "RMSE"]

# For each metric, record which ticker attains the highest and the lowest
# value, together with those values.
summary_rows = []
for metric in metrics:
    scores = results_df[metric]
    top_idx = scores.idxmax()
    bottom_idx = scores.idxmin()
    summary_rows.append({
        "Metric": metric,
        "Max_Ticker": results_df.loc[top_idx, "Ticker"],
        "Max_Value": results_df.loc[top_idx, metric],
        "Min_Ticker": results_df.loc[bottom_idx, "Ticker"],
        "Min_Value": results_df.loc[bottom_idx, metric],
    })

# Round the value columns to 6 decimals for a cleaner printout.
summary_df = pd.DataFrame(summary_rows).round({"Max_Value": 6, "Min_Value": 6})

print(summary_df)

      Metric Max_Ticker  Max_Value Min_Ticker  Min_Value
0   Accuracy        ARM   0.577869       FANG   0.422131
1  Precision        ARM   0.619835        ROP   0.400000
2     Recall       CPRT   0.583333       DXCM   0.400000
3   F1-Score        ARM   0.592885        HON   0.410714
4       RMSE       GEHC   0.076168        LIN   0.009660
