In [2]:
import pandas as pd
# Load the per-ticker evaluation results. The preview below shows one row per
# ticker with Accuracy, Precision, Recall, F1-Score and RMSE columns.
RESULTS_CSV = 'lstm_evaluation_results_sc.csv'
results_df = pd.read_csv(RESULTS_CSV)

# Display the first five rows as a quick sanity check of the load.
results_df.head(n=5)

Unnamed: 0,Ticker,Accuracy,Precision,Recall,F1-Score,RMSE
0,FANG,0.52459,0.578125,0.544118,0.560606,22.037217
1,BKR,0.508197,0.512397,0.504065,0.508197,2.252786
2,CDNS,0.540984,0.546875,0.564516,0.555556,85.658883
3,CSCO,0.540984,0.585366,0.541353,0.5625,1.180957
4,MSTR,0.504098,0.508333,0.495935,0.502058,124.394575


In [3]:
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# numeric columns, displayed to 6 decimal places.
summary_stats = results_df.describe()
summary_stats.round(decimals=6)

Unnamed: 0,Accuracy,Precision,Recall,F1-Score,RMSE
count,101.0,101.0,101.0,101.0,101.0
mean,0.500487,0.52301,0.510933,0.516035,48.193376
std,0.0272,0.039853,0.05025,0.040436,121.576903
min,0.430328,0.413793,0.38843,0.417391,0.847245
25%,0.487705,0.5,0.474138,0.490119,5.20126
50%,0.5,0.523077,0.516129,0.519685,14.279883
75%,0.516393,0.548148,0.544118,0.550523,48.736907
max,0.569672,0.619048,0.629032,0.601399,1127.861715


In [4]:
import plotly.express as px

# Stack the four classification metrics into long format: one row per
# (Ticker, Metric) pair, with the score stored in 'Value'.
classification_metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']
df_long = results_df.melt(id_vars='Ticker',
                          value_vars=classification_metrics,
                          var_name='Metric',
                          value_name='Value')

# One violin per metric, each with an embedded box plot and every ticker drawn
# as an individual point; the ticker symbol is added to the hover tooltip.
violin_options = dict(x='Metric',
                      y='Value',
                      color='Metric',
                      box=True,
                      points='all',
                      hover_data=['Ticker'])
fig = px.violin(df_long, **violin_options)

# Titles and a fixed 900x500 canvas.
layout_options = dict(title='Distribution of Classification Metrics across Tickers',
                      xaxis_title='Metric',
                      yaxis_title='Value',
                      legend_title='Metric',
                      width=900,
                      height=500)
fig.update_layout(**layout_options)

fig.show()

In [5]:
# Long-format frame holding only the RMSE column: one row per ticker, with
# 'Metric' set to 'RMSE' and the error stored in 'Value'.
# NOTE: this rebinds `df_long`, replacing whatever an earlier cell stored there.
df_long = results_df.melt(id_vars='Ticker',
                          value_vars=['RMSE'],
                          var_name='Metric',
                          value_name='Value')

# Single violin with an embedded box plot and every ticker drawn as an
# individual point; the ticker symbol is added to the hover tooltip.
violin_options = dict(x='Metric',
                      y='Value',
                      color='Metric',
                      box=True,
                      points='all',
                      hover_data=['Ticker'])
fig = px.violin(df_long, **violin_options)

# Titles and a fixed 900x500 canvas.
layout_options = dict(title='Distribution of Regression Metric across Tickers',
                      xaxis_title='Metric',
                      yaxis_title='Value',
                      legend_title='Metric',
                      width=900,
                      height=500)
fig.update_layout(**layout_options)

fig.show()

In [7]:
import pandas as pd
import plotly.express as px

# Build the RMSE long-format frame here rather than reading the shared
# `df_long` variable. Other cells rebind `df_long` (classification metrics in
# one, RMSE in another), so the data plotted here used to depend on which of
# them ran last, while the title below always says "Regression Metric".
rmse_long = results_df.melt(id_vars='Ticker',
                            value_vars=['RMSE'],
                            var_name='Metric',
                            value_name='Value')

# Fence multiplier: a value further than this many IQRs below Q1 or above Q3
# is treated as an outlier.
IQR_MULTIPLIER = 1.5

# Per-metric quartiles, broadcast back to one entry per row so they can be
# compared element-wise against 'Value'.
values_by_metric = rmse_long.groupby('Metric')['Value']
Q1 = values_by_metric.transform(lambda x: x.quantile(0.25))
Q3 = values_by_metric.transform(lambda x: x.quantile(0.75))
IQR = Q3 - Q1

# Keep only the values inside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] (bounds inclusive).
lower_fence = Q1 - IQR_MULTIPLIER * IQR
upper_fence = Q3 + IQR_MULTIPLIER * IQR
df_long_no_outliers = rmse_long[
    (rmse_long['Value'] >= lower_fence) &
    (rmse_long['Value'] <= upper_fence)
]

# Plot the filtered distribution: violin with an embedded box plot and every
# remaining ticker drawn as an individual point.
fig = px.violin(df_long_no_outliers,
                x='Metric',
                y='Value',
                box=True,
                points='all',
                color='Metric',
                hover_data=['Ticker'])

fig.update_layout(title='Distribution of Regression Metric across Tickers (No Outliers)',
                  yaxis_title='Value',
                  xaxis_title='Metric',
                  legend_title='Metric',
                  width=900,
                  height=500)

fig.show()

In [6]:
# Metrics to summarise. For each one, record which ticker has the highest
# value and which has the lowest.
metrics = ["Accuracy", "Precision", "Recall", "F1-Score", "RMSE"]

summary_rows = []
for metric in metrics:
    scores = results_df[metric]
    # Index labels of the rows holding the largest / smallest value.
    top_label = scores.idxmax()
    bottom_label = scores.idxmin()

    summary_rows.append({
        "Metric": metric,
        "Max_Ticker": results_df.loc[top_label, "Ticker"],
        "Max_Value": scores.loc[top_label],
        "Min_Ticker": results_df.loc[bottom_label, "Ticker"],
        "Min_Value": scores.loc[bottom_label],
    })

# One row per metric, then round both value columns to 6 decimals for a
# cleaner printout.
summary_df = pd.DataFrame(summary_rows)
summary_df = summary_df.round({"Max_Value": 6, "Min_Value": 6})

print(summary_df)

      Metric Max_Ticker    Max_Value Min_Ticker  Min_Value
0   Accuracy        KHC     0.569672       CTAS   0.430328
1  Precision       BKNG     0.619048       SBUX   0.413793
2     Recall       AMAT     0.629032       INTC   0.388430
3   F1-Score       AAPL     0.601399       SBUX   0.417391
4       RMSE       BKNG  1127.861715        KHC   0.847245
