In [10]:
import pandas as pd
# Load the per-ticker evaluation metrics from CSV and preview the first rows.
RESULTS_CSV = 'tft_return_evaluation_results.csv'
results_df = pd.read_csv(RESULTS_CSV)
results_df.head()

Unnamed: 0,Ticker,Accuracy,Precision,Recall,F1-Score,RMSE
0,FANG,0.465587,0.463415,0.463415,0.463415,0.141178
1,BKR,0.522088,0.508772,0.479339,0.493617,0.020343
2,CDNS,0.626506,0.622951,0.617886,0.620408,0.029268
3,CSCO,0.53012,0.515625,0.545455,0.53012,0.013221
4,MSTR,0.433735,0.396396,0.372881,0.384279,0.073538


In [11]:
# Descriptive statistics of the numeric columns, shown with six decimal places.
stats_summary = results_df.describe()
stats_summary.round(decimals=6)

Unnamed: 0,Accuracy,Precision,Recall,F1-Score,RMSE
count,100.0,100.0,100.0,100.0,100.0
mean,0.537588,0.536782,0.534166,0.53481,0.03042
std,0.075881,0.084781,0.075983,0.077892,0.018138
min,0.349398,0.343137,0.289256,0.313901,0.010317
25%,0.477912,0.475308,0.483468,0.481633,0.01891
50%,0.528112,0.518017,0.536888,0.52906,0.025292
75%,0.587349,0.594682,0.58997,0.589147,0.037534
max,0.702811,0.754545,0.68254,0.692607,0.141178


In [12]:
import plotly.express as px

# Classification metrics to compare side by side in one figure.
classification_metrics = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

# Long format: one row per (Ticker, Metric) pair, with the score in 'Value'.
df_long = results_df.melt(
    id_vars='Ticker',
    value_vars=classification_metrics,
    var_name='Metric',
    value_name='Value',
)

# One violin per metric, with an embedded box plot and every ticker drawn as
# a point; hovering a point reveals its ticker.
fig = px.violin(
    df_long,
    x='Metric',
    y='Value',
    color='Metric',
    box=True,
    points='all',
    hover_data=['Ticker'],
)

# Titles, axis labels and figure size.
layout_options = dict(
    title='Distribution of Classification Metrics across Tickers',
    xaxis_title='Metric',
    yaxis_title='Value',
    legend_title='Metric',
    width=900,
    height=500,
)
fig.update_layout(**layout_options)

fig.show()

In [13]:
# Long format restricted to the regression metric (RMSE): one row per ticker.
# Note: this reassigns `df_long`, replacing any frame built by an earlier cell.
df_long = results_df.melt(
    id_vars='Ticker',
    value_vars=['RMSE'],
    var_name='Metric',
    value_name='Value',
)

# Single violin with an embedded box plot and every ticker drawn as a point;
# hovering a point reveals its ticker.
fig = px.violin(
    df_long,
    x='Metric',
    y='Value',
    color='Metric',
    box=True,
    points='all',
    hover_data=['Ticker'],
)

# Titles, axis labels and figure size.
layout_options = dict(
    title='Distribution of Regression Metric across Tickers',
    xaxis_title='Metric',
    yaxis_title='Value',
    legend_title='Metric',
    width=900,
    height=500,
)
fig.update_layout(**layout_options)

fig.show()

In [14]:
import pandas as pd
import plotly.express as px

# Build the RMSE long-format frame inside this cell instead of reusing whatever
# `df_long` an earlier cell last assigned. That name is written by more than one
# cell, so reading it here made the plotted data depend on execution order and
# could contradict the "Regression Metric" title below.
rmse_long = results_df.melt(
    id_vars='Ticker',
    value_vars=['RMSE'],
    var_name='Metric',
    value_name='Value',
)

# Fence width, in multiples of the IQR, beyond which a value is an outlier.
IQR_MULTIPLIER = 1.5

# Compute the quartiles per metric and broadcast them back onto every row so
# they align with `rmse_long` for the element-wise comparison below.
values_by_metric = rmse_long.groupby('Metric')['Value']
Q1 = values_by_metric.transform(lambda x: x.quantile(0.25))
Q3 = values_by_metric.transform(lambda x: x.quantile(0.75))
IQR = Q3 - Q1

# Keep only values inside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] (both ends inclusive).
df_long_no_outliers = rmse_long[
    rmse_long['Value'].between(Q1 - IQR_MULTIPLIER * IQR,
                               Q3 + IQR_MULTIPLIER * IQR)
]

# Plot the filtered distribution: violin with an embedded box plot and every
# remaining ticker drawn as a point; hovering a point reveals its ticker.
fig = px.violin(df_long_no_outliers,
                x='Metric',
                y='Value',
                box=True,
                points='all',
                color='Metric',
                hover_data=['Ticker'])

fig.update_layout(title='Distribution of Regression Metric across Tickers (No Outliers)',
                  yaxis_title='Value',
                  xaxis_title='Metric',
                  legend_title='Metric',
                  width=900,
                  height=500)

fig.show()

In [15]:
metrics = ["Accuracy", "Precision", "Recall", "F1-Score", "RMSE"]

# For each metric, record which ticker attains the maximum and which attains
# the minimum, together with the corresponding values.
summary_rows = []
for metric in metrics:
    idx_of_max = results_df[metric].idxmax()
    idx_of_min = results_df[metric].idxmin()
    summary_rows.append(
        {
            "Metric": metric,
            "Max_Ticker": results_df.loc[idx_of_max, "Ticker"],
            "Max_Value": results_df.loc[idx_of_max, metric],
            "Min_Ticker": results_df.loc[idx_of_min, "Ticker"],
            "Min_Value": results_df.loc[idx_of_min, metric],
        }
    )

summary_df = pd.DataFrame(summary_rows)

# Round the value columns to six decimals for a cleaner printout.
for value_column in ("Max_Value", "Min_Value"):
    summary_df[value_column] = summary_df[value_column].round(6)

print(summary_df)

      Metric Max_Ticker  Max_Value Min_Ticker  Min_Value
0   Accuracy       KLAC   0.702811       VRTX   0.349398
1  Precision       KLAC   0.754545       ROST   0.343137
2     Recall       ISRG   0.682540       ROST   0.289256
3   F1-Score       NXPI   0.692607       ROST   0.313901
4       RMSE       FANG   0.141178        ADP   0.010317
