In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import torch
from chronos import ChronosPipeline
from chronos import BaseChronosPipeline
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Directional accuracy functions for chronos 
def directional_up_accuracy_single(y_true, y_pred):
    return int(y_true > 0 and y_pred > 0)

def directional_down_accuracy_single(y_true, y_pred):
    return int(y_true < 0 and y_pred < 0)


In [None]:
# top_50_df = pd.read_csv("top50_estimation.csv") 
# top_50_out_df = pd.read_csv("top50_outofsample.csv") 

# top_50_df['date'] = pd.to_datetime(top_50_df['date'])
# top_50_out_df['date'] = pd.to_datetime(top_50_out_df['date'])

TARGET = 'EXCESS_RET'

Average market Cap MAX 

In [6]:

avg_market_caps = (
    main_df.groupby('PERMNO')['MKT_CAP']
    .mean()
    .sort_values(ascending=False)
)

display((avg_market_caps))

top_n = max(1, int(len(avg_market_caps) * 0.25))

# Get top 25% PERMNOs
top_25_pct_permnos = avg_market_caps.sort_values(ascending=False).head(top_n).index.tolist()
# Get top 25% PERMNOs
min_25_pct_permnos = avg_market_caps.sort_values(ascending=True).head(top_n).index.tolist()

print("Top 25% PERMNOs by Market Cap:", top_25_pct_permnos)
print("Top 25% PERMNOs by Market Cap:", min_25_pct_permnos)

PERMNO
66093    1.686185e+11
29647    3.100828e+10
76226    2.923047e+10
78405    1.376805e+10
80924    1.289335e+10
86996    1.167744e+10
78840    8.132226e+09
80089    5.775804e+09
83604    2.752245e+09
63773    2.715150e+09
75104    2.603908e+09
86869    2.491985e+09
75320    2.304083e+09
87075    2.117229e+09
81043    1.470334e+09
81740    1.054692e+09
83989    6.317436e+08
81133    5.857122e+08
78971    3.309630e+08
11174    3.276144e+08
79033    1.865222e+08
86916    1.124923e+08
39731    6.842959e+07
Name: MKT_CAP, dtype: float64

Top 25% PERMNOs by Market Cap: [66093, 29647, 76226, 78405, 80924]
Top 25% PERMNOs by Market Cap: [39731, 86916, 79033, 11174, 78971]


In [None]:
import torch
import pandas as pd
import numpy as np
from chronos import ChronosPipeline
from sklearn.metrics import r2_score, mean_squared_error

# Reload data


main_df = pd.read_csv("main_data.csv") 
est_start = "2000-01-01"
est_end = "2015-12-31"
oos_start = "2016-01-01"
oos_end = "2024-12-31"

main_df["date"] = pd.to_datetime(main_df["date"])

# Settings
TARGET = "EXCESS_RET"
context_length = 30
prediction_length = 12

# Load Chronos model once
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map="cpu",
    torch_dtype=torch.float32
)

results = []

# Loop with minimal memory footprint
for permno in top_50_out_df["PERMNO"].unique():
    try:
        train_df = top_50_df[top_50_df["PERMNO"] == permno].sort_values("date")
        test_df = top_50_out_df[top_50_out_df["PERMNO"] == permno].sort_values("date")

        train_series = train_df[TARGET].dropna().values.astype("float32")
        test_series = test_df[TARGET].dropna().values.astype("float32")

        if len(train_series) < context_length or len(test_series) < prediction_length:
            continue

        context = torch.tensor(train_series[-context_length:])
        train_mean = train_series.mean()
        true_future = test_series[:prediction_length]

        quantiles, mean = pipeline.predict_quantiles(
            context=context,
            prediction_length=prediction_length,
            quantile_levels=[0.1, 0.5, 0.9]
        )

        q50 = quantiles[:, :, 1].squeeze().numpy()

        r2 = r2_score(true_future, q50)
        r2_out = 1 - np.sum((true_future - q50) ** 2) / np.sum((true_future - train_mean) ** 2)
        mse = mean_squared_error(true_future, q50)
        directional = np.mean(np.sign(np.diff(true_future)) == np.sign(np.diff(q50)))

        results.append({
            "PERMNO": permno,
            "R2": r2,
            "R2_out_of_sample": r2_out,
            "MSE": mse,
            "Directional Accuracy": directional,
            "Directional Up'" :directional_up_accuracy_single(true_future,q50),
            "Directional Down" : directional_down_accuracy_single(true_future,q50),
        })

    except Exception as e:
        results.append({
            "PERMNO": permno,
            "Error": str(e)
        })



In [14]:
final_result=pd.DataFrame(results)
display(final_result)

Unnamed: 0,PERMNO,R2,R2_out_of_sample,MSE,Directional Accuracy
0,11174,-0.42461,-0.020701,0.001054,0.363636
1,12173,-0.178229,-0.029025,0.00057,0.636364
2,29647,-0.001568,0.015665,0.000173,0.636364
3,39731,-0.03852,0.134079,0.000911,0.454545
4,47677,-0.111965,0.094797,0.000858,0.454545
5,63773,-0.36817,0.065013,0.000611,0.909091
6,66093,-0.034038,-0.017757,0.000148,0.545455
7,66835,-0.002632,0.012277,0.00028,0.545455
8,68857,0.039826,0.049853,0.000124,0.818182
9,75049,-0.213795,0.085794,0.000223,0.181818


In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Force CPU usage

import torch
import pandas as pd
import numpy as np
from chronos import ChronosPipeline
from sklearn.metrics import r2_score, mean_squared_error

# Load data
top_50_df = pd.read_csv("top50_estimation.csv")
top_50_out_df = pd.read_csv("top50_outofsample.csv")

top_50_df["date"] = pd.to_datetime(top_50_df["date"])
top_50_out_df["date"] = pd.to_datetime(top_50_out_df["date"])

# Chronos config
TARGET = "EXCESS_RET"
context_length = 30
prediction_length = 12
ROLLING_WINDOWS = [5, 21, 252, 512]

# Load model once
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map="cpu",
    torch_dtype=torch.float32
)

results = []

# Loop over rolling windows
for window in ROLLING_WINDOWS:
    rolling_col = f"rolling_mean_{window}"

    for permno in top_50_out_df["PERMNO"].unique():
        try:
            train_df = top_50_df[top_50_df["PERMNO"] == permno].sort_values("date")
            test_df = top_50_out_df[top_50_out_df["PERMNO"] == permno].sort_values("date")

            # Drop rows with NaNs in target or rolling column
            train_df = train_df.dropna(subset=[TARGET, rolling_col])
            test_df = test_df.dropna(subset=[TARGET, rolling_col])

            train_series = train_df[TARGET].values.astype("float32")
            test_series = test_df[TARGET].values.astype("float32")

            if len(train_series) < context_length or len(test_series) < prediction_length:
                continue

            context = torch.tensor(train_series[-context_length:])
            train_mean = train_series.mean()
            true_future = test_series[:prediction_length]

            quantiles, mean = pipeline.predict_quantiles(
                context=context,
                prediction_length=prediction_length,
                quantile_levels=[0.1, 0.5, 0.9]
            )

            q50 = quantiles[:, :, 1].reshape(-1).numpy()
            true_future = np.atleast_1d(test_series[:prediction_length])
            
            r2 = r2_score(true_future, q50)
            r2_out = 1 - np.sum((true_future - q50) ** 2) / np.sum((true_future - train_mean) ** 2)
            mse = mean_squared_error(true_future, q50)
            directional = np.mean(np.sign(np.diff(true_future)) == np.sign(np.diff(q50)))

            results.append({
                "Model": "Chronos",
                "Rolling Window": window,
                "PERMNO": permno,
                "R2": r2,
                "R2_out_of_sample": r2_out,
                "MSE": mse,
                "Directional Up" :directional_up_accuracy_single(true_future,q50),
                "Directional Down" : directional_down_accuracy_single(true_future,q50),
                "Directional Accuracy": directional
            })

        except Exception as e:
            results.append({
                "Model": "Chronos",
                "Rolling Window": window,
                "PERMNO": permno,
                "Error": str(e)
            })

# Final DataFrame
chronos_small_result_df = pd.DataFrame(results)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

In [11]:
display(chronos_small_result_df)
# chronos_small_result_df.to_csv("Results/chronos_t5_small_results.csv", index=False)

Unnamed: 0,Model,Rolling Window,PERMNO,R2,R2_out_of_sample,MSE,directional_up,directional_down,Directional Accuracy
0,Chronos,5,11174,,-0.000804,4.507616e-03,,,
1,Chronos,5,12173,,-0.012182,2.918922e-04,,,
2,Chronos,5,29647,,0.170381,3.537717e-04,,,
3,Chronos,5,39731,,0.525932,1.222217e-03,,,
4,Chronos,5,47677,,0.107067,7.675753e-04,,,
...,...,...,...,...,...,...,...,...,...
159,Chronos,512,86996,,0.499916,8.295847e-08,,,
160,Chronos,512,87075,,0.184874,6.947169e-04,,,
161,Chronos,512,87121,,0.657513,1.678969e-04,,,
162,Chronos,512,87339,,-0.426518,9.036565e-04,,,


In [9]:

summary_all = chronos_small_result_df[[
    "R2", "R2_out_of_sample", "MSE", "directional_up", "directional_down", "Directional Accuracy"
]].mean()

top_10pct = chronos_small_result_df[chronos_small_result_df["PERMNO"].isin(top_25_pct_permnos)]
bottom_10pct = chronos_small_result_df[chronos_small_result_df["PERMNO"].isin(min_25_pct_permnos)]

summary_top = top_10pct.drop(columns='PERMNO').mean()
summary_bottom = bottom_10pct.drop(columns='PERMNO').mean()

# Combine into one table
summary = pd.DataFrame({
    "All Stocks": summary_all,
    "Top 25%": summary_top,
    "Bottom 25%": summary_bottom
})

# Transpose the table for readability
summary_transposed = summary.T.round(3)
display(summary_transposed)

TypeError: Could not convert ['ChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronosChronos'] to numeric

In [4]:
display(chronos_result_df)
chronos_result_df.to_csv("Results/chronos_t5_large_results.csv", index=False)


Unnamed: 0,Model,Rolling Window,PERMNO,R2,R2_out_of_sample,MSE,Directional Accuracy
0,Chronos,5,11174,0.100083,0.100680,0.000773,0.363636
1,Chronos,5,12173,-0.152752,-0.062207,0.000376,0.181818
2,Chronos,5,29647,-0.009858,0.007852,0.000325,0.454545
3,Chronos,5,39731,-0.046022,-0.012927,0.001303,0.545455
4,Chronos,5,47677,-0.033624,0.001304,0.000689,0.454545
...,...,...,...,...,...,...,...
158,Chronos,512,86996,-0.145142,-0.008289,0.000164,0.454545
159,Chronos,512,87075,-0.036718,-0.010003,0.000224,0.363636
160,Chronos,512,87121,-0.095043,-0.032916,0.000195,0.272727
161,Chronos,512,87339,-0.061051,-0.011221,0.000876,0.636364


In [17]:
chronos_avg_by_window = chronos_result_df.groupby("Rolling Window")[
        ["R2", "R2_out_of_sample", "MSE", "Directional Accuracy"]
    ].mean().reset_index()

display(chronos_avg_by_window)

NameError: name 'chronos_result_df' is not defined

NEWWWWWW

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Force CPU usage

import torch
import pandas as pd
import numpy as np
from chronos import ChronosPipeline
from sklearn.metrics import r2_score, mean_squared_error

# Load data
top_50_df = pd.read_csv("top50_estimation.csv")
top_50_out_df = pd.read_csv("top50_outofsample.csv")

top_50_df["date"] = pd.to_datetime(top_50_df["date"])
top_50_out_df["date"] = pd.to_datetime(top_50_out_df["date"])

# Chronos config
TARGET = "EXCESS_RET"
CONTEXT_LENGTHS = [5, 21, 252, 512]
PREDICTION_LENGTH = 1  # change if needed

# Load Chronos model
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map="cpu",
    torch_dtype=torch.float32
)

results = []

# Loop over context lengths
for context_length in CONTEXT_LENGTHS:
    print(f"\nEvaluating Context Length: {context_length}")
    
    for permno in top_50_out_df["PERMNO"].unique():
        try:
            # Filter and sort
            train_df = top_50_df[top_50_df["PERMNO"] == permno].sort_values("date")
            test_df = top_50_out_df[top_50_out_df["PERMNO"] == permno].sort_values("date")

            train_df = train_df.dropna(subset=[TARGET])
            test_df = test_df.dropna(subset=[TARGET])

            train_series = train_df[TARGET].values.astype("float32")
            test_series = test_df[TARGET].values.astype("float32")

            if len(train_series) < context_length or len(test_series) < PREDICTION_LENGTH:
                continue

            context = torch.tensor(train_series[-context_length:], dtype=torch.float32).reshape(1, -1)
            true_future = test_series[:PREDICTION_LENGTH]
            train_mean = train_series.mean()

            quantiles, mean = pipeline.predict_quantiles(
                context=context,
                prediction_length=PREDICTION_LENGTH,
                quantile_levels=[0.1, 0.5, 0.9]
            )

            q50 = quantiles[:, :, 1].squeeze().numpy()
            if np.isscalar(q50):
                q50 = np.array([q50])

            r2 = r2_score(true_future, q50)
            r2_out = 1 - np.sum((true_future - q50) ** 2) / np.sum((true_future - train_mean) ** 2)
            mse = mean_squared_error(true_future, q50)
            directional = (
                np.mean(np.sign(np.diff(true_future)) == np.sign(np.diff(q50)))
                if len(true_future) > 1 else np.nan
            )

            results.append({
                "Model": "Chronos",
                "Context Length": context_length,
                "PERMNO": permno,
                "R2": r2,
                "R2_out_of_sample": r2_out,
                "MSE": mse,
                "Directional Accuracy": directional
            })

        except Exception as e:
            results.append({
                "Model": "Chronos",
                "Context Length": context_length,
                "PERMNO": permno,
                "Error": str(e)
            })

# Save results
chronos_result_df = pd.DataFrame(results)
chronos_result_df = chronos_result_df.dropna(subset=["R2", "R2_out_of_sample", "MSE", "Directional Accuracy"])
chronos_result_df.to_csv("Results/chronos_t5large_context_eval.csv", index=False)

print("\n✅ Results saved to Results/chronos_t5large_context_eval.csv")


  from .autonotebook import tqdm as notebook_tqdm



Evaluating Context Length: 5

Evaluating Context Length: 21

Evaluating Context Length: 252

Evaluating Context Length: 512


KeyError: ['R2', 'R2_out_of_sample', 'MSE', 'Directional Accuracy']

In [None]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""  # Force CPU usage

import torch
import pandas as pd
import numpy as np
from chronos import ChronosPipeline
from sklearn.metrics import r2_score, mean_squared_error

# --- Config ---
TARGET = "EXCESS_RET"
CONTEXT_LENGTHS = [5, 21, 252, 512]
PREDICTION_LENGTH = 1
QUANTILES = [0.1, 0.5, 0.9]

# --- Load and merge ---
top_50_df = pd.read_csv("top50_estimation.csv")
top_50_out_df = pd.read_csv("top50_outofsample.csv")

top_50_df["date"] = pd.to_datetime(top_50_df["date"])
top_50_out_df["date"] = pd.to_datetime(top_50_out_df["date"])

full_df = pd.concat([top_50_df, top_50_out_df], ignore_index=True)
full_df = full_df.dropna(subset=[TARGET]).sort_values(["PERMNO", "date"])

# --- Load model ---
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-large",
    device_map="cpu",
    torch_dtype=torch.float32
)

results = []

# --- Loop over context lengths ---
for context_length in CONTEXT_LENGTHS:
    print(f"\nEvaluating Context Length: {context_length}")
    
    for permno in full_df["PERMNO"].unique():
        perm_df = full_df[full_df["PERMNO"] == permno].reset_index(drop=True)

        if len(perm_df) <= context_length + PREDICTION_LENGTH:
            continue

        train_mean = perm_df[TARGET].mean()

        for i in range(context_length, len(perm_df) - PREDICTION_LENGTH):
            try:
                context = perm_df[TARGET].iloc[i - context_length:i].values.astype("float32")
                true_future = perm_df[TARGET].iloc[i:i + PREDICTION_LENGTH].values.astype("float32")

                context_tensor = torch.tensor(context).reshape(1, -1)

                quantiles, mean = pipeline.predict_quantiles(
                    context=context_tensor,
                    prediction_length=PREDICTION_LENGTH,
                    quantile_levels=QUANTILES
                )

                q50 = quantiles[:, :, 1].squeeze().numpy()
                if np.isscalar(q50):
                    q50 = np.array([q50])

                r2 = r2_score(true_future, q50)
                r2_out = 1 - np.sum((true_future - q50) ** 2) / np.sum((true_future - train_mean) ** 2)
                mse = mean_squared_error(true_future, q50)
                directional = float(np.sign(true_future[0]) == np.sign(q50[0]))

                results.append({
                    "Model": "Chronos",
                    "Context Length": context_length,
                    "PERMNO": permno,
                    "Date": perm_df["date"].iloc[i + PREDICTION_LENGTH - 1],
                    "True": true_future[0],
                    "Predicted_q50": q50[0],
                    "R2": r2,
                    "R2_out_of_sample": r2_out,
                    "MSE": mse,
                    "Directional Accuracy": directional
                })

            except Exception as e:
                results.append({
                    "Model": "Chronos",
                    "Context Length": context_length,
                    "PERMNO": permno,
                    "Date": perm_df["date"].iloc[i + PREDICTION_LENGTH - 1],
                    "Error": str(e)
                })

# --- Save results ---
os.makedirs("Results", exist_ok=True)
chronos_result_df = pd.DataFrame(results)
chronos_result_df.to_csv("Results/chronos_t5large_rolling_eval.csv", index=False)

print("\n✅ Results saved to Results/chronos_t5large_rolling_eval.csv")



Evaluating Context Length: 5


In [8]:
display(chronos_result_df)

Unnamed: 0,Model,Context Length,PERMNO,Error
0,Chronos,5,11174,Input should have at least 1 dimension i.e. sa...
1,Chronos,5,12173,Input should have at least 1 dimension i.e. sa...
2,Chronos,5,29647,Input should have at least 1 dimension i.e. sa...
3,Chronos,5,39731,Input should have at least 1 dimension i.e. sa...
4,Chronos,5,47677,Input should have at least 1 dimension i.e. sa...
...,...,...,...,...
167,Chronos,512,87121,Input should have at least 1 dimension i.e. sa...
168,Chronos,512,87255,Input should have at least 1 dimension i.e. sa...
169,Chronos,512,87339,Input should have at least 1 dimension i.e. sa...
170,Chronos,512,87342,Input should have at least 1 dimension i.e. sa...


In [None]:
import pandas as pd
import numpy as np
import torch
from chronos import ChronosPipeline  # ← this is correct for chronos-forecasting
import traceback

# Load model
pipeline = ChronosPipeline.from_pretrained(
    "amazon/chronos-t5-small",
    device_map="cpu",  # use "cuda" if available
    torch_dtype=torch.float32
)

# Config
TARGET = "EXCESS_RET"
context_length = 5
prediction_length = 1
start_date = pd.to_datetime("2016-01-01")
end_date = pd.to_datetime("2024-12-31")

# Load your dataset
main_df = pd.read_csv("main_data.csv")
main_df["date"] = pd.to_datetime(main_df["date"])
main_df = main_df.sort_values(["PERMNO", "date"])

# Collect predictions
all_predictions = []

for permno in main_df["PERMNO"].unique():
    df = main_df[main_df["PERMNO"] == permno].dropna(subset=[TARGET]).reset_index(drop=True)

    for i in range(context_length, len(df)):
        current_date = df.loc[i, "date"]
        if not (start_date <= current_date <= end_date):
            continue

        try:
            context_vals = df.iloc[i - context_length:i][TARGET].values.astype("float32")
            if len(context_vals) != context_length:
                print(f"[SKIP] PERMNO {permno}, date {current_date} — context too short")
                continue

            context_tensor = torch.tensor(context_vals).unsqueeze(0)

            print(f"[INFO] Predicting: PERMNO {permno}, date {current_date}, shape: {context_tensor.shape}")

            output = pipeline.predict(context_tensor, prediction_length=1)

            # output is just a Tensor, no dictionary!
            pred_mean = float(output.detach().cpu().numpy().flatten()[0])

            true_val = float(df.loc[i, TARGET])

            all_predictions.append({
                "PERMNO": permno,
                "date": current_date,
                "true": true_val,
                "predicted": pred_mean
            })

        except Exception as e:
            print(f"[EXCEPTION] PERMNO {permno}, date {current_date}")
            import traceback
            traceback.print_exc()
            continue



# Save results
pred_df = pd.DataFrame(all_predictions)
pred_df.to_csv("chronos_t5_nextday_predictions_5.csv", index=False)


[INFO] Predicting: PERMNO 11174, date 2016-01-04 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-05 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-06 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-07 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-08 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-11 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-12 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-13 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-14 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-15 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-19 00:00:00, shape: torch.Size([1, 30])
[INFO] Predicting: PERMNO 11174, date 2016-01-20 00:00

In [35]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load predictions
df = pd.read_csv("chronos_t5_nextday_predictions.csv")

# Define directional metrics
def directional_accuracy(y_true, y_pred):
    return np.mean(np.sign(y_true) == np.sign(y_pred))

def directional_up_accuracy(y_true, y_pred):
    return np.mean((y_true > 0) & (y_pred > 0))

def directional_down_accuracy(y_true, y_pred):
    return np.mean((y_true < 0) & (y_pred < 0))

# Aggregate metrics by PERMNO
results = []

for permno, group in df.groupby("PERMNO"):
    y_true = group["true"].values
    y_pred = group["predicted"].values

    if len(y_true) < 5:
        continue  # skip small series

    results.append({
        'Model': "Chronos-T5",
        'Rolling Window': "Daily",
        'PERMNO': permno,
        'R2': r2_score(y_true, y_pred),
        'MSE': mean_squared_error(y_true, y_pred),
        'MAE': mean_absolute_error(y_true, y_pred),
        'Directional Accuracy': directional_accuracy(y_true, y_pred),
        'Directional Up': directional_up_accuracy(y_true, y_pred),
        'Directional Down': directional_down_accuracy(y_true, y_pred)
    })

# Save metrics
results_df = pd.DataFrame(results)
display(results_df)
results_df.to_csv("chronos_t5_metrics.csv", index=False)


Unnamed: 0,Model,Rolling Window,PERMNO,R2,MSE,MAE,Directional Accuracy,Directional Up,Directional Down
0,Chronos-T5,Daily,11174,-0.094721,0.001179,0.022465,0.476148,0.214664,0.261484
1,Chronos-T5,Daily,29647,-0.068422,0.000147,0.007728,0.486749,0.27341,0.213339
2,Chronos-T5,Daily,39731,-0.051723,0.00187,0.028786,0.480124,0.17712,0.303004
3,Chronos-T5,Daily,63773,-0.077046,0.001225,0.018841,0.495141,0.261484,0.233657
4,Chronos-T5,Daily,66093,-0.077082,0.000232,0.010281,0.479682,0.261484,0.218198
5,Chronos-T5,Daily,75104,-0.168424,0.000988,0.019727,0.482332,0.215548,0.266784
6,Chronos-T5,Daily,75320,-0.028573,0.001126,0.017697,0.500442,0.260601,0.239841
7,Chronos-T5,Daily,76226,-0.106535,0.000997,0.020072,0.486307,0.241166,0.245141
8,Chronos-T5,Daily,78405,-0.065057,0.000212,0.009814,0.487191,0.261042,0.226148
9,Chronos-T5,Daily,78840,-0.063851,0.000657,0.017937,0.493816,0.253092,0.240724
