# Neutral Sentiment A/B Test (Upvote vs FinBERT) with Robustness

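Compare the TFT+Reddit model when the neutral sentiment class is defined by the upvote ratio (A) versus FinBERT (B), then check robustness by repeating both runs across multiple seeds and reporting the mean and standard deviation of the error metrics.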


In [1]:
# Path setup
import os, sys

# The project root is taken to be the parent of the current working directory;
# it is put at the front of sys.path (once) so project packages can be imported.
PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

# Input / output locations, all derived from PROJECT_ROOT.
DATA_DIR = os.path.join(PROJECT_ROOT, 'data', 'processed')
RESULTS_DIR = os.path.join(PROJECT_ROOT, 'results')

# The two datasets under comparison.
A_PATH = os.path.join(DATA_DIR, 'tsla_price_sentiment_spike.csv')       # Upvote neutral
B_PATH = os.path.join(DATA_DIR, 'tsla_price_sentiment_spike_new.csv')  # FinBERT neutral

# Echo the resolved dataset paths so a wrong working directory is obvious.
print(f'A: {A_PATH}\nB: {B_PATH}')



A: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
B: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv


In [2]:
# Deterministic seed helper
import os, random, numpy as np, torch
try:
    import lightning.pytorch as pl
except Exception:
    # Lightning is optional for this helper: without it, only the Python,
    # NumPy and torch generators are seeded directly.
    pl = None

def set_seed(seed: int) -> None:
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global generator and torch, then
    (when Lightning was importable) calls ``pl.seed_everything``. Finally it
    asks cuDNN for deterministic kernels and disables its autotuner.

    The Lightning and cuDNN steps are best-effort: a failure there is reported
    as a ``RuntimeWarning`` instead of being swallowed, so a partially seeded
    run is visible in the notebook output but does not abort the experiment.

    Args:
        seed: Integer seed applied to all generators.

    Note:
        Python reads ``PYTHONHASHSEED`` at interpreter start-up, so setting it
        here cannot change string hashing in the already-running kernel; the
        value is only inherited by child processes started afterwards.
    """
    import warnings

    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

    if pl is not None:
        try:
            pl.seed_everything(seed, workers=False)
        except Exception as exc:
            # Keep going: the direct seeds above are already in place.
            warnings.warn(
                f"pl.seed_everything({seed}) failed ({exc!r}); "
                "continuing with the random/numpy/torch seeds only",
                RuntimeWarning,
                stacklevel=2,
            )

    try:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    except Exception as exc:
        warnings.warn(
            f"could not enable deterministic cuDNN settings ({exc!r})",
            RuntimeWarning,
            stacklevel=2,
        )


In [3]:
# Runner wrappers
from importlib import reload
from models import tft_with_reddit_sentiment as tft_r
reload(tft_r)  # pick up edits made to the module since it was first imported

def run_with_data_seeded(data_path: str, seed: int):
    """Train and evaluate the TFT+Reddit pipeline once on ``data_path``.

    The run is seeded with ``set_seed(seed)``. The configuration comes from
    ``tft_r.get_user_config()``, with the training window pinned afterwards
    (start date 2025-02-01, 96 training days, 5 prediction days).

    Args:
        data_path: CSV file handed to ``tft_r.load_and_prepare_data``.
        seed: Seed forwarded to ``set_seed``.

    Returns:
        Whatever ``tft_r.evaluate_performance`` returns for the predictions;
        callers in this notebook use it as a mapping of metric name to value.
    """
    set_seed(seed)

    # Start from the module's config, then force the experiment window.
    cfg = tft_r.get_user_config()
    for key, value in (
        ('train_start', '2025-02-01'),
        ('training_days', 96),
        ('prediction_days', 5),
    ):
        cfg[key] = value

    # Data -> dataset -> model -> training -> best checkpoint -> predictions.
    frame = tft_r.load_and_prepare_data(data_path, cfg)
    train_ds, _ignored = tft_r.create_tft_dataset(frame, cfg)
    model, loader = tft_r.create_model_and_dataloader(train_ds, cfg)
    fitted_trainer = tft_r.train_model(model, loader, train_ds, cfg)
    best_model, eval_loader = tft_r.load_best_model_and_validate(
        fitted_trainer, train_ds, frame, model, cfg
    )
    forecast, truth = tft_r.make_predictions(best_model, eval_loader)
    return tft_r.evaluate_performance(forecast, truth)


In [None]:
# Override runner to be fully non-interactive (no input prompts)
from importlib import reload
from models import tft_with_reddit_sentiment as tft_r
reload(tft_r)

def run_with_data_seeded(data_path: str, seed: int):
    """Seeded TFT+Reddit run that uses a hard-coded configuration.

    This redefines ``run_with_data_seeded``: once this cell has been executed,
    later calls use the fixed config below instead of asking ``tft_r`` for one.

    Args:
        data_path: CSV file handed to ``tft_r.load_and_prepare_data``.
        seed: Seed forwarded to ``set_seed`` before anything else runs.

    Returns:
        The result of ``tft_r.evaluate_performance`` on the predictions.
    """
    set_seed(seed)

    # Fixed config (no prompts)
    settings = dict(
        training_type='date_anchor',
        train_start='2025-02-01',
        training_days=96,
        prediction_days=5,
        max_epochs=30,
        batch_size=128,
        learning_rate=0.03,
    )

    prepared = tft_r.load_and_prepare_data(data_path, settings)
    dataset, _rest = tft_r.create_tft_dataset(prepared, settings)
    net, batches = tft_r.create_model_and_dataloader(dataset, settings)
    fit_result = tft_r.train_model(net, batches, dataset, settings)
    best_net, val_batches = tft_r.load_best_model_and_validate(
        fit_result, dataset, prepared, net, settings
    )
    predicted, observed = tft_r.make_predictions(best_net, val_batches)
    return tft_r.evaluate_performance(predicted, observed)


In [4]:
# A/B single run (optional)
import time, pandas as pd

# Both variants use the same seed so that only the input file differs.
SINGLE_RUN_SEED = 42


def _timed_run(data_path, seed):
    """Run one seeded experiment and return ``(metrics, elapsed_seconds)``.

    Uses ``time.perf_counter`` (monotonic) so the measured duration is not
    affected by system clock adjustments during a long training run.
    """
    began = time.perf_counter()
    metrics = run_with_data_seeded(data_path, seed)
    return metrics, time.perf_counter() - began


print('Running A (Upvote neutral) ...')
A, A_t = _timed_run(A_PATH, SINGLE_RUN_SEED)
print('A:', A, f'({A_t:.2f}s)')

print('\nRunning B (FinBERT neutral) ...')
B, B_t = _timed_run(B_PATH, SINGLE_RUN_SEED)
print('B:', B, f'({B_t:.2f}s)')

# One column per variant, one row per metric returned by the runner.
cmp = pd.DataFrame({'A Upvote': A, 'B FinBERT': B})
print('\nSingle-seed comparison:')
print(cmp)



Seed set to 42


Running A (Upvote neutral) ...

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'daily_sentiment', 'post_count', 'spike_presence', 'spike_intensity', 'daily_sentiment_lag1', 'daily_sentiment_lag2', 'daily_sentiment_lag3', 'daily_sentiment_lag4', 'daily_sentiment_lag5', 'daily_sentiment_mean_3', 'daily_sentiment

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ TFT model created with 50343 parameters
  - Learning rate: 0.03
  - Hidden size: 16
  - Attention heads: 1

=== Training TFT Model ===
✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 254.523


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 58.860 >= min_delta = 0.0001. New best score: 195.663


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 124.237 >= min_delta = 0.0001. New best score: 71.426


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 20.703 >= min_delta = 0.0001. New best score: 50.723


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 6.994 >= min_delta = 0.0001. New best score: 43.729


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 12.662 >= min_delta = 0.0001. New best score: 31.068


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 17.611 >= min_delta = 0.0001. New best score: 13.456


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 5.246 >= min_delta = 0.0001. New best score: 8.211


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 2.875 >= min_delta = 0.0001. New best score: 5.336


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 5.336. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=19-train_loss=5.3358-v14.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 42


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   4.3978
1    MSE  20.1926
2   RMSE   4.4936
3   MAPE   1.3903
A: {'MAE': 4.397833347320557, 'MSE': 20.192596435546875, 'RMSE': 4.493617534637451, 'MAPE': 1.39031982421875} (80.50s)

Running B (FinBERT neutral) ...

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Training dataset created with 2 samples

=== Creating Model and DataLoader ===
✓ DataLoader created with batch size 128
✓ TFT model created with 50343 parameters
  - Learning rate: 0.03
  - Hidden size: 16
  - Attention heads: 1

=== Training TFT Model ===
✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 263.357


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 208.562 >= min_delta = 0.0001. New best score: 54.796


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 0.161 >= min_delta = 0.0001. New best score: 54.635


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.076 >= min_delta = 0.0001. New best score: 50.559


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 13.255 >= min_delta = 0.0001. New best score: 37.304


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 23.498 >= min_delta = 0.0001. New best score: 13.806


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 13.806. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=10-train_loss=13.8061.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===
✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   5.5511
1    MSE  34.0470
2   RMSE   5.8350
3   MAPE   1.7598
B: {'MAE': 5.551110744476318, 'MSE': 34.046958923339844, 'RMSE': 5.834977149963379, 'MAPE': 1.7598493099212646} (61.47s)

Single-seed comparison:
       A Upvote  B FinBERT
MAE    4.397833   5.551111
MSE   20.192596  34.046959
RMSE   4.493618   5.834977
MAPE   1.390320   1.759849


In [6]:
# Robustness: seeds mean/std
import os
import time, pandas as pd

seeds = [42, 43, 44, 45, 46]
# Metrics that go into the mean/std summary table (in this order).
SUMMARY_METRICS = ('MAE', 'RMSE', 'MAPE')

# Create the output folder before training: otherwise a missing directory is
# only discovered by to_csv after every run has finished, and results are lost.
os.makedirs(RESULTS_DIR, exist_ok=True)

# One full train/evaluate cycle per (seed, variant); perf_counter is monotonic,
# so the recorded durations are unaffected by system clock adjustments.
rows_A, rows_B = [], []
for s in seeds:
    t0 = time.perf_counter()
    mA = run_with_data_seeded(A_PATH, s)
    dtA = time.perf_counter() - t0
    rows_A.append({"seed": s, **mA, "Time(s)": dtA})

    t0 = time.perf_counter()
    mB = run_with_data_seeded(B_PATH, s)
    dtB = time.perf_counter() - t0
    rows_B.append({"seed": s, **mB, "Time(s)": dtB})

A_df = pd.DataFrame(rows_A).set_index('seed')
B_df = pd.DataFrame(rows_B).set_index('seed')

# Build the (variant, "<metric> mean|std") -> value table; std is the sample
# standard deviation (ddof=1) across seeds.
summary_cells = {}
for variant, per_seed in (('A Upvote', A_df), ('B FinBERT', B_df)):
    for metric in SUMMARY_METRICS:
        summary_cells[(variant, f'{metric} mean')] = [per_seed[metric].mean()]
        summary_cells[(variant, f'{metric} std')] = [per_seed[metric].std(ddof=1)]
summary = pd.DataFrame(summary_cells).T
summary.columns = ['value']

print('\nPer-seed A (Upvote):\n', A_df)
print('\nPer-seed B (FinBERT):\n', B_df)
print('\nRobustness summary (mean±std):\n', summary)

# Save
A_df.to_csv(os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_seeds_A_upvote.csv'))
B_df.to_csv(os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_seeds_B_finbert.csv'))
summary.to_csv(os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_summary.csv'))
print('Saved CSVs to', RESULTS_DIR)


Seed set to 42



=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'daily_sentiment', 'post_count', 'spike_presence', 'spike_intensity', 'daily_sentiment_lag1', 'daily_sentiment_lag2', 'daily_sentiment_lag3', 'daily_sentiment_lag4', 'daily_sentiment_lag5', 'daily_sentiment_mean_3', 'daily_sentiment_std_3', 'daily_sentiment_mean_

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 254.523


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 58.860 >= min_delta = 0.0001. New best score: 195.663


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 124.237 >= min_delta = 0.0001. New best score: 71.426


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 20.703 >= min_delta = 0.0001. New best score: 50.723


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 6.994 >= min_delta = 0.0001. New best score: 43.729


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 12.662 >= min_delta = 0.0001. New best score: 31.068


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 17.611 >= min_delta = 0.0001. New best score: 13.456


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 5.246 >= min_delta = 0.0001. New best score: 8.211


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 2.875 >= min_delta = 0.0001. New best score: 5.336


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 5.336. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=19-train_loss=5.3358-v15.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   4.3978
1    MSE  20.1926
2   RMSE   4.4936
3   MAPE   1.3903

=== TFT Configuration (96-day training, 5-day prediction) ===


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'daily_sentiment', 'post_count', 'spike_presence', 'spike_intensity', 'log_close', 'daily_sentiment_lag1', 'daily_sentiment_lag2', 'daily_sentiment_lag3', 'daily_sentiment_lag4', 'daily_sentiment_lag5', 'daily_sentiment_mean_3', 'daily_sentiment_std_3', 'daily_sentiment_mean_7', 'daily_sentiment_std_7', 'daily_sentiment_


   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 263.357


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 208.562 >= min_delta = 0.0001. New best score: 54.796


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 0.161 >= min_delta = 0.0001. New best score: 54.635


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.076 >= min_delta = 0.0001. New best score: 50.559


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 13.255 >= min_delta = 0.0001. New best score: 37.304


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 23.498 >= min_delta = 0.0001. New best score: 13.806


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 13.806. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=10-train_loss=13.8061-v1.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 43


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   5.5511
1    MSE  34.0470
2   RMSE   5.8350
3   MAPE   1.7598

=== TFT Configuration (96-day training, 5-day prediction) ===


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'daily_sentiment', 'post_count', 'spike_presence', 'spike_intensity', 'daily_sentiment_lag1', 'daily_sentiment_lag2', 'daily_sentiment_lag3', 'daily_sentiment_lag4', 'daily_sentiment_lag5', 'daily_sentiment_mean_3', 'daily_sentiment_std_3', 'daily_sentiment_mean_7', 'daily_sentiment_std_7', 'daily_sentiment_mean_14', 'daily_


   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 451.978


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 339.759 >= min_delta = 0.0001. New best score: 112.219


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 25.297 >= min_delta = 0.0001. New best score: 86.923


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 15.824 >= min_delta = 0.0001. New best score: 71.098


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 1.197 >= min_delta = 0.0001. New best score: 69.902


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 16.168 >= min_delta = 0.0001. New best score: 53.734


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 2.496 >= min_delta = 0.0001. New best score: 51.238


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.820 >= min_delta = 0.0001. New best score: 46.417


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 0.064 >= min_delta = 0.0001. New best score: 46.353


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.050 >= min_delta = 0.0001. New best score: 42.303


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 5.881 >= min_delta = 0.0001. New best score: 36.423


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 7.453 >= min_delta = 0.0001. New best score: 28.970


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 6.783 >= min_delta = 0.0001. New best score: 22.187


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 5.612 >= min_delta = 0.0001. New best score: 16.575


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 16.575. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=18-train_loss=16.5751.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 43


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   3.4582
1    MSE  13.4551
2   RMSE   3.6681
3   MAPE   1.0946

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_i

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Using 96 training days + 5 prediction days from 2025-02-01
  - Total data range: 2025-02-03 00:00:00 to 2025-07-17 00:00:00
  - Total data points: 101

First few rows:
     time_idx       date       close    volume  days_since_earning  month  \
128         0 2025-02-03  383.679993  93732100                   4      2   
129         1 2025-02-04  392.209992  57072200                   5      2   
130         2 2025-02-05  378.170013  57223300                   6      2   
131         3 2025-02-06  374.320007  77918200                   7      2   
132         4 2025-02-07  361.619995  70298300                   8      2   

     day_of_week  quarter  year  is_month_end  ...  daily_sentiment_mean_7  \
128            0        1  2025             0  ...                0.362728   
129            1        1  2025             0  ...                0.413299   
130            2        1  2025             0  ...                0.222823   
131            3        1  2025             0  ...     


   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 406.194


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 369.534 >= min_delta = 0.0001. New best score: 36.660


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 6.882 >= min_delta = 0.0001. New best score: 29.777


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 24.827 >= min_delta = 0.0001. New best score: 4.950


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 4.950. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=10-train_loss=4.9501.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 44


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric    Value
0    MAE   6.7474
1    MSE  47.7138
2   RMSE   6.9075
3   MAPE   2.1312

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetw

✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 1853.620


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 713.455 >= min_delta = 0.0001. New best score: 1140.165


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 831.023 >= min_delta = 0.0001. New best score: 309.141


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 153.717 >= min_delta = 0.0001. New best score: 155.424


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 18.809 >= min_delta = 0.0001. New best score: 136.616


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 72.220 >= min_delta = 0.0001. New best score: 64.395


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 14.590 >= min_delta = 0.0001. New best score: 49.805


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 3.218 >= min_delta = 0.0001. New best score: 46.588


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 2.841 >= min_delta = 0.0001. New best score: 43.747


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 13.099 >= min_delta = 0.0001. New best score: 30.648


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 6.211 >= min_delta = 0.0001. New best score: 24.437


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 2.291 >= min_delta = 0.0001. New best score: 22.146


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 7.725 >= min_delta = 0.0001. New best score: 14.421


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.871 >= min_delta = 0.0001. New best score: 9.550


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=30` reached.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=27-train_loss=9.5502.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 44


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric   Value
0    MAE  1.7114
1    MSE  3.8613
2   RMSE  1.9650
3   MAPE  0.5430

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', '

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Training dataset created with 2 samples

=== Creating Model and DataLoader ===
✓ DataLoader created with batch size 128
✓ TFT model created with 50343 parameters
  - Learning rate: 0.03
  - Hidden size: 16
  - Attention heads: 1

=== Training TFT Model ===
✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 1870.110


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 1027.540 >= min_delta = 0.0001. New best score: 842.570


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 307.427 >= min_delta = 0.0001. New best score: 535.143


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 472.953 >= min_delta = 0.0001. New best score: 62.190


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 62.190. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=04-train_loss=62.1899.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 45


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric     Value
0    MAE   14.7557
1    MSE  235.4535
2   RMSE   15.3445
3   MAPE    4.6841

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Training dataset created with 2 samples

=== Creating Model and DataLoader ===
✓ DataLoader created with batch size 128
✓ TFT model created with 50343 parameters
  - Learning rate: 0.03
  - Hidden size: 16
  - Attention heads: 1

=== Training TFT Model ===
✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 136.524


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 73.328 >= min_delta = 0.0001. New best score: 63.196


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 36.137 >= min_delta = 0.0001. New best score: 27.059


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 16.288 >= min_delta = 0.0001. New best score: 10.771


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 3.668 >= min_delta = 0.0001. New best score: 7.103


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 1.292 >= min_delta = 0.0001. New best score: 5.811


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 0.818 >= min_delta = 0.0001. New best score: 4.993


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 3.124 >= min_delta = 0.0001. New best score: 1.869


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 1.869. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=23-train_loss=1.8686.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 45


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric   Value
0    MAE  1.3124
1    MSE  3.1258
2   RMSE  1.7680
3   MAPE  0.4190

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'daily_sentiment', 'post_count', 'spike_presence', 'spike_intensity', 'log_close', 'daily_sentiment_lag1', 'daily_sentiment_lag2', 'daily_sentiment_lag3', 'daily_sentiment_lag4', 'daily_sentiment_lag5', 'daily_sentiment_mean_3', 'daily_sentiment_std_3', 'daily_sentiment_mean_7', 'daily_sentiment_std_7', 'daily_sentiment_mean_14', 'daily_sentiment_std_14', 'spike_presence_sum_3', 'spike_presence_sum_7', 'spike_presence_sum_14', 'spike_intensity_max_3', 'spike_intensity_max_7', 'spike_intensity_max_14']
✓ Using date_anchor approach with user-specified start date
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
✓ Using 96 training days + 5 prediction days from 2025-02-01
  - Total data range: 2025-02-03 00:00:00 to 2025-07-17 00:00:00
  - Tota


   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 149.690


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 65.602 >= min_delta = 0.0001. New best score: 84.088


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 24.243 >= min_delta = 0.0001. New best score: 59.845


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 5.410 >= min_delta = 0.0001. New best score: 54.435


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 10.361 >= min_delta = 0.0001. New best score: 44.074


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 1.032 >= min_delta = 0.0001. New best score: 43.043


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 9.829 >= min_delta = 0.0001. New best score: 33.213


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 7.795 >= min_delta = 0.0001. New best score: 25.419


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 8.110 >= min_delta = 0.0001. New best score: 17.309


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 4.020 >= min_delta = 0.0001. New best score: 13.289


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 3.330 >= min_delta = 0.0001. New best score: 9.960


Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 0.370 >= min_delta = 0.0001. New best score: 9.590


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 9.590. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=18-train_loss=9.5899.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 46


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric   Value
0    MAE  1.8132
1    MSE  5.9111
2   RMSE  2.4313
3   MAPE  0.5761

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike.csv
  - Shape: (232, 19)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return', 'unique_id', 'dail

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


     time_idx       date       close    volume  days_since_earning  month  \
128         0 2025-02-03  383.679993  93732100                   4      2   
129         1 2025-02-04  392.209992  57072200                   5      2   
130         2 2025-02-05  378.170013  57223300                   6      2   
131         3 2025-02-06  374.320007  77918200                   7      2   
132         4 2025-02-07  361.619995  70298300                   8      2   

     day_of_week  quarter  year  is_month_end  ...  daily_sentiment_mean_7  \
128            0        1  2025             0  ...                0.096856   
129            1        1  2025             0  ...                0.194673   
130            2        1  2025             0  ...                0.099435   
131            3        1  2025             0  ...                0.156311   
132            4        1  2025             0  ...                0.030148   

     daily_sentiment_std_7  daily_sentiment_mean_14  daily_sentiment


   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 260.259


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 260.259. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=00-train_loss=260.2586.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===


Seed set to 46


✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric      Value
0    MAE    80.7637
1    MSE  6544.5747
2   RMSE    80.8985
3   MAPE    25.5217

=== TFT Configuration (96-day training, 5-day prediction) ===

✓ Configuration set:
  - Training start: 2025-02-01
  - Training days: 96
  - Prediction days: 5
  - Max epochs: 30
  - Batch size: 128
  - Learning rate: 0.03

=== Loading and Preparing Data ===
✓ Data loaded successfully from /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/data/processed/tsla_price_sentiment_spike_new.csv
  - Shape: (232, 20)
  - Date range: 2024-06-04 to 2025-07-22

Data columns:
['time_idx', 'date', 'close', 'volume', 'days_since_earning', 'month', 'day_of_week', 'quarter', 'year', 'is_month_end', 'is_month_start', 'rolling_volatility', 'return_1d', 'cumulative_return',

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Training dataset created with 2 samples

=== Creating Model and DataLoader ===
✓ DataLoader created with batch size 128
✓ TFT model created with 50343 parameters
  - Learning rate: 0.03
  - Hidden size: 16
  - Attention heads: 1

=== Training TFT Model ===
✓ Trainer configured with 30 max epochs (monitoring train_loss)
  - Early stopping enabled
  - Learning rate monitoring enabled
  - Model checkpointing enabled

Starting model training...



   | Name                               | Type                            | Params | Mode 
------------------------------------------------------------------------------------------------
0  | loss                               | TorchMetricWrapper              | 0      | train
1  | logging_metrics                    | ModuleList                      | 0      | train
2  | input_embeddings                   | MultiEmbedding                  | 1      | train
3  | prescalers                         | ModuleDict                      | 432    | train
4  | static_variable_selection          | VariableSelectionNetwork        | 48     | train
5  | encoder_variable_selection         | VariableSelectionNetwork        | 19.3 K | train
6  | decoder_variable_selection         | VariableSelectionNetwork        | 17.2 K | train
7  | static_context_variable_selection  | GatedResidualNetwork            | 1.1 K  | train
8  | static_context_initial_hidden_lstm | GatedResidualNetwork            | 1.1 K  

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved. New best score: 259.777


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Metric train_loss improved by 196.726 >= min_delta = 0.0001. New best score: 63.051


Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Monitored metric train_loss did not improve in the last 5 records. Best score: 63.051. Signaling Trainer to stop.


✅ Training completed!

=== Loading Best Model and Creating Validation Dataset ===


💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


✓ Best model loaded from: /Users/hwang-yejin/Desktop/Financial Time Series Forecasting with Deep Learning Models and Social Media Sentiment/notebooks/checkpoints/tft-epoch=05-train_loss=63.0506.ckpt
✓ Validation dataset created with 1 samples

=== Making Predictions ===
✓ Predictions made successfully
  - Prediction shape: torch.Size([1, 5])
  - Actuals shape: torch.Size([1, 5])

=== Performance Evaluation ===

📊 Model Performance Metrics:
  Metric     Value
0    MAE   20.9365
1    MSE  455.0829
2   RMSE   21.3327
3   MAPE    6.6037

Per-seed A (Upvote):
             MAE          MSE       RMSE       MAPE    Time(s)
seed                                                         
42     4.397833    20.192596   4.493618   1.390320  63.757982
43     3.458209    13.455104   3.668120   1.094604  64.680809
44     1.711438     3.861286   1.965015   0.542998  61.537073
45     1.312439     3.125810   1.767996   0.419040  70.999236
46    80.763687  6544.574707  80.898544  25.521740  22.752243

Per

In [11]:
# Show per-seed and summary results instead of global results matrix.
#
# Loads three CSVs from RESULTS_DIR (per-seed metrics for A/Upvote, per-seed
# metrics for B/FinBERT, and the mean/std summary) and prints them. If any of
# the files is absent, nothing is loaded and the missing paths are listed so
# the reader knows exactly which artifact has to be regenerated.
# NOTE(review): these files are presumably written by the earlier robustness
# cell — confirm before relying on them being up to date.
import os, pandas as pd

A_df_path = os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_seeds_A_upvote.csv')
B_df_path = os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_seeds_B_finbert.csv')
summary_path = os.path.join(RESULTS_DIR, 'TSLA_neutral_ab_summary.csv')

# Check every expected file once so the failure branch can name the culprits.
missing_paths = [
    path for path in (A_df_path, B_df_path, summary_path)
    if not os.path.exists(path)
]

if not missing_paths:
    # The first CSV column is used as the index (`index_col=0`).
    A_df = pd.read_csv(A_df_path, index_col=0)
    B_df = pd.read_csv(B_df_path, index_col=0)
    summary = pd.read_csv(summary_path, index_col=0)
    print('\nPer-seed A (Upvote):\n', A_df)
    print('\nPer-seed B (FinBERT):\n', B_df)
    print('\nRobustness summary (mean±std):\n', summary)
else:
    print('One or more result CSVs not found in:', RESULTS_DIR)
    # Spell out which files are missing instead of leaving the reader to guess.
    for path in missing_paths:
        print('  missing:', path)



Per-seed A (Upvote):
             MAE          MSE       RMSE       MAPE    Time(s)
seed                                                         
42     4.397833    20.192596   4.493618   1.390320  63.757982
43     3.458209    13.455104   3.668120   1.094604  64.680809
44     1.711438     3.861286   1.965015   0.542998  61.537073
45     1.312439     3.125810   1.767996   0.419040  70.999236
46    80.763687  6544.574707  80.898544  25.521740  22.752243

Per-seed B (FinBERT):
             MAE         MSE       RMSE      MAPE    Time(s)
seed                                                       
42     5.551111   34.046959   5.834977  1.759849  43.465069
43     6.747394   47.713783   6.907516  2.131168  50.900401
44    14.755689  235.453537  15.344496  4.684050  23.309640
45     1.813226    5.911119   2.431279  0.576063  51.303723
46    20.936548  455.082916  21.332672  6.603738  31.432009

Robustness summary (mean±std):
       A Upvote mean  A Upvote std  B FinBERT mean  B FinBERT std
M