In [1]:
import sys
import os

# Add Time-Series-Library to the Python path so its `layers` package can be imported.
# The notebook normally lives in other_models/ and Time-Series-Library sits at the
# project root, so the root is located by searching upward from the working directory.
# This works from the project root, from other_models/, or from any deeper directory,
# and is not confused by unrelated path components that merely contain "other_models".
def find_project_root(start_dir, marker_name):
    """Return the closest directory at or above ``start_dir`` that contains ``marker_name``.

    Args:
        start_dir: Directory to start searching from.
        marker_name: File or directory name that identifies the project root.

    Returns:
        Absolute path of the first matching directory, or ``None`` when the
        filesystem root is reached without finding ``marker_name``.
    """
    candidate = os.path.abspath(start_dir)
    while True:
        if os.path.exists(os.path.join(candidate, marker_name)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # dirname() only returns its input at the filesystem root
            return None
        candidate = parent


cwd = os.getcwd()
project_root = find_project_root(cwd, 'Time-Series-Library')
if project_root is None:
    # Nothing found: fall back to the notebook-location convention so the warning
    # below still reports the path that was expected.
    project_root = os.path.dirname(cwd) if os.path.basename(cwd) == 'other_models' else cwd

timeseries_lib_path = os.path.join(project_root, 'Time-Series-Library')

if os.path.exists(timeseries_lib_path):
    # Prepend (rather than append) so this checkout wins over any installed copy.
    if timeseries_lib_path not in sys.path:
        sys.path.insert(0, timeseries_lib_path)
    print(f"✅ Added {timeseries_lib_path} to Python path")
else:
    # Best-effort: only warn here; the `layers` imports below will fail loudly if
    # the package really is unavailable.
    print(f"⚠️  Warning: Could not find Time-Series-Library at {timeseries_lib_path}")
    print(f"   Current directory: {cwd}")
    print(f"   Looking for: {timeseries_lib_path}")

import torch    
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from layers.Embed import DataEmbedding
from layers.Conv_Blocks import Inception_Block_V1   
# Convolution block applied to the 2D-reshaped time series; it can be swapped for a different block

✅ Added /Users/loganyamamoto/Desktop/class/CSCI/566/project/StockPredictor/Time-Series-Library to Python path


# TimesNet Configuration Parameters Guide

This guide explains all configuration parameters needed for TimesNet model initialization.

## Required Parameters for Classification Task

Based on your data shape `(1445936, 31, 13)` — that is, `(num_samples, seq_len, num_features)` — here are the key parameters:


In [2]:
# TimesNet Configuration Parameters Explained

class TimesNetConfig:
    """
    Annotated reference of the configuration parameters TimesNet reads.

    The class attributes document each parameter together with a default value.
    They are based on a dataset of shape
    (num_samples=1445936, seq_len=31, features=13).
    Use ``get_stock_config()`` to obtain a ready-made ``SimpleNamespace`` example.
    """
    
    # ============================================
    # TASK CONFIGURATION
    # ============================================
    task_name = 'classification'  # Options: 'long_term_forecast', 'short_term_forecast', 
                                   #          'imputation', 'classification', 'anomaly_detection'
    
    # ============================================
    # DATA DIMENSIONS (CRITICAL FOR YOUR DATA)
    # ============================================
    seq_len = 31          # Input sequence length (time dimension)
                          # YOUR DATA: 31 timesteps per sample
    
    enc_in = 13           # Encoder input size = number of features per timestep
                          # YOUR DATA: 13 features per timestep
    
    c_out = 13            # Output size (for forecast/imputation/anomaly)
                          # For classification, this is ignored
    
    num_class = 3         # Number of classification classes
                          # Must equal the number of distinct labels in your targets
                          # (3 here; get_stock_config() shows a binary up/down example)
    
    # ============================================
    # MODEL ARCHITECTURE
    # ============================================
    d_model = 512         # Dimension of model embeddings (hidden dimension)
                          # Typical values: 64, 128, 256, 512, 1024
                          # Larger = more capacity but slower
    
    d_ff = 2048           # Dimension of feed-forward network (FFN)
                          # Usually 4x d_model (512 * 4 = 2048)
    
    e_layers = 2          # Number of encoder layers (TimesBlock layers)
                          # More layers = deeper model, but risk of overfitting
                          # Typical: 2-4 layers
    
    # ============================================
    # TIMESBLOCK SPECIFIC PARAMETERS
    # ============================================
    top_k = 5             # Number of top frequencies to consider in FFT
                          # TimesNet finds top_k periodic patterns
                          # Typical: 3-5
    
    num_kernels = 6       # Number of kernels in Inception_Block_V1
                          # More kernels = more diverse convolution patterns
                          # Typical: 6
    
    # ============================================
    # FORECASTING TASK PARAMETERS (not used for classification)
    # ============================================
    # Kept at 0 rather than None: these values are still required for classification
    # (see get_stock_config) and must be numeric, because TimesBlock in
    # Time-Series-Library adds pred_len to seq_len.
    pred_len = 0            # Prediction sequence length (for forecast tasks)
    label_len = 0           # Start token length (for forecast tasks)
    
    # ============================================
    # EMBEDDING CONFIGURATION
    # ============================================
    embed = 'timeF'       # Time features encoding type
                          # Options: 'timeF', 'fixed', 'learned'
                          # - 'timeF': Time feature embedding (recommended)
                          # - 'fixed': Fixed sinusoidal positional embedding
                          # - 'learned': Learnable positional embedding
    
    freq = 'd'            # Frequency for time features encoding
                          # Options: 's' (secondly), 't' (minutely), 'h' (hourly),
                          #          'd' (daily), 'b' (business days), 'w' (weekly),
                          #          'm' (monthly)
                          # YOUR DATA: 'd' (daily stock data)
    
    dropout = 0.1         # Dropout rate (0.0 to 1.0)
                          # Prevents overfitting
                          # Typical: 0.1-0.3
    
    # ============================================
    # OPTIONAL PARAMETERS
    # ============================================
    use_norm = 1          # Whether to use normalization (1=True, 0=False)
    
    freeze_encoder = False  # If True, freezes encoder (enc_embedding, TimesBlock layers, layer_norm)
                            # and only trains the classifier head (projection layer)
                            # Useful for transfer learning or fine-tuning scenarios
                            # Default: False (train entire model)
    
    # ============================================
    # EXAMPLE CONFIG FOR YOUR STOCK PREDICTION TASK
    # ============================================
    
    @staticmethod
    def get_stock_config():
        """Build an example configuration for stock price classification.

        The values set here are independent of the class-level defaults above
        (for example seq_len=240, num_class=2 and d_model=256); adjust them to
        match your dataset before use.

        Returns:
            types.SimpleNamespace: namespace carrying task_name, seq_len, enc_in,
            num_class, d_model, d_ff, e_layers, top_k, num_kernels, embed, freq,
            dropout, freeze_encoder, pred_len, label_len and c_out.
        """
        from types import SimpleNamespace
        
        config = SimpleNamespace()
        
        # Task
        config.task_name = 'classification'
        
        # Data dimensions (example values; match these to your dataset)
        config.seq_len = 240      # 240-day lookback window
        config.enc_in = 13       # 13 features per timestep
        config.num_class = 2     # Binary classification (up/down)
        
        # Model architecture
        config.d_model = 256     # Moderate size for efficiency
        config.d_ff = 1024       # 4x d_model
        config.e_layers = 2      # 2 TimesBlock layers
        
        # TimesBlock specific
        config.top_k = 5         # Find top 5 periodic patterns
        config.num_kernels = 6   # 6 convolution kernels
        
        # Embedding
        config.embed = 'timeF'   # Time feature embedding
        config.freq = 'd'        # Daily frequency
        config.dropout = 0.1     # 10% dropout
        
        # Encoder freezing (optional)
        config.freeze_encoder = False  # Set to True to freeze encoder and train only classifier head
        
        # Not used for classification but required
        config.pred_len = 0
        config.label_len = 0
        config.c_out = config.enc_in  # mirror the input feature count
        
        return config

In [3]:
# Add logan-version to the Python path so the `trainer` module can be imported
import sys
import os

# Locate the project root: the closest directory at or above the working directory
# that contains logan-version/. Searching upward works from the project root, from
# other_models/, or from any deeper directory, and is not confused by unrelated path
# components that merely contain "other_models".
cwd = os.getcwd()
project_root = None
candidate = cwd
while True:
    if os.path.isdir(os.path.join(candidate, 'logan-version')):
        project_root = candidate
        break
    parent = os.path.dirname(candidate)
    if parent == candidate:  # dirname() only returns its input at the filesystem root
        break
    candidate = parent

if project_root is None:
    # Nothing found: fall back to the notebook-location convention.
    project_root = os.path.dirname(cwd) if os.path.basename(cwd) == 'other_models' else cwd

logan_version_path = os.path.join(project_root, 'logan-version')
if not os.path.isdir(logan_version_path):
    # Best-effort: warn only; importing `trainer` will fail loudly if it is truly missing.
    print(f"⚠️  Warning: Could not find logan-version at {logan_version_path}")
# Prepend (rather than append) so this checkout wins over any installed copy.
if logan_version_path not in sys.path:
    sys.path.insert(0, logan_version_path)

from trainer import TrainerConfig, Trainer
from types import SimpleNamespace

# TimesNet-specific model configuration, built attribute by attribute.
# seq_len is deliberately absent: it is supplied through TrainerConfig instead.
timesnet_config = SimpleNamespace()

# --- Task ---
timesnet_config.task_name = 'classification'

# --- Data dimensions (adjust to your actual data) ---
# NOTE(review): the recorded output of this cell shows X_train with 3 features per
# timestep while enc_in is 13 — confirm which value is intended before training.
timesnet_config.enc_in = 13        # features per timestep
timesnet_config.num_class = 3      # number of output classes

# --- Model architecture ---
timesnet_config.d_model = 200      # embedding / hidden dimension
timesnet_config.d_ff = 800         # feed-forward dimension (4 x d_model here)
timesnet_config.e_layers = 1       # number of encoder (TimesBlock) layers

# --- TimesBlock ---
timesnet_config.top_k = 3          # number of top FFT frequencies considered
timesnet_config.num_kernels = 5    # number of kernels in Inception_Block_V1

# --- Embedding ---
timesnet_config.embed = 'timeF'    # time-feature encoding type ('timeF', 'fixed', 'learned')
timesnet_config.freq = 'd'         # time-feature frequency ('d' = daily)
timesnet_config.dropout = 0.1      # dropout rate

# --- Present because the model config requires them; unused for classification ---
timesnet_config.pred_len = 0
timesnet_config.label_len = 0
timesnet_config.c_out = 13         # usually kept equal to enc_in

# --- Encoder freezing (optional) ---
# True freezes the encoder (enc_embedding, TimesBlock layers, layer_norm) so only the
# classifier head (projection layer) trains — useful for transfer learning / fine-tuning.
timesnet_config.freeze_encoder = False

# Assemble the TrainerConfig; keyword arguments are grouped by concern.
config = TrainerConfig(
    # --- Data selection ---
    stocks=["AAPL", "MSFT", "GOOGL"],        # stock tickers to load
    time_args=["1990-01-01", "2015-12-31"],  # time range arguments
    # "LS": the trainer's recorded log describes this as sampled timesteps with no gap
    # filling. An earlier note listed "240L20S21" or "full" — confirm accepted values
    # against the trainer module.
    period_type="LS",
    # Sequence length for data generation and model architecture.
    # NOTE(review): the recorded run loaded sequences of length 31 and warned
    # "31 != 240" while the model was built with seq_len=240 — confirm before training.
    seq_len=240,
    use_nlp=False,                           # whether to use NLP features
    nlp_method="aggregated",                 # NLP method ("aggregated" or "individual")

    # --- Model ---
    model_type="TimesNet",                   # model type
    model_config=timesnet_config,            # TimesNet-specific configuration
    prediction_type="classification",        # type of prediction task
    saved_model=None,                        # path to a saved model (None = train from scratch)

    # --- Training loop ---
    batch_size=32,                           # batch size for training
    num_epochs=1000,                         # number of training epochs
    save_every_epochs=25,                    # save the model every N epochs (0 disables)
    early_stop_patience=7,                   # early-stopping patience (epochs)
    early_stop_min_delta=0.001,              # minimum improvement for early stopping

    # --- Portfolio evaluation ---
    k=10,                                    # number of top/bottom positions in the portfolio
    cost_bps_per_side=5.0,                   # transaction cost per side, in basis points
)

# Echo the key settings so a misconfiguration is visible before any training starts.
print("✅ TrainerConfig created successfully!")
for label, attr in (
    ("Model type", "model_type"),
    ("Period type", "period_type"),
    ("Early stop patience", "early_stop_patience"),
    ("Sequence length (seq_len)", "seq_len"),  # seq_len now comes from TrainerConfig
):
    print(f"   {label}: {getattr(config, attr)}")
for attr in ("enc_in", "num_class", "freeze_encoder"):
    print(f"   TimesNet {attr}: {getattr(config.model_config, attr)}")

# Build the Trainer from the config and report what it ended up with.
trainer = Trainer(config=config)
print("✅ Trainer initialized successfully!")
for attr in ("device", "is_main"):
    print(f"   Trainer {attr}: {getattr(trainer, attr)}")

# The parameter count is reported only when the trainer exposes a non-None `Model`.
trainer_model = getattr(trainer, 'Model', None)
if trainer_model is not None:
    total_params = sum(p.numel() for p in trainer_model.parameters())
    print(f"   Model total parameters: {total_params:,}")




✅ TrainerConfig created successfully!
   Model type: TimesNet
   Period type: LS
   Early stop patience: 7
   Sequence length (seq_len): 240
   TimesNet enc_in: 13
   TimesNet num_class: 3
   TimesNet freeze_encoder: False
[distributed] is_dist=False, rank=0/1
[device] using mps
[mps] Apple Metal Performance Shaders backend
[dataloader] num_workers=1, persistent_workers=False, pin_memory=False


Loading training dataset (.npz): 100%|██████████| 3/3 [00:00<00:00, 743.10it/s]
Loading validation dataset (.npz): 100%|██████████| 3/3 [00:00<00:00, 1515.10it/s]
Loading test dataset (.npz): 100%|██████████| 3/3 [00:00<00:00, 1936.73it/s]
Loading metrics dataset (.npz): 100%|██████████| 3/3 [00:00<00:00, 3750.50it/s]

[data] Loaded from cache
[data] Data loaded successfully
  X_train shape: torch.Size([9779, 31, 3])
  X_val shape: torch.Size([2445, 31, 3])
  X_test shape: torch.Size([3057, 31, 3])
  Expected X shape: (num_samples, <seq_len=240, num_features) - sampled timesteps only
  Actual sequence length: 31
  Period type: LS (sampled timesteps, no gap filling)
[config] Data shape validation:
  X_train shape: (9779, 31, 3)
  Expected shape: (num_samples, 240, 3)
  ✓ Feature dimension matches: 3 == 3
  ⚠️ Sequence length: 31 != 240
[config] Determined input_shape from data: (31, 3) (seq_len=31, features=3)





[TimesNet] Model initialized with config:
  seq_len: 240
  enc_in: 13
  num_class: 3
  d_model: 200
  freeze_encoder: False
52957803 total parameters
✅ Trainer initialized successfully!
   Trainer device: mps
   Trainer is_main: True
   Model total parameters: 52,957,803


In [4]:
# Smoke test: run a single training epoch with the trainer built above.
# NOTE(review): the recorded output labels this call "Epoch: 2" / "train 2/1000", so the
# argument appears to be a zero-based epoch index — confirm against Trainer.train_one_epoch.
# NOTE(review): the recorded run failed with
#   TypeError: Model.forward() missing 3 required positional arguments:
#   'x_mark_enc', 'x_dec', and 'x_mark_dec'
# so the trainer seems to call the model with the input tensor only, while this model's
# forward() requires the extra arguments. That has to be fixed in the trainer or in a
# model wrapper; this cell cannot succeed until it is.
trainer.train_one_epoch(1)

Epoch: 2
--------------------------------------------


train 2/1000:   0%|          | 0/306 [00:03<?, ?it/s]


TypeError: Model.forward() missing 3 required positional arguments: 'x_mark_enc', 'x_dec', and 'x_mark_dec'

**TODO:** take in the full time window.