In [1]:
import torch
from transformers import TimeSeriesTransformerConfig
from transformers import TimeSeriesTransformerForPrediction

In [2]:
# Initializing a Time Series Transformer configuration with 10 time steps for prediction
configuration = TimeSeriesTransformerConfig(prediction_length=10,
                                           context_length=30,
                                           distribution_output='student_t',
                                           input_size=4,
                                           loss = 'nll',
                                           lags_sequence=[1,2,3,4],
                                           num_time_features=3,
                                           cardinality=None
                                            )

# Randomly initializing a model (with random weights) from the configuration
model = TimeSeriesTransformerForPrediction(configuration)

# Accessing the model configuration
configuration = model.config
configuration

TimeSeriesTransformerConfig {
  "activation_dropout": 0.1,
  "activation_function": "gelu",
  "attention_dropout": 0.1,
  "cardinality": [
    0
  ],
  "context_length": 30,
  "d_model": 64,
  "decoder_attention_heads": 2,
  "decoder_ffn_dim": 32,
  "decoder_layerdrop": 0.1,
  "decoder_layers": 2,
  "distribution_output": "student_t",
  "dropout": 0.1,
  "embedding_dimension": [
    0
  ],
  "encoder_attention_heads": 2,
  "encoder_ffn_dim": 32,
  "encoder_layerdrop": 0.1,
  "encoder_layers": 2,
  "feature_size": 27,
  "init_std": 0.02,
  "input_size": 4,
  "is_encoder_decoder": true,
  "lags_sequence": [
    1,
    2,
    3,
    4
  ],
  "loss": "nll",
  "model_type": "time_series_transformer",
  "num_dynamic_real_features": 0,
  "num_parallel_samples": 100,
  "num_static_categorical_features": 0,
  "num_static_real_features": 0,
  "num_time_features": 3,
  "prediction_length": 10,
  "scaling": "mean",
  "transformers_version": "4.45.2",
  "use_cache": true
}

In [3]:
# Generate a past_observed_mask tensor with shape [1000, 30,4], all elements equal to 1
past_observed_mask_tensor = torch.ones((185281, 34,4), dtype=torch.float)

# Check the shape and first few elements of the tensor
past_observed_mask_tensor.shape

torch.Size([185281, 34, 4])

In [4]:
past_time_features = torch.load('past_time_features_tensor.pt')
future_time_features=torch.load('future_time_features_tensor.pt')
past_values_tensors=torch.load('past_value_tensor.pt')
future_values_tensors=torch.load('future_value_tensor.pt')

In [5]:
past_time_features.shape, past_values_tensors.shape

(torch.Size([185281, 34, 3]), torch.Size([185281, 34, 4]))

In [6]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

# Split the dataset into training and validation sets (e.g., 80% training, 20% validation)
train_past_values, val_past_values, \
train_past_time_features, val_past_time_features, \
train_past_observed_mask, val_past_observed_mask, \
train_future_values, val_future_values, \
train_future_time_features, val_future_time_features = train_test_split(
    past_values_tensors, past_time_features, past_observed_mask_tensor, 
    future_values_tensors, future_time_features, test_size=0.2, random_state=42)

# Create TensorDatasets for training and validation
train_dataset = TensorDataset(train_past_values, train_past_time_features, train_past_observed_mask, train_future_values, train_future_time_features)
val_dataset = TensorDataset(val_past_values, val_past_time_features, val_past_observed_mask, val_future_values, val_future_time_features)

# Create DataLoaders for training and validation
batch_size = 1024
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Initialize the optimizer with a learning rate
optimizer = optim.Adam(model.parameters(), lr=0.00001)

# Early stopping parameters
patience = 4  # Number of epochs with no improvement after which training will be stopped
best_val_loss = float('inf')  # Set best validation loss to infinity initially
counter = 0  # Counter to keep track of how many times the validation loss has increased

# File path to save the model
best_model_path = "best_model.pth"

In [7]:
num_epochs = 100
for epoch in range(num_epochs):
    running_loss = 0.0

    # Training loop
    for batch in train_loader:
        batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
        
        # Forward pass
        outputs = model(
            past_values=batch_past_values,
            past_time_features=batch_past_time_features,
            past_observed_mask=batch_past_observed_mask,
            future_values=batch_future_values,
            future_time_features=batch_future_time_features
        )
        
        # Compute loss
        loss = outputs.loss
        
        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Accumulate training loss
        running_loss += loss.item()
    
    # Validation loop (without backpropagation)
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    with torch.no_grad():  # No gradient calculation during validation
        for batch in val_loader:
            batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch

            # Forward pass
            outputs = model(
                past_values=batch_past_values,
                past_time_features=batch_past_time_features,
                past_observed_mask=batch_past_observed_mask,
                future_values=batch_future_values,
                future_time_features=batch_future_time_features
            )
            
            # Compute validation loss
            val_loss += outputs.loss.item()

    # Calculate average losses
    avg_train_loss = running_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    # Print training and validation losses for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # Early stopping check and save the model if validation loss improves
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss  # Update best validation loss
        counter = 0  # Reset the counter if validation loss improves
        # Save the model
        torch.save(model.state_dict(), best_model_path)  # Save the model's state_dict to file
        print(f"Model saved at epoch {epoch+1} with validation loss {avg_val_loss:.4f}")
    else:
        counter += 1  # Increment the counter if validation loss does not improve
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1} due to no improvement in validation loss.")
            break  # Stop training if the validation loss increases for 'patience' epochs

Epoch [1/100], Training Loss: 22.2395, Validation Loss: 21.9111
Model saved at epoch 1 with validation loss 21.9111
Epoch [2/100], Training Loss: 21.5693, Validation Loss: 21.1813
Model saved at epoch 2 with validation loss 21.1813
Epoch [3/100], Training Loss: 20.8244, Validation Loss: 20.4788
Model saved at epoch 3 with validation loss 20.4788
Epoch [4/100], Training Loss: 20.2457, Validation Loss: 20.0373
Model saved at epoch 4 with validation loss 20.0373
Epoch [5/100], Training Loss: 19.9051, Validation Loss: 19.7739
Model saved at epoch 5 with validation loss 19.7739
Epoch [6/100], Training Loss: 19.6839, Validation Loss: 19.5867
Model saved at epoch 6 with validation loss 19.5867
Epoch [7/100], Training Loss: 19.5173, Validation Loss: 19.4385
Model saved at epoch 7 with validation loss 19.4385
Epoch [8/100], Training Loss: 19.3817, Validation Loss: 19.3138
Model saved at epoch 8 with validation loss 19.3138
Epoch [9/100], Training Loss: 19.2652, Validation Loss: 19.2051
Model sa

In [8]:
model.load_state_dict(torch.load(best_model_path))  # Load the saved best model checkpoint

<All keys matched successfully>

In [10]:
num_epochs = 200  # Set total number of epochs to 200

for epoch in range(100, num_epochs):  # Start from epoch 100 to 200
    running_loss = 0.0

    # Training loop (as before)
    for batch in train_loader:
        batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
        
        # Forward pass
        outputs = model(
            past_values=batch_past_values,
            past_time_features=batch_past_time_features,
            past_observed_mask=batch_past_observed_mask,
            future_values=batch_future_values,
            future_time_features=batch_future_time_features
        )
        
        # Compute loss
        loss = outputs.loss
        
        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Accumulate training loss
        running_loss += loss.item()
    
    # Validation loop
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
            
            # Forward pass
            outputs = model(
                past_values=batch_past_values,
                past_time_features=batch_past_time_features,
                past_observed_mask=batch_past_observed_mask,
                future_values=batch_future_values,
                future_time_features=batch_future_time_features
            )
            
            # Compute validation loss
            val_loss += outputs.loss.item()
    
    # Calculate average losses
    avg_train_loss = running_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    # Print training and validation losses for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # Early stopping check and save the model if validation loss improves
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        torch.save(model.state_dict(), best_model_path)
        print(f"Model saved at epoch {epoch+1} with validation loss {avg_val_loss:.4f}")
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1} due to no improvement in validation loss.")
            break

Epoch [101/200], Training Loss: 13.7648, Validation Loss: 13.7374
Model saved at epoch 101 with validation loss 13.7374
Epoch [102/200], Training Loss: 13.7340, Validation Loss: 13.7082
Model saved at epoch 102 with validation loss 13.7082
Epoch [103/200], Training Loss: 13.7033, Validation Loss: 13.6767
Model saved at epoch 103 with validation loss 13.6767
Epoch [104/200], Training Loss: 13.6714, Validation Loss: 13.6477
Model saved at epoch 104 with validation loss 13.6477
Epoch [105/200], Training Loss: 13.6416, Validation Loss: 13.6199
Model saved at epoch 105 with validation loss 13.6199
Epoch [106/200], Training Loss: 13.6165, Validation Loss: 13.5898
Model saved at epoch 106 with validation loss 13.5898
Epoch [107/200], Training Loss: 13.5843, Validation Loss: 13.5601
Model saved at epoch 107 with validation loss 13.5601
Epoch [108/200], Training Loss: 13.5557, Validation Loss: 13.5331
Model saved at epoch 108 with validation loss 13.5331
Epoch [109/200], Training Loss: 13.5282,

In [11]:
num_epochs = 400  # Set total number of epochs to 200

for epoch in range(200, num_epochs):  # Start from epoch 100 to 200
    running_loss = 0.0

    # Training loop (as before)
    for batch in train_loader:
        batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
        
        # Forward pass
        outputs = model(
            past_values=batch_past_values,
            past_time_features=batch_past_time_features,
            past_observed_mask=batch_past_observed_mask,
            future_values=batch_future_values,
            future_time_features=batch_future_time_features
        )
        
        # Compute loss
        loss = outputs.loss
        
        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Accumulate training loss
        running_loss += loss.item()
    
    # Validation loop
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
            
            # Forward pass
            outputs = model(
                past_values=batch_past_values,
                past_time_features=batch_past_time_features,
                past_observed_mask=batch_past_observed_mask,
                future_values=batch_future_values,
                future_time_features=batch_future_time_features
            )
            
            # Compute validation loss
            val_loss += outputs.loss.item()
    
    # Calculate average losses
    avg_train_loss = running_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    # Print training and validation losses for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # Early stopping check and save the model if validation loss improves
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        torch.save(model.state_dict(), best_model_path)
        print(f"Model saved at epoch {epoch+1} with validation loss {avg_val_loss:.4f}")
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1} due to no improvement in validation loss.")
            break

Epoch [201/400], Training Loss: 12.0642, Validation Loss: 12.0632
Model saved at epoch 201 with validation loss 12.0632
Epoch [202/400], Training Loss: 12.0601, Validation Loss: 12.0337
Model saved at epoch 202 with validation loss 12.0337
Epoch [203/400], Training Loss: 12.0428, Validation Loss: 12.0337
Model saved at epoch 203 with validation loss 12.0337
Epoch [204/400], Training Loss: 12.0363, Validation Loss: 12.0202
Model saved at epoch 204 with validation loss 12.0202
Epoch [205/400], Training Loss: 12.0288, Validation Loss: 12.0275
Epoch [206/400], Training Loss: 12.0174, Validation Loss: 11.9967
Model saved at epoch 206 with validation loss 11.9967
Epoch [207/400], Training Loss: 12.0056, Validation Loss: 11.9903
Model saved at epoch 207 with validation loss 11.9903
Epoch [208/400], Training Loss: 12.0023, Validation Loss: 11.9777
Model saved at epoch 208 with validation loss 11.9777
Epoch [209/400], Training Loss: 11.9906, Validation Loss: 11.9697
Model saved at epoch 209 wit

In [12]:
num_epochs = 2000  # Set total number of epochs to 2000

for epoch in range(400, num_epochs):  # Start from epoch 100 to 200
    running_loss = 0.0

    # Training loop (as before)
    for batch in train_loader:
        batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
        
        # Forward pass
        outputs = model(
            past_values=batch_past_values,
            past_time_features=batch_past_time_features,
            past_observed_mask=batch_past_observed_mask,
            future_values=batch_future_values,
            future_time_features=batch_future_time_features
        )
        
        # Compute loss
        loss = outputs.loss
        
        # Backpropagation and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        # Accumulate training loss
        running_loss += loss.item()
    
    # Validation loop
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            batch_past_values, batch_past_time_features, batch_past_observed_mask, batch_future_values, batch_future_time_features = batch
            
            # Forward pass
            outputs = model(
                past_values=batch_past_values,
                past_time_features=batch_past_time_features,
                past_observed_mask=batch_past_observed_mask,
                future_values=batch_future_values,
                future_time_features=batch_future_time_features
            )
            
            # Compute validation loss
            val_loss += outputs.loss.item()
    
    # Calculate average losses
    avg_train_loss = running_loss / len(train_loader)
    avg_val_loss = val_loss / len(val_loader)

    # Print training and validation losses for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {avg_train_loss:.4f}, Validation Loss: {avg_val_loss:.4f}")

    # Early stopping check and save the model if validation loss improves
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        counter = 0
        torch.save(model.state_dict(), best_model_path)
        print(f"Model saved at epoch {epoch+1} with validation loss {avg_val_loss:.4f}")
    else:
        counter += 1
        if counter >= patience:
            print(f"Early stopping at epoch {epoch+1} due to no improvement in validation loss.")
            break

Epoch [401/2000], Training Loss: 11.4466, Validation Loss: 11.4165
Model saved at epoch 401 with validation loss 11.4165
Epoch [402/2000], Training Loss: 11.4465, Validation Loss: 11.4120
Model saved at epoch 402 with validation loss 11.4120
Epoch [403/2000], Training Loss: 11.4356, Validation Loss: 11.4574
Epoch [404/2000], Training Loss: 11.4376, Validation Loss: 11.4184
Epoch [405/2000], Training Loss: 11.4258, Validation Loss: 11.4711
Epoch [406/2000], Training Loss: 11.4212, Validation Loss: 11.3940
Model saved at epoch 406 with validation loss 11.3940
Epoch [407/2000], Training Loss: 11.4155, Validation Loss: 11.4345
Epoch [408/2000], Training Loss: 11.4236, Validation Loss: 11.4039
Epoch [409/2000], Training Loss: 11.4142, Validation Loss: 11.3929
Model saved at epoch 409 with validation loss 11.3929
Epoch [410/2000], Training Loss: 11.4076, Validation Loss: 11.4598
Epoch [411/2000], Training Loss: 11.4035, Validation Loss: 11.3833
Model saved at epoch 411 with validation loss 1