In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer, BartModel
from torch.optim import AdamW
from torch import nn
import os
from tqdm import tqdm

# Set random seed for reproducibility
torch.manual_seed(77)
np.random.seed(77)

# Step 1: Load and preprocess the data
def load_and_preprocess_data(file_path, is_test=False):
    df = pd.read_csv(file_path)
    print(f"Loaded data with {len(df)} rows and {len(df.columns)} columns")


    # Map test dataset columns to training feature names
    df.drop(columns = 'horse', inplace =True)

    column_mapping = {
        'horse_name': 'horse',
        'surface_code': 'surface_x',
        'purse': 'purse_x',
        'odds': 'dollar_odds',
        'sex': 'sex_code'
    }
    df = df.rename(columns=column_mapping)

    # Convert distance from yards to furlongs (1 furlong = 220 yards)
    if 'distance' in df.columns:
        df['distance_f'] = df['distance'] / 220.0

    # Define features
    features = [
        'horse', 'jockey', 'trainer', 'program_number', 'surface_x', 'distance_f', 'purse_x',
        'track_code', 'race_date', 'race_number', 'race_type', 'weight', 'age', 'sex_code',
        'dollar_odds', 'recent_finish_pos_1', 'recent_lengths_back_finish_1', 'recent_purse_1',
        'recent_finish_pos_2', 'recent_lengths_back_finish_2', 'recent_purse_2',
        'recent_finish_pos_3', 'recent_lengths_back_finish_3', 'recent_purse_3',
        'recent_finish_pos_4', 'recent_lengths_back_finish_4', 'recent_purse_4',
        'recent_finish_pos_5', 'recent_lengths_back_finish_5', 'recent_purse_5'
    ]
    targets = ['official_finish', 'speed_rating', 'win_time']

    # Add missing features for test dataset
    for i in range(1, 6):
        finish_pos_col = f'recent_finish_pos_{i}'
        lengths_col = f'recent_lengths_back_finish_{i}'
        purse_col = f'recent_purse_{i}'
        if finish_pos_col not in df.columns:
            df[finish_pos_col] = df.get(f'recentFinishPosition{i}', -1)
        if lengths_col not in df.columns:
            df[lengths_col] = -1
        if purse_col not in df.columns:
            df[purse_col] = -1

    # Drop rows with missing target values (only for training data)
    if not is_test and all(col in df.columns for col in targets):
        df = df.dropna(subset=targets).copy()
        print(f"After dropping NaN targets: {len(df)} rows")


    # Handle missing values
    numerical_cols = [
        'distance_f', 'purse_x', 'dollar_odds', 'weight', 'age',
        'recent_finish_pos_1', 'recent_lengths_back_finish_1', 'recent_purse_1',
        'recent_finish_pos_2', 'recent_lengths_back_finish_2', 'recent_purse_2',
        'recent_finish_pos_3', 'recent_lengths_back_finish_3', 'recent_purse_3',
        'recent_finish_pos_4', 'recent_lengths_back_finish_4', 'recent_purse_4',
        'recent_finish_pos_5', 'recent_lengths_back_finish_5', 'recent_purse_5'
    ]
    for col in features:
        if col in df.columns:
            # Use df[col].dtype instead of df[col].dtypes
            # Check if the column's dtype is a numerical type
            if pd.api.types.is_numeric_dtype(df[col]): # Corrected to check the column's dtype
                df[col] = df[col].fillna(-1)
            else:
                df[col] = df[col].fillna('unknown')
        else:
            df[col] = -1 if col in numerical_cols else 'unknown'




    # Normalize numerical features
    scaler = MinMaxScaler()
    for col in numerical_cols:
        if col not in df.columns:
            df[col] = -1
    df[numerical_cols] = scaler.fit_transform(df[numerical_cols])

    # Create text sequences
    def create_text_sequence(row):
        text = (
            f"Horse: {row['horse']}, Jockey: {row['jockey']}, Trainer: {row['trainer']}, "
            f"Program: {row['program_number']}, Surface: {row['surface_x']}, "
            f"Distance: {row['distance_f']:.2f} furlongs, Purse: {row['purse_x']:.2f}, "
            f"Track: {row['track_code']}, Date: {row['race_date']}, Race: {row['race_number']}, "
            f"Type: {row['race_type']}, Weight: {row['weight']:.2f}, Age: {row['age']:.2f}, "
            f"Sex: {row['sex_code']}, Odds: {row['dollar_odds']:.2f}, "
            f"Recent Finish 1: {row['recent_finish_pos_1']:.2f}, Recent Lengths 1: {row['recent_lengths_back_finish_1']:.2f}, "
            f"Recent Purse 1: {row['recent_purse_1']:.2f}, "
            f"Recent Finish 2: {row['recent_finish_pos_2']:.2f}, Recent Lengths 2: {row['recent_lengths_back_finish_2']:.2f}, "
            f"Recent Purse 2: {row['recent_purse_2']:.2f}, "
            f"Recent Finish 3: {row['recent_finish_pos_3']:.2f}, Recent Lengths 3: {row['recent_lengths_back_finish_3']:.2f}, "
            f"Recent Purse 3: {row['recent_purse_3']:.2f}, "
            f"Recent Finish 4: {row['recent_finish_pos_4']:.2f}, Recent Lengths 4: {row['recent_lengths_back_finish_4']:.2f}, "
            f"Recent Purse 4: {row['recent_purse_4']:.2f}, "
            f"Recent Finish 5: {row['recent_finish_pos_5']:.2f}, Recent Lengths 5: {row['recent_lengths_back_finish_5']:.2f}, "
            f"Recent Purse 5: {row['recent_purse_5']:.2f}"
        )
        return text
    df['input_text'] = df.apply(create_text_sequence, axis=1)
    if not is_test and all(col in df.columns for col in targets):
        df['target_text'] = df.apply(
            lambda row: f"Finish: {row['official_finish']}, Speed: {row['speed_rating']}, Time: {row['win_time']}", axis=1
        )
    else:
        df['target_text'] = 'unknown'  # Placeholder for test data without targets

    # Preserve necessary columns
    columns_to_keep = ['input_text', 'target_text', 'horse', 'program_number', 'race_number'] + (targets if all(col in df.columns for col in targets) else [])
    return df[columns_to_keep], scaler # Modified to return necessary columns


# Step 2: Custom Dataset
class RaceDataset(Dataset):
    def __init__(self, df, tokenizer, max_length=512):
        self.df = df
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.has_targets = all(col in df.columns for col in ['official_finish', 'speed_rating', 'win_time'])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        input_text = self.df.iloc[idx]['input_text']
        target_text = self.df.iloc[idx]['target_text']
        targets = self.df.iloc[idx][['official_finish', 'speed_rating', 'win_time']].values.astype(float) if self.has_targets else np.zeros(3)

        # Tokenize input
        input_encoding = self.tokenizer(
            input_text,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        # Tokenize target (for decoder input)
        target_encoding = self.tokenizer(
            target_text,
            max_length=32,
            padding='max_length',
            truncation=True,
            return_tensors='pt'
        )

        return {
            'input_ids': input_encoding['input_ids'].squeeze(),
            'attention_mask': input_encoding['attention_mask'].squeeze(),
            'decoder_input_ids': target_encoding['input_ids'].squeeze(),
            'decoder_attention_mask': target_encoding['attention_mask'].squeeze(),
            'labels': torch.tensor(targets, dtype=torch.float) if self.has_targets else torch.zeros(3, dtype=torch.float)
        }

# Step 3: Custom BART Model for Regression
class BartForRegression(nn.Module):
    def __init__(self, bart_model):
        super(BartForRegression, self).__init__()
        self.bart = bart_model
        self.regression_head = nn.Linear(self.bart.config.d_model, 3)  # 3 outputs: official_finish, speed_rating, win_time

    def forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask):
        outputs = self.bart(
            input_ids=input_ids,
            attention_mask=attention_mask,
            decoder_input_ids=decoder_input_ids,
            decoder_attention_mask=decoder_attention_mask
        )
        last_hidden_state = outputs.last_hidden_state[:, 0, :]
        predictions = self.regression_head(last_hidden_state)
        return predictions

# Step 4: Training Loop
def train_model(model, train_loader, val_loader, device, epochs=5):
    optimizer = AdamW(model.parameters(), lr=5e-5)
    best_val_loss = float('inf')
    patience = 2
    patience_counter = 0

    for epoch in tqdm(range(epochs), desc = 'Epoch Training' ):
        model.train()
        train_loss = 0
        for k, batch in enumerate(train_loader):
            print('batch: ', k,'/',len(train_loader))
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            decoder_input_ids = batch['decoder_input_ids'].to(device)
            decoder_attention_mask = batch['decoder_attention_mask'].to(device)
            labels = batch['labels'].to(device)

            optimizer.zero_grad()
            predictions = model(input_ids, attention_mask, decoder_input_ids, decoder_attention_mask)
            loss = nn.MSELoss()(predictions, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_loss /= len(train_loader)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}")

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                decoder_input_ids = batch['decoder_input_ids'].to(device)
                decoder_attention_mask = batch['decoder_attention_mask'].to(device)
                labels = batch['labels'].to(device)

                predictions = model(input_ids, attention_mask, decoder_input_ids, decoder_attention_mask)
                loss = nn.MSELoss()(predictions, labels)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        print(f"Epoch {epoch+1}, Val Loss: {val_loss:.4f}")

        # Early stopping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_bart_model.pt')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

# Step 5: Evaluate on Test Set
def evaluate_model(model, test_file_or_df, tokenizer, scaler, device, is_external_test=False):
    # Load test data if a file path is provided
    if isinstance(test_file_or_df, str):
        test_df, _ = load_and_preprocess_data(test_file_or_df, is_test=True)
    else:
        test_df = test_file_or_df

    test_dataset = RaceDataset(test_df, tokenizer)
    test_loader = DataLoader(test_dataset, batch_size=8)

    model.eval()
    predictions = []
    actuals = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            decoder_input_ids = batch['decoder_input_ids'].to(device)
            decoder_attention_mask = batch['decoder_attention_mask'].to(device)
            labels = batch['labels'].to(device)

            preds = model(input_ids, attention_mask, decoder_input_ids, decoder_attention_mask)
            predictions.append(preds.cpu().numpy())
            if test_dataset.has_targets:
                actuals.append(labels.cpu().numpy())

    predictions = np.vstack(predictions)
    output_file = 'external_test_predictions.csv' if is_external_test else 'internal_test_predictions.csv'

    if test_dataset.has_targets:
        actuals = np.vstack(actuals)
        mse = mean_squared_error(actuals, predictions, multioutput='raw_values')
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(actuals, predictions, multioutput='raw_values')

        print("\nTest Set Evaluation Metrics:")
        for i, metric in enumerate(['Official Finish', 'Speed Rating', 'Win Time']):
            print(f"{metric}:")
            print(f"  MSE: {mse[i]:.4f}")
            print(f"  RMSE: {rmse[i]:.4f}")
            print(f"  MAE: {mae[i]:.4f}")

        results = pd.DataFrame({
            'actual_finish': actuals[:, 0],
            'predicted_finish': predictions[:, 0],
            'actual_speed': actuals[:, 1],
            'predicted_speed': predictions[:, 1],
            'actual_time': actuals[:, 2],
            'predicted_time': predictions[:, 2],
            'horse': test_df['horse'],
            'program_number': test_df['program_number'],
            'race_number': test_df['race_number']
        })
    else:
        print("\nNo target columns in test data. Saving predictions only.")
        results = pd.DataFrame({
            'predicted_finish': predictions[:, 0],
            'predicted_speed': predictions[:, 1],
            'predicted_time': predictions[:, 2],
            'horse': test_df['horse'],
            'program_number': test_df['program_number'],
            'race_number': test_df['race_number']
        })

    results.to_csv(output_file, index=False)
    print(f"\nTest predictions saved to '{output_file}'")
    return predictions, actuals if test_dataset.has_targets else None

In [2]:
train_file = 'merged_race_data_20250509_modified.csv'
external_test_file = '/content/CDX0426_filtered.csv'

# Load and preprocess
df, scaler = load_and_preprocess_data(train_file)

Loaded data with 1757 rows and 122 columns
After dropping NaN targets: 1481 rows


In [3]:
train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=77)
train_df, val_df = train_test_split(train_val_df, test_size=0.1765, random_state=77)  # 0.1765 ≈ 0.15/(1-0.15)
print(f"Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")


Train: 1035, Val: 223, Test: 223


In [4]:
  # Initialize tokenizer and model
tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')
bart_model = BartModel.from_pretrained('facebook/bart-base')
model = BartForRegression(bart_model)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


BartForRegression(
  (bart): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): La

In [5]:
train_dataset = RaceDataset(train_df, tokenizer)
val_dataset = RaceDataset(val_df, tokenizer)
test_dataset = RaceDataset(test_df, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64)
test_loader = DataLoader(test_dataset, batch_size=64)

In [6]:
train_model(model, train_loader, val_loader, device, epochs=100)

Epoch Training:   0%|          | 0/100 [00:00<?, ?it/s]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 1, Train Loss: 3519.8296
Epoch 1, Val Loss: 2987.5911


Epoch Training:   1%|          | 1/100 [01:06<1:49:24, 66.30s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 2, Train Loss: 2821.9818
Epoch 2, Val Loss: 2791.2872


Epoch Training:   2%|▏         | 2/100 [02:15<1:50:54, 67.90s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 3, Train Loss: 2699.2934
Epoch 3, Val Loss: 2694.3449


Epoch Training:   3%|▎         | 3/100 [03:24<1:51:02, 68.68s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 4, Train Loss: 2560.5677
Epoch 4, Val Loss: 2605.0007


Epoch Training:   4%|▍         | 4/100 [05:19<2:18:47, 86.75s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 5, Train Loss: 2449.1194
Epoch 5, Val Loss: 2518.0794


Epoch Training:   5%|▌         | 5/100 [06:34<2:10:49, 82.62s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 6, Train Loss: 2362.2235
Epoch 6, Val Loss: 2433.2755


Epoch Training:   6%|▌         | 6/100 [07:45<2:02:55, 78.46s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 7, Train Loss: 2269.6188
Epoch 7, Val Loss: 2351.0057


Epoch Training:   7%|▋         | 7/100 [09:00<1:59:52, 77.34s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 8, Train Loss: 2223.3305
Epoch 8, Val Loss: 2267.2156


Epoch Training:   8%|▊         | 8/100 [10:10<1:55:07, 75.09s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 9, Train Loss: 2103.9626
Epoch 9, Val Loss: 2185.0050


Epoch Training:   9%|▉         | 9/100 [11:20<1:51:35, 73.57s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 10, Train Loss: 2005.2342
Epoch 10, Val Loss: 2104.9087


Epoch Training:  10%|█         | 10/100 [12:30<1:48:39, 72.43s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 11, Train Loss: 1928.1589
Epoch 11, Val Loss: 2027.8267


Epoch Training:  11%|█         | 11/100 [13:40<1:46:25, 71.75s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 12, Train Loss: 1864.2482
Epoch 12, Val Loss: 1950.9089


Epoch Training:  12%|█▏        | 12/100 [14:51<1:44:41, 71.38s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 13, Train Loss: 1793.8119
Epoch 13, Val Loss: 1875.0674


Epoch Training:  13%|█▎        | 13/100 [16:06<1:45:02, 72.44s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 14, Train Loss: 1722.2325
Epoch 14, Val Loss: 1802.4011


Epoch Training:  14%|█▍        | 14/100 [17:21<1:45:16, 73.44s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 15, Train Loss: 1648.0719
Epoch 15, Val Loss: 1730.5142


Epoch Training:  15%|█▌        | 15/100 [18:32<1:42:41, 72.49s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 16, Train Loss: 1595.2971
Epoch 16, Val Loss: 1660.8503


Epoch Training:  16%|█▌        | 16/100 [19:47<1:42:50, 73.46s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 17, Train Loss: 1529.2385
Epoch 17, Val Loss: 1593.6407


Epoch Training:  17%|█▋        | 17/100 [20:57<1:40:04, 72.34s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 18, Train Loss: 1457.7164
Epoch 18, Val Loss: 1527.0044


Epoch Training:  18%|█▊        | 18/100 [22:07<1:37:52, 71.61s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 19, Train Loss: 1382.0902
Epoch 19, Val Loss: 1463.1737


Epoch Training:  19%|█▉        | 19/100 [23:22<1:38:01, 72.61s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 20, Train Loss: 1322.1540
Epoch 20, Val Loss: 1402.6795


Epoch Training:  20%|██        | 20/100 [24:32<1:35:55, 71.95s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 21, Train Loss: 1272.8630
Epoch 21, Val Loss: 1343.6499


Epoch Training:  21%|██        | 21/100 [25:42<1:33:49, 71.26s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 22, Train Loss: 1228.7702
Epoch 22, Val Loss: 1285.4503


Epoch Training:  22%|██▏       | 22/100 [26:52<1:32:02, 70.80s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 23, Train Loss: 1164.4398
Epoch 23, Val Loss: 1230.0309


Epoch Training:  23%|██▎       | 23/100 [28:01<1:30:25, 70.46s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 24, Train Loss: 1121.0097
Epoch 24, Val Loss: 1177.0064


Epoch Training:  24%|██▍       | 24/100 [29:11<1:28:57, 70.23s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 25, Train Loss: 1055.3458
Epoch 25, Val Loss: 1126.3724


Epoch Training:  25%|██▌       | 25/100 [30:26<1:29:31, 71.61s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 26, Train Loss: 1020.4011
Epoch 26, Val Loss: 1077.7220


Epoch Training:  26%|██▌       | 26/100 [31:37<1:27:56, 71.31s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 27, Train Loss: 971.6829
Epoch 27, Val Loss: 1031.4314


Epoch Training:  27%|██▋       | 27/100 [32:50<1:27:39, 72.05s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 28, Train Loss: 932.9011
Epoch 28, Val Loss: 986.9223


Epoch Training:  28%|██▊       | 28/100 [34:00<1:25:41, 71.40s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 29, Train Loss: 891.9329
Epoch 29, Val Loss: 948.8464


Epoch Training:  29%|██▉       | 29/100 [35:10<1:23:53, 70.89s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 30, Train Loss: 862.1367
Epoch 30, Val Loss: 907.0513


Epoch Training:  30%|███       | 30/100 [36:19<1:22:13, 70.49s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 31, Train Loss: 802.2879
Epoch 31, Val Loss: 872.7259


Epoch Training:  31%|███       | 31/100 [37:33<1:21:57, 71.27s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 32, Train Loss: 753.6618
Epoch 32, Val Loss: 836.0000


Epoch Training:  32%|███▏      | 32/100 [38:42<1:20:04, 70.66s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 33, Train Loss: 724.4825
Epoch 33, Val Loss: 798.3639


Epoch Training:  33%|███▎      | 33/100 [39:54<1:19:35, 71.27s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 34, Train Loss: 690.2071
Epoch 34, Val Loss: 763.5360


Epoch Training:  34%|███▍      | 34/100 [41:04<1:17:52, 70.80s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 35, Train Loss: 664.6565
Epoch 35, Val Loss: 741.1872


Epoch Training:  35%|███▌      | 35/100 [42:14<1:16:28, 70.60s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 36, Train Loss: 631.3660
Epoch 36, Val Loss: 701.3163


Epoch Training:  36%|███▌      | 36/100 [43:28<1:16:11, 71.43s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 37, Train Loss: 587.2476
Epoch 37, Val Loss: 687.0123


Epoch Training:  37%|███▋      | 37/100 [44:39<1:14:54, 71.34s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 38, Train Loss: 558.4833


Epoch Training:  38%|███▊      | 38/100 [45:41<1:10:52, 68.59s/it]

Epoch 38, Val Loss: 721.6355
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 39, Train Loss: 536.4359
Epoch 39, Val Loss: 617.3776


Epoch Training:  39%|███▉      | 39/100 [46:51<1:10:05, 68.94s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 40, Train Loss: 524.7570
Epoch 40, Val Loss: 610.4154


Epoch Training:  40%|████      | 40/100 [48:06<1:10:44, 70.75s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 41, Train Loss: 485.2920
Epoch 41, Val Loss: 573.9823


Epoch Training:  41%|████      | 41/100 [49:16<1:09:29, 70.68s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 42, Train Loss: 455.4939
Epoch 42, Val Loss: 558.1944


Epoch Training:  42%|████▏     | 42/100 [50:26<1:07:59, 70.34s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 43, Train Loss: 441.1056
Epoch 43, Val Loss: 554.4842


Epoch Training:  43%|████▎     | 43/100 [51:35<1:06:38, 70.15s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 44, Train Loss: 412.8643
Epoch 44, Val Loss: 520.4083


Epoch Training:  44%|████▍     | 44/100 [52:45<1:05:19, 69.99s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 45, Train Loss: 393.0932
Epoch 45, Val Loss: 501.3279


Epoch Training:  45%|████▌     | 45/100 [53:55<1:04:02, 69.87s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 46, Train Loss: 376.8095
Epoch 46, Val Loss: 488.3748


Epoch Training:  46%|████▌     | 46/100 [55:14<1:05:19, 72.59s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 47, Train Loss: 366.0553
Epoch 47, Val Loss: 472.1149


Epoch Training:  47%|████▋     | 47/100 [56:24<1:03:30, 71.90s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 48, Train Loss: 346.3002
Epoch 48, Val Loss: 465.1018


Epoch Training:  48%|████▊     | 48/100 [57:35<1:02:02, 71.59s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 49, Train Loss: 328.2370
Epoch 49, Val Loss: 454.4396


Epoch Training:  49%|████▉     | 49/100 [58:45<1:00:26, 71.11s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 50, Train Loss: 321.1124
Epoch 50, Val Loss: 431.6397


Epoch Training:  50%|█████     | 50/100 [59:55<59:05, 70.90s/it]  

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 51, Train Loss: 297.7093
Epoch 51, Val Loss: 420.3298


Epoch Training:  51%|█████     | 51/100 [1:01:05<57:33, 70.48s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 52, Train Loss: 288.4898
Epoch 52, Val Loss: 405.7594


Epoch Training:  52%|█████▏    | 52/100 [1:02:14<56:12, 70.27s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 53, Train Loss: 281.9552


Epoch Training:  53%|█████▎    | 53/100 [1:03:17<53:08, 67.84s/it]

Epoch 53, Val Loss: 407.7028
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 54, Train Loss: 259.6474
Epoch 54, Val Loss: 381.1857


Epoch Training:  54%|█████▍    | 54/100 [1:04:28<52:43, 68.77s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 55, Train Loss: 250.2934
Epoch 55, Val Loss: 366.0383


Epoch Training:  55%|█████▌    | 55/100 [1:05:37<51:42, 68.94s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 56, Train Loss: 247.3592


Epoch Training:  56%|█████▌    | 56/100 [1:06:39<49:07, 66.98s/it]

Epoch 56, Val Loss: 374.9276
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 57, Train Loss: 235.0807
Epoch 57, Val Loss: 353.4031


Epoch Training:  57%|█████▋    | 57/100 [1:07:49<48:29, 67.65s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 58, Train Loss: 223.2078
Epoch 58, Val Loss: 338.1395


Epoch Training:  58%|█████▊    | 58/100 [1:09:02<48:31, 69.32s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 59, Train Loss: 217.5192


Epoch Training:  59%|█████▉    | 59/100 [1:10:04<45:56, 67.22s/it]

Epoch 59, Val Loss: 349.8461
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 60, Train Loss: 207.1370
Epoch 60, Val Loss: 332.3684


Epoch Training:  60%|██████    | 60/100 [1:11:14<45:17, 67.95s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 61, Train Loss: 206.4195
Epoch 61, Val Loss: 329.6123


Epoch Training:  61%|██████    | 61/100 [1:12:23<44:30, 68.47s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 62, Train Loss: 192.9927
Epoch 62, Val Loss: 295.7678


Epoch Training:  62%|██████▏   | 62/100 [1:13:38<44:33, 70.35s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 63, Train Loss: 189.0661


Epoch Training:  63%|██████▎   | 63/100 [1:14:41<41:55, 67.99s/it]

Epoch 63, Val Loss: 309.9224
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 64, Train Loss: 176.7928
Epoch 64, Val Loss: 293.6374


Epoch Training:  64%|██████▍   | 64/100 [1:15:50<41:04, 68.46s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 65, Train Loss: 167.8367
Epoch 65, Val Loss: 289.0398


Epoch Training:  65%|██████▌   | 65/100 [1:17:00<40:08, 68.83s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 66, Train Loss: 159.1317
Epoch 66, Val Loss: 273.6692


Epoch Training:  66%|██████▌   | 66/100 [1:18:10<39:11, 69.17s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 67, Train Loss: 150.7154


Epoch Training:  67%|██████▋   | 67/100 [1:19:12<36:56, 67.16s/it]

Epoch 67, Val Loss: 274.0427
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 68, Train Loss: 156.5345
Epoch 68, Val Loss: 262.6877


Epoch Training:  68%|██████▊   | 68/100 [1:20:23<36:21, 68.18s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 69, Train Loss: 138.7344
Epoch 69, Val Loss: 255.5255


Epoch Training:  69%|██████▉   | 69/100 [1:21:32<35:24, 68.53s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 70, Train Loss: 139.5070


Epoch Training:  70%|███████   | 70/100 [1:22:34<33:19, 66.65s/it]

Epoch 70, Val Loss: 260.0246
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 71, Train Loss: 139.4786
Epoch 71, Val Loss: 246.0770


Epoch Training:  71%|███████   | 71/100 [1:23:49<33:21, 69.02s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 72, Train Loss: 123.3705


Epoch Training:  72%|███████▏  | 72/100 [1:24:52<31:17, 67.07s/it]

Epoch 72, Val Loss: 252.5586
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 73, Train Loss: 125.8987
Epoch 73, Val Loss: 234.1427


Epoch Training:  73%|███████▎  | 73/100 [1:26:01<30:32, 67.85s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 74, Train Loss: 122.1429


Epoch Training:  74%|███████▍  | 74/100 [1:27:04<28:41, 66.23s/it]

Epoch 74, Val Loss: 236.4677
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 75, Train Loss: 113.2009
Epoch 75, Val Loss: 231.1497


Epoch Training:  75%|███████▌  | 75/100 [1:28:13<28:00, 67.20s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 76, Train Loss: 109.5499
Epoch 76, Val Loss: 222.8310


Epoch Training:  76%|███████▌  | 76/100 [1:29:24<27:16, 68.18s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 77, Train Loss: 103.6107
Epoch 77, Val Loss: 217.9930


Epoch Training:  77%|███████▋  | 77/100 [1:30:33<26:19, 68.69s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 78, Train Loss: 104.5631
Epoch 78, Val Loss: 215.4954


Epoch Training:  78%|███████▊  | 78/100 [1:31:44<25:24, 69.29s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 79, Train Loss: 97.4253


Epoch Training:  79%|███████▉  | 79/100 [1:32:46<23:30, 67.16s/it]

Epoch 79, Val Loss: 216.1969
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 80, Train Loss: 98.1036
Epoch 80, Val Loss: 208.1152


Epoch Training:  80%|████████  | 80/100 [1:33:56<22:39, 68.00s/it]

batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 81, Train Loss: 91.8575


Epoch Training:  81%|████████  | 81/100 [1:34:58<20:58, 66.26s/it]

Epoch 81, Val Loss: 213.8422
batch:  0 / 17
batch:  1 / 17
batch:  2 / 17
batch:  3 / 17
batch:  4 / 17
batch:  5 / 17
batch:  6 / 17
batch:  7 / 17
batch:  8 / 17
batch:  9 / 17
batch:  10 / 17
batch:  11 / 17
batch:  12 / 17
batch:  13 / 17
batch:  14 / 17
batch:  15 / 17
batch:  16 / 17
Epoch 82, Train Loss: 93.0675


Epoch Training:  81%|████████  | 81/100 [1:36:00<22:31, 71.12s/it]

Epoch 82, Val Loss: 208.2762
Early stopping triggered





In [10]:
torch.save(model.state_dict(), 'best_bart_model_manually_epoch_81.pt') # When manually stop run this code

In [11]:
model.load_state_dict(torch.load('best_bart_model.pt'))
model.to(device)

BartForRegression(
  (bart): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): La

In [12]:
# Evaluate on internal test set
print("\nEvaluating on internal test set from training data...")
evaluate_model(model, test_df, tokenizer, scaler, device, is_external_test=False)



print("\nModel trained, evaluated, and predictions saved.")


Evaluating on internal test set from training data...

Test Set Evaluation Metrics:
Official Finish:
  MSE: 8.8168
  RMSE: 2.9693
  MAE: 2.4076
Speed Rating:
  MSE: 379.7609
  RMSE: 19.4875
  MAE: 15.0905
Win Time:
  MSE: 149.7183
  RMSE: 12.2359
  MAE: 6.6227

Test predictions saved to 'internal_test_predictions.csv'

Model trained, evaluated, and predictions saved.
