In [1]:
# import pyarrow

import pandas as pd
import numpy as np

from scipy.stats import pearsonr
from tqdm.notebook import trange, tqdm

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch import optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

# 1. Clean Data

In [2]:
# Access data
parquet_file = 'data/jane-street-real-time-market-data-forecasting/train.parquet/partition_id=0/part-0.parquet'
df = pd.read_parquet(parquet_file, engine="pyarrow")

In [3]:
# combine date_id and time_id to create a time-based sequence variable
shrink = 10000
id = (df['date_id']/shrink)*(max(df['time_id'])+1) + df['time_id']/shrink # the base is max(df['time_id'])+1; similar to base 10 system
df['seq_id'] = id
df = df.reindex(columns=['seq_id'] + list(df.columns[:-1]))
df.drop(columns=['date_id', 'time_id'], inplace=True)

In [4]:
# Normalize and impute NaN with -1 after min/max norm
def minmax_neg1nan(df):
    # Find the columns not containing "id" in their name
    norm_columns = [col for col in df.columns if "id" not in col]
    norm_columns.remove('weight') # probably good to keep weight as is

    # Min-Max Normalisation to [0,1] for non-id nor weight columns
    df[norm_columns] = df[norm_columns].apply(lambda col: (col - col.min()) / (col.max() - col.min()))

    # fill NaN values with -1
    df.fillna(-1, inplace=True)

    return df
df = minmax_neg1nan(df)
df.head()

Unnamed: 0,seq_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,...,feature_78,responder_0,responder_1,responder_2,responder_3,responder_4,responder_5,responder_6,responder_7,responder_8
0,0.0,1,3.889038,-1.0,-1.0,-1.0,-1.0,-1.0,0.465815,0.514922,...,0.066075,0.573849,0.493044,0.638087,0.700535,0.518602,0.621837,0.577598,0.5347,0.50955
1,0.0,7,1.370613,-1.0,-1.0,-1.0,-1.0,-1.0,0.459389,0.513077,...,0.06563,0.796589,0.619008,0.4476,0.884992,0.762698,1.0,0.570367,0.521668,0.577864
2,0.0,9,2.285698,-1.0,-1.0,-1.0,-1.0,-1.0,0.473393,0.513791,...,0.069997,0.413551,0.47197,0.46733,0.537578,0.627129,0.509979,0.710935,0.567088,0.577283
3,0.0,10,0.690606,-1.0,-1.0,-1.0,-1.0,-1.0,0.476461,0.515538,...,0.065762,0.54085,0.522399,0.729489,0.609744,0.622587,0.622538,0.611414,0.57752,0.362048
4,0.0,14,0.44057,-1.0,-1.0,-1.0,-1.0,-1.0,0.469661,0.515316,...,0.144631,0.462661,0.449724,0.465198,0.107185,0.340863,0.0,0.142718,0.391088,0.0


In [5]:
# drop responders (other than responder 6) from df
df.drop(columns=[f"responder_{i}" for i in [0,1,2,3,4,5,7,8]], inplace=True)
df.head()

Unnamed: 0,seq_id,symbol_id,weight,feature_00,feature_01,feature_02,feature_03,feature_04,feature_05,feature_06,...,feature_70,feature_71,feature_72,feature_73,feature_74,feature_75,feature_76,feature_77,feature_78,responder_6
0,0.0,1,3.889038,-1.0,-1.0,-1.0,-1.0,-1.0,0.465815,0.514922,...,0.010971,0.005625,0.00632,-1.0,-1.0,0.081308,0.078341,0.06661,0.066075,0.577598
1,0.0,7,1.370613,-1.0,-1.0,-1.0,-1.0,-1.0,0.459389,0.513077,...,0.010513,0.002409,0.005603,-1.0,-1.0,0.080878,0.078968,0.068394,0.06563,0.570367
2,0.0,9,2.285698,-1.0,-1.0,-1.0,-1.0,-1.0,0.473393,0.513791,...,0.013406,0.003304,0.005881,-1.0,-1.0,0.095183,0.089571,0.071146,0.069997,0.710935
3,0.0,10,0.690606,-1.0,-1.0,-1.0,-1.0,-1.0,0.476461,0.515538,...,0.018484,0.032793,0.017134,-1.0,-1.0,0.082058,0.077466,0.06899,0.065762,0.611414
4,0.0,14,0.44057,-1.0,-1.0,-1.0,-1.0,-1.0,0.469661,0.515316,...,0.009153,0.005315,0.006302,-1.0,-1.0,0.166908,0.144226,0.125198,0.144631,0.142718


In [6]:
# Choose a subset of 10 features with closest simarility to the target: responder 6
def subset_features(df, n):
    corr_list = []
    n_cols = df.shape[1]
    x = df["responder_6"]
    for i in tqdm(range(3, n_cols-1)): # exclude responder 6 at end
        y = df.iloc[:, i] # column i
        mask = ~np.isnan(x) & ~np.isnan(y) # mask out NaN values
        corr = np.nan if len(y[mask]) < 2 else abs(pearsonr(x[mask], y[mask]).correlation)
        corr_list.append(tuple([i, corr]))

    # sort column indices by most -> least correlated, take top n
    out_tuple = sorted(
        corr_list,
        key=lambda x: (np.isnan(x[1]), -x[1] if not np.isnan(x[1]) else float("-inf")),
        reverse=False,
    )[:n]

    out = [x[0] for x in out_tuple]

    return sorted(out) # return in index order

In [7]:
subset_n = 10
subset_columns = [0,1,2] + subset_features(df, n=subset_n) + [df.columns.get_loc("responder_6")]

split_ratio = 0.8
train_df = df.iloc[:int(len(df)*split_ratio), subset_columns]
test_df = df.iloc[int(len(df)*split_ratio):, subset_columns]

  0%|          | 0/79 [00:00<?, ?it/s]

  corr = np.nan if len(y[mask]) < 2 else abs(pearsonr(x[mask], y[mask]).correlation)


In [8]:
train_df.head()

Unnamed: 0,seq_id,symbol_id,weight,feature_05,feature_06,feature_07,feature_19,feature_39,feature_51,feature_53,feature_56,feature_68,feature_69,responder_6
0,0.0,1,3.889038,0.465815,0.514922,0.425142,0.279095,-1.0,0.449066,-1.0,0.264526,0.002818,0.010717,0.577598
1,0.0,7,1.370613,0.459389,0.513077,0.423677,0.281537,-1.0,0.544227,-1.0,0.338956,0.003411,0.009337,0.570367
2,0.0,9,2.285698,0.473393,0.513791,0.424863,0.24646,-1.0,0.481614,-1.0,0.234111,0.002999,0.008997,0.710935
3,0.0,10,0.690606,0.476461,0.515538,0.426033,0.350599,-1.0,0.666179,-1.0,0.410094,0.008494,0.014574,0.611414
4,0.0,14,0.44057,0.469661,0.515316,0.426816,0.290998,-1.0,0.552196,-1.0,0.414646,0.002691,0.011307,0.142718


In [9]:
test_df.head()

Unnamed: 0,seq_id,symbol_id,weight,feature_05,feature_06,feature_07,feature_19,feature_39,feature_51,feature_53,feature_56,feature_68,feature_69,responder_6
1555368,12.1925,33,1.301134,0.439631,0.511814,0.423044,0.494335,0.256487,0.450238,0.522075,0.495673,0.001625,0.009133,0.516518
1555369,12.1925,34,1.437518,0.440414,0.512207,0.422065,0.485144,0.595078,0.339397,0.573141,0.556594,0.00183,0.009276,0.428489
1555370,12.1925,38,1.926398,0.438835,0.511766,0.421562,0.252044,0.473257,0.214426,0.557326,0.486511,0.001676,0.008339,0.45616
1555371,12.1926,0,2.116013,0.442453,0.515137,0.42255,0.516998,0.387863,0.438227,0.588736,0.511437,0.001664,0.011333,0.463442
1555372,12.1926,1,3.343626,0.44068,0.512819,0.422627,0.445221,0.369758,0.239692,0.550703,0.57481,0.00188,0.010765,0.545143


# 2. Reformat Data to Multiple Tensors for Pickeling

In [10]:
symbols = train_df['symbol_id'].unique() # based on train symbols
symbols.sort()

seq_ids_train = train_df['seq_id'].unique()
seq_ids_test = test_df['seq_id'].unique()

def create_tensor_from_df(df, seq_ids, sym):
    # filter for symbol
    symbol_df = df[df.symbol_id==sym]

    # Add additional rows for all seq_ids that are missing
    missing_seq_ids = np.setdiff1d(seq_ids, symbol_df.seq_id.unique())
    missing_rows = pd.DataFrame({
        'seq_id': missing_seq_ids, 
        'symbol_id': [sym]*len(missing_seq_ids),
        })
    for col in df.columns:
        if col not in missing_rows.columns:
            missing_rows[col] = np.nan

    # Horizontally concatenate the two DataFrames
    symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)

    # Sort the DataFrame by 'seq_id' to maintain order
    symbol_df = symbol_df.sort_values(by='seq_id').reset_index(drop=True)
    
    # Impute NaNs with the previous values
    symbol_df.iloc[0] = symbol_df.iloc[0].fillna(-1) # fill first nan values with -1
    symbol_df = symbol_df.fillna(method='ffill')

    # Extract feature columns -> np.arrays
    n_cols = symbol_df.shape[1]
    features_arrs = []
    for i in range(2, n_cols): # exclude seq_id and symbol_id
        arr = symbol_df.iloc[:,i].to_numpy()
        features_arrs.append(arr)

    # stack into a tensor
    stacked_features = np.column_stack(features_arrs)
    tensor_features = torch.tensor(stacked_features)
    
    return tensor_features


for sym in symbols:
    train_tensor_features = create_tensor_from_df(train_df, seq_ids_train, sym)
    test_tensor_features = create_tensor_from_df(test_df, seq_ids_test, sym)

    # pickle and save
    f_train = f"./data/pickled_data/symbol{sym}_train.pt"
    f_test = f"./data/pickled_data/symbol{sym}_test.pt"
    torch.save(train_tensor_features, f_train)
    torch.save(test_tensor_features, f_test)


  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')
  symbol_df = pd.concat([symbol_df, missing_rows], axis=0, ignore_index=True)
  symbol_df = symbol_df.fillna(method='ffill')


# 3. Create Model

In [11]:
# Define a residual MLP block (same as before)
class ResBlockMLP(nn.Module):
    def __init__(self, input_size, output_size):
        super(ResBlockMLP, self).__init__()
        self.norm1 = nn.LayerNorm(input_size)
        self.fc1 = nn.Linear(input_size, input_size // 2)
        self.norm2 = nn.LayerNorm(input_size // 2)
        self.fc2 = nn.Linear(input_size // 2, output_size)
        self.fc_skip = nn.Linear(input_size, output_size)
        self.act = nn.ELU()


    def forward(self, x):
        # overall structure: input -> norm -> act -> skip
        #                    input -> norm -> act -> fc1 -> norm -> act -> fc2
        #                    return skip + fc2
        x = self.act(self.norm1(x))
        skip = self.fc_skip(x)
        x = self.fc2(self.act(self.norm2(self.fc1(x))))
        return x + skip


class LSTM(nn.Module):
    def __init__(self, seq_len, n_cols, seq_len_out, num_blocks=1, hidden_size = 128): # NOTE: removed buffer_size arg from RNN class
        super(LSTM, self).__init__()

        # calculate sequence length after flatten
        seq_data_len = seq_len * n_cols

        # Same as with mlp before
        self.input_mlp = nn.Sequential(
            nn.Linear(seq_data_len, 4 * seq_data_len),
            nn.ELU(),  
            nn.Linear(4 * seq_data_len, hidden_size) 
        )

        # Define LSTM block
        self.lstm = nn.LSTM(input_size = hidden_size, hidden_size = hidden_size, num_layers = num_blocks, batch_first=True) 

        blocks = [ResBlockMLP(hidden_size, hidden_size) for _ in range(num_blocks)]
        self.res_blocks = nn.Sequential(*blocks)
        self.fc_out = nn.Linear(hidden_size, seq_len_out) # this is for output
        self.act = nn.ELU()

    def forward(self, input_seq, hidden_in, mem_in):
        # flatten
        B,_,_ = input_seq.shape
        input_seq = torch.reshape(input_seq, (B, -1))

        # Pass through MLP
        input_vec = self.input_mlp(input_seq)
        input_vec = input_vec.unsqueeze(1)

        # Pass through LSTM
        output, (hidden_out, mem_out) = self.lstm(input_vec, (hidden_in, mem_in))

        # Pass LSTM output through residual blocks
        x = self.act(self.res_blocks(output)).squeeze(0)

        # Compute the final output
        return self.fc_out(x), hidden_out, mem_out

In [12]:
# create dataloader
class StockDataset(Dataset):
    def __init__(self, f, seq_len=100):
        self.seq_len = seq_len
        self.data = torch.load(f)
    def __len__(self): # number input output combos
        return len(self.data) - self.seq_len
    def __getitem__(self, idx):
        x = self.data[idx:idx+self.seq_len,:11] # 1 to 11
        y = self.data[idx+self.seq_len,11] # column 12
        return x, y

def get_dataloader(f, seq_len=100, batch_size=32):
    dataset = StockDataset(f, seq_len)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, pin_memory=True, drop_last=True)
    return dataloader

In [None]:
# Train model
seq_len = 100 # train 100 rows at a time
no_cols = subset_n + 1 # 10 features + 1 weight
seq_out_size = 1 # predict 1 rows at a time
batch_size = 32
hidden_size = 128
stock_lstm = LSTM(seq_len, no_cols, seq_out_size, hidden_size=hidden_size)

# Define optimizer
lr = 0.001
optimizer = optim.Adam(stock_lstm.parameters(), lr=lr)

# Define loss function
loss_fn = nn.MSELoss()

for sym in symbols:
    print("Training for symbol", sym)
    # Load pickled data
    f_train = f"./data/pickled_data/symbol{sym}_train.pt"
    train_dataloader = get_dataloader(f_train, seq_len=seq_len, batch_size=batch_size)
    train_dataloader_iterator = iter(train_dataloader)

    epochs = len(train_dataloader) # Number of batches, i.e. Number of Examples / Batch Size
    for epoch in tqdm(range(epochs), "Training Epochs"):

        predictors_train, targets_train = next(train_dataloader_iterator)
        targets_train = targets_train.unsqueeze(1)

        # Define hidden and memory states
        hidden_train = torch.zeros(1, batch_size, hidden_size)
        memory_train = torch.zeros(1, batch_size, hidden_size)

        # Train model
        stock_lstm.train()
        # forward pass
        outputs_train, hidden_train_new, memory_train_new = stock_lstm(predictors_train, hidden_train, memory_train)

        # calc loss
        loss = loss_fn(outputs_train, targets_train)
        # zero grad
        optimizer.zero_grad()
        # loss backward
        loss.backward()
        # optimizer step
        optimizer.step()

        # Detach hidden and memory states
        hidden_train = hidden_train_new.detach() # so version is 0 when backpropagating
        memory_train = memory_train_new.detach()

        if epoch % (epochs//4) == 0:            
            # Test model
            f_test = f"./data/pickled_data/symbol{sym}_test.pt"
            test_dataloader = get_dataloader(f_test, seq_len=seq_len, batch_size=batch_size)
            test_dataloader_iterator = iter(test_dataloader)

            test_batch_count = len(test_dataloader)
            stock_lstm.eval()
            with torch.no_grad():
                total_loss = 0
                for test_epoch in tqdm(range(test_batch_count), "Test Epoch"):
                    predictors_test, targets_test = next(test_dataloader_iterator)
                    
                    # Define hidden and memory states
                    hidden_test = torch.zeros(1, batch_size, hidden_size)
                    memory_test = torch.zeros(1, batch_size, hidden_size)

                    outputs_test, hidden_test, memory_test = stock_lstm(predictors_test, hidden_test, memory_test)
                    batch_test_loss = loss_fn(outputs_test, targets_test)
                    total_loss += batch_test_loss.item()
                test_loss = total_loss / test_batch_count
                tqdm.write(f"Epoch {epoch + 1}, Symbol {sym}, Train Loss: {loss.item()}")
                tqdm.write(f"Epoch {epoch + 1}, Symbol {sym}, Test Loss: {test_loss}")
print("Done :)")

Training for symbol 0


  self.data = torch.load(f)


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1, Symbol 0, Train Loss: 0.3496879041194916
Epoch 1, Symbol 0, Test Loss: 1.0615185678774863


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 0, Train Loss: 0.005025885067880154
Epoch 952, Symbol 0, Test Loss: 0.007369507842735952


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 0, Train Loss: 0.0793580487370491
Epoch 1903, Symbol 0, Test Loss: 0.006814557665636979


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 0, Train Loss: 0.002839675871655345
Epoch 2854, Symbol 0, Test Loss: 0.00774073179652901


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 0, Train Loss: 0.003555622883141041
Epoch 3805, Symbol 0, Test Loss: 0.00679251238926871
Training for symbol 1


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 1, Train Loss: 0.005826271139085293
Epoch 1, Symbol 1, Test Loss: 0.005648885053379389


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 1, Train Loss: 0.0041917660273611546
Epoch 952, Symbol 1, Test Loss: 0.005425619958416216


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 1, Train Loss: 0.005741640459746122
Epoch 1903, Symbol 1, Test Loss: 0.005342391260112589


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 1, Train Loss: 0.0037733367644250393
Epoch 2854, Symbol 1, Test Loss: 0.005313063693103224


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 1, Train Loss: 0.0011915079085156322
Epoch 3805, Symbol 1, Test Loss: 0.00528441931918087
Training for symbol 2


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 2, Train Loss: 0.01484135165810585
Epoch 1, Symbol 2, Test Loss: 0.009312709163269095


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 2, Train Loss: 0.07874444872140884
Epoch 952, Symbol 2, Test Loss: 0.009620834024051148


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 2, Train Loss: 0.0013705973979085684
Epoch 1903, Symbol 2, Test Loss: 0.009380594242986831


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 2, Train Loss: 0.0021086479537189007
Epoch 2854, Symbol 2, Test Loss: 0.009907079135686416


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 2, Train Loss: 0.07014008611440659
Epoch 3805, Symbol 2, Test Loss: 0.01047717292400256
Training for symbol 3


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 3, Train Loss: 0.003249447327107191
Epoch 1, Symbol 3, Test Loss: 0.03565558918632856


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 3, Train Loss: 0.0740150585770607
Epoch 952, Symbol 3, Test Loss: 0.03582662921078193


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 3, Train Loss: 0.007413134910166264
Epoch 1903, Symbol 3, Test Loss: 0.03650569295613536


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 3, Train Loss: 0.07239172607660294
Epoch 2854, Symbol 3, Test Loss: 0.035998744663276


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 3, Train Loss: 0.08008686453104019
Epoch 3805, Symbol 3, Test Loss: 0.0352789427516541
Training for symbol 7


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 7, Train Loss: 0.007712051272392273
Epoch 1, Symbol 7, Test Loss: 0.010356634729668777


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 7, Train Loss: 0.004461636766791344
Epoch 952, Symbol 7, Test Loss: 0.010172700270418469


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 7, Train Loss: 0.011262756772339344
Epoch 1903, Symbol 7, Test Loss: 0.010463269007855885


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 7, Train Loss: 0.009641147218644619
Epoch 2854, Symbol 7, Test Loss: 0.010139608630440177


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 7, Train Loss: 0.0030413770582526922
Epoch 3805, Symbol 7, Test Loss: 0.010598228050468
Training for symbol 8


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 8, Train Loss: 0.21969705820083618
Epoch 1, Symbol 8, Test Loss: 0.009108831500367313


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 8, Train Loss: 0.1392022967338562
Epoch 952, Symbol 8, Test Loss: 0.011079103584699278


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 8, Train Loss: 0.07115951925516129
Epoch 1903, Symbol 8, Test Loss: 0.009855938480079303


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 8, Train Loss: 0.14565755426883698
Epoch 2854, Symbol 8, Test Loss: 0.010549391021917293


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 3805, Symbol 8, Train Loss: 0.13978978991508484
Epoch 3805, Symbol 8, Test Loss: 0.00927901540437267
Training for symbol 9


Training Epochs:   0%|          | 0/3807 [00:00<?, ?it/s]

Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1, Symbol 9, Train Loss: 0.004346300382167101
Epoch 1, Symbol 9, Test Loss: 0.0077054922559486975


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 952, Symbol 9, Train Loss: 0.0022791295778006315
Epoch 952, Symbol 9, Test Loss: 0.007868595959797737


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 1903, Symbol 9, Train Loss: 0.0013499298365786672
Epoch 1903, Symbol 9, Test Loss: 0.00769010665452367


Test Epoch:   0%|          | 0/697 [00:00<?, ?it/s]

Epoch 2854, Symbol 9, Train Loss: 0.0031865923665463924
Epoch 2854, Symbol 9, Test Loss: 0.007852083226061898
