In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Thu Feb 22 12:02:04 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P8               9W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [2]:
from psutil import virtual_memory
# Total system memory in decimal gigabytes (bytes / 1e9).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# Below 20 GB the runtime is reported as a standard (not high-RAM) one.
print('Not using a high-RAM runtime' if ram_gb < 20
      else 'You are using a high-RAM runtime!')

Your runtime has 54.8 gigabytes of available RAM

You are using a high-RAM runtime!


In [1]:
# Run-wide switches read by later cells.
# use_gpu: when True, CUDA_VISIBLE_DEVICES is set from the nvidia-smi query
#   and (together with use_dataparallel) the network may be wrapped in DataParallel.
use_gpu = True
# use_ramdon_split: False -> sequential index split into train/validation;
#   True -> seeded torch random_split.
#   NOTE(review): the name is misspelled ("ramdon"); it is kept because a later
#   cell references it under this exact name.
use_ramdon_split = False
# use_dataparallel: when True (and use_gpu is True) the network is wrapped in nn.DataParallel.
use_dataparallel = True

import os
import sys
sys.path.insert(0, '..')

import time
import datetime
import numpy as np
import pandas as pd
from tqdm import tqdm
import math
import importlib
import matplotlib.pyplot as plt
from datetime import datetime
import re

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

# Load data

In [2]:
# Seed torch's global random number generator.
torch.manual_seed(42)

# Single-entry lookup tables (key 5 -> 40 and key 5 -> 105). They are not read by
# any visible cell.
# NOTE(review): the network below is built for 1x40x105 inputs, so these are
# presumably the same dimensions — confirm what the key 5 stands for.
IMAGE_WIDTH = {5: 40}
IMAGE_HEIGHT = {5: 105}

# Tickers to keep: the DataFrame is later filtered to rows whose 'Ticker' is in this list.
asset_list = ['AAPL', 'ABBV', 'ABNB', 'ABT', 'ACN', 'ADBE', 'ADI', 'ADP', 'AFL','AIG', 'ALL', 'AMAT', 'AMD', 'AMT', 'AMZN', 'ANET', 'ASML', 'AVGO','AXP', 'AZN', 'BABA', 'BAC', 'BA', 'BBVA', 'BDX', 'BHP', 'BKNG','BLK', 'BMY', 'BNS', 'BP', 'BSX', 'BTI', 'BUD', 'BX', 'CARR','CCI', 'CDNS', 'CL', 'CMCSA', 'COF', 'COP', 'COST', 'CP', 'CRH','CRM', 'CSCO', 'CSX', 'CVS', 'CVX', 'C', 'DASH', 'DEO','DE', 'DHI', 'DIS', 'DUK', 'EL', 'EMR', 'ENB', 'EOG', 'EPD','EQNR', 'ETN', 'ET', 'EW', 'FCX', 'FTNT', 'GD', 'GE', 'GILD', 'GM','GOOGL', 'GSK', 'GWW', 'HCA', 'HDB', 'HD', 'HLT', 'HMC', 'HON','HSBC', 'HUM', 'IBM', 'IBN', 'ICE', 'INFY', 'ING', 'INTC', 'ISRG','ITW', 'JNJ', 'JPM', 'KHC', 'KKR', 'KLAC', 'KO', 'LIN', 'LLY','LMT', 'LOW', 'LRCX', 'LULU', 'MAR', 'MA', 'MCD', 'MCHP', 'MCO','MDLZ', 'MDT', 'MELI', 'META', 'MET', 'MMM', 'MNST', 'MO', 'MPC','MRK', 'MRVL', 'MSCI', 'MSFT', 'MS', 'MU', 'NEE', 'NFLX', 'NGG','NKE', 'NOW', 'NSC', 'NTES', 'NUE', 'NVDA', 'NVO', 'NVS', 'NXPI','ORCL', 'ORLY', 'OXY', 'PANW', 'PAYX', 'PBR', 'PCAR', 'PEP','PFE', 'PGR', 'PG', 'PLD', 'PM', 'PNC', 'PSA', 'PSX', 'PXD','PYPL', 'QCOM', 'RACE', 'REGN', 'RELX', 'RIO', 'ROST', 'RY', 'SAN','SAP', 'SBUX', 'SCHW', 'SHEL', 'SHOP', 'SHW', 'SLB', 'SNOW','SNPS', 'SNY', 'SO', 'SPG', 'SPOT', 'STLA', 'SYK', 'TD', 'TEAM','TFC', 'TGT', 'TMUS', 'TRI', 'TSLA', 'TSM', 'TTE', 'TXN', 'T','UBER', 'UBS', 'UL', 'UNH', 'USB', 'VALE', 'VLO', 'VRTX', 'VZ','WELL', 'WFC', 'WM', 'ZTS']

# Timezone-aware reference timestamp (offset +00:00); not read by any visible cell.
original_starttime = datetime.fromisoformat("2023-03-21 09:30:00.000000+00:00")

# Data-set parameters. None of them is read by the visible cells.
# NOTE(review): the pickle path below is hard-coded; its file name mentions
# depth5/time1S/window5 but is not built from these variables, so changing a value
# here does not change which file is loaded.
level = 15
prediction_ahead = 4
depth = 5 # also 10, 30, 50
# Sampling interval code. Assuming pandas offset aliases (S = second, L = millisecond,
# U = microsecond): 30S = 30 s, 10S = 10 s, 5S = 5 s, 1S = 1 s; 100L = 0.1 s;
# 10L = 0.01 s; 1L = 0.001 s; 100U = 0.0001 s.
# NOTE(review): the original note gave 0.001 s for 100U — TODO confirm.
time_interval = '1S'
window = 5 # also 3, 5, 10

# Load the pre-built DataFrame from a pickle file.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df = pd.read_pickle('/home/ucloud/UCloud_input/combined_depth5_time1S_window5.pkl')

In [7]:
# Keep only rows whose 'Ticker' is in asset_list, then order the frame
# chronologically ('Time') with 'Ticker' as the tie-breaker.
df = df[df['Ticker'].isin(asset_list)].sort_values(by=['Time', 'Ticker'])

# Fraction of the (sorted) rows that goes into the training slice.
training_procent = 0.6

# Un-rounded row position of the train/test boundary (a float).
x = training_procent * len(df)

def closest_value(x, multiple=200):
    """Round ``x`` up to the next multiple of ``multiple``.

    Despite the name, this does not round to the *nearest* multiple: it always
    moves up, and a value that already sits on a multiple is bumped to the
    following one (``closest_value(400) == 600``). That behaviour is preserved.

    Args:
        x: Number (int or float) to round up.
        multiple: Positive step to align to. Defaults to 200, the value that
            was previously hard-coded.

    Returns:
        int: ``x + multiple - x % multiple`` truncated to an integer.

    Raises:
        ValueError: If ``multiple`` is not positive.
    """
    if multiple <= 0:
        raise ValueError('multiple must be positive, got {!r}'.format(multiple))
    return int(x + multiple - x % multiple)

# Row position (exclusive) where the training slice ends, as returned by closest_value().
# Named split_index rather than `input` so the builtin input() is not shadowed.
split_index = closest_value(x)

# df is sorted by Time then Ticker, so the leading rows form the training pool and
# everything after them is held out. The previous `df[int(1-input):]` used a
# negative start index, i.e. the LAST input-1 rows, which overlapped the training slice.
train_df = df[:split_index]
test_df = df[split_index:]


# Count the number of 0s and 1s in the training pool
count_0 = (train_df['midquote_target'] == 0).sum()
count_1 = (train_df['midquote_target'] == 1).sum()

# The smaller class determines how many rows each class can contribute
min_count = min(count_0, count_1)

print(len(train_df))

# Sample min_count rows from each class
df_0_sample = train_df[train_df['midquote_target'] == 0].sample(n=min_count, random_state=42)
df_1_sample = train_df[train_df['midquote_target'] == 1].sample(n=min_count, random_state=42)


# Further cap each class at max_per_class rows. The cap is clamped to min_count
# because DataFrame.sample raises when asked for more rows than exist
# (without replacement); results are unchanged whenever min_count >= max_per_class.
max_per_class = 227271
n_per_class = min(min_count, max_per_class)
df_0_sample = df_0_sample.sample(n=n_per_class, random_state=42)
df_1_sample = df_1_sample.sample(n=n_per_class, random_state=42)

print(len(df_0_sample))
print(len(df_1_sample))

# Concatenate the samples to get a balanced DataFrame
df_balanced = pd.concat([df_0_sample, df_1_sample])

print(len(df_balanced))

# Shuffle the balanced rows (seeded) and renumber the index from 0
train_df = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)


# Same path insert as in the imports cell; kept so sys.path ends up as before.
sys.path.insert(0, '..')


def query_gpu(qargs=None):
    """Run ``nvidia-smi --query-gpu`` and return its raw CSV output lines.

    The query always starts with ``index``, ``gpu_name`` and ``memory.free``;
    any names in ``qargs`` are appended after them.

    Args:
        qargs: Optional iterable of extra ``--query-gpu`` field names. The
            argument is not modified. Defaults to no extra fields.

    Returns:
        list[str]: One entry per line written to stdout by the command
        (``--format=csv,noheader``), newline characters included. The list is
        whatever ``readlines()`` yields, so it is empty if the command prints
        nothing to stdout.
    """
    # None instead of a mutable [] default; list() also accepts tuples etc.
    fields = ['index', 'gpu_name', 'memory.free'] + list(qargs or [])
    cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(','.join(fields))
    # Context manager closes the pipe (and reaps the child process) deterministically.
    with os.popen(cmd) as pipe:
        return pipe.readlines()

def select_gpu(results, thres=4096):
    """Pick the GPUs whose reported free memory exceeds ``thres`` MiB.

    Each line is matched against ``'(.*), (.*?) MiB'`` and the number captured
    just before ``MiB`` is compared with ``thres``.

    Args:
        results: Iterable of text lines, e.g. the output of ``query_gpu()``.
            ``None`` is treated as an empty iterable.
        thres: Minimum free memory in MiB (exclusive). Defaults to 4096.

    Returns:
        list[int]: Positions (0-based, in ``results`` order) of the lines that
        pass the threshold. Lines that cannot be parsed are skipped, so the
        result is always a list; previously any failure returned ``''`` and
        discarded every GPU, including the ones that had parsed fine.
    """
    pattern = re.compile('(.*), (.*?) MiB')
    avali = []
    for i, line in enumerate(results or []):
        match = pattern.search(line)
        if match is None:
            continue  # no "<value> MiB" field on this line
        try:
            free_mib = int(match.group(2))
        except ValueError:
            continue  # the captured field is not an integer
        if free_mib > thres:
            avali.append(i)
    return avali

# When GPU use is requested, expose to CUDA only the devices returned by
# select_gpu() for the current nvidia-smi query (comma-separated positions).
if use_gpu:
    visible_devices = select_gpu(query_gpu())
    os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, visible_devices))

# Number of rows stacked together per chunk
batch_size = 10

# Stack the per-row arrays chunk by chunk: each chunk of up to batch_size rows
# from the 'Arrays' column becomes one ndarray with a new leading axis.
array_column = train_df['Arrays']
batched_images = [
    np.stack(array_column.iloc[start:start + batch_size])
    for start in range(0, len(train_df), batch_size)
]

# Join the chunks along the leading axis into the final image array
images = np.concatenate(batched_images)

# Labels are read from the same (balanced, shuffled) frame
label_df = train_df

# Report the resulting shapes
print(images.shape)
print(label_df.shape)

700750
282002
282002
564004
(564004, 40, 105)
(564004, 11)


# build dataset

In [8]:
class MyDataset(Dataset):
    """In-memory dataset pairing image arrays with their labels.

    Both inputs are converted to ``torch.Tensor`` once, at construction time;
    the images are converted from a copy of ``img``.
    """

    def __init__(self, img, label):
        # Length is taken from the images, before any conversion.
        self.len = len(img)
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)

    def __len__(self):
        """Number of samples (length of ``img`` at construction)."""
        return self.len

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at ``idx``."""
        sample = self.img[idx]
        target = self.label[idx]
        return sample, target

In [9]:
# Fraction of samples used for training; the remainder is the validation set.
# Defined once so that both split strategies honour the same value (the
# sequential branch previously hard-coded 0.7 and ignored this variable).
train_val_ratio = 0.7

if not use_ramdon_split:
    # Sequential split: first train_val_ratio of the rows -> train, rest -> validation.
    split_idx = int(images.shape[0] * train_val_ratio)
    train_dataset = MyDataset(images[:split_idx], (label_df.midquote_target).values[:split_idx])
    val_dataset = MyDataset(images[split_idx:], (label_df.midquote_target).values[split_idx:])
else:
    # Random split with a fixed generator seed so the partition is repeatable.
    dataset = MyDataset(images, (label_df.midquote_target).values)
    n_train = int(dataset.len * train_val_ratio)
    train_dataset, val_dataset = random_split(
        dataset,
        [n_train, dataset.len - n_train],
        generator=torch.Generator().manual_seed(42))
    del dataset

# Training batches are reshuffled every epoch; validation order is fixed.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True, pin_memory=True)
val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=False, pin_memory=True)

In [10]:
def init_weights(m):
    """Xavier-initialise ``nn.Linear`` / ``nn.Conv2d`` weights and zero Linear biases.

    Written to be passed to ``module.apply(init_weights)``; modules of any
    other type are left untouched.

    Args:
        m: The module to initialise in place.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    nn.init.xavier_uniform_(m.weight)
    # Only Linear biases are reset; Conv2d biases keep their existing values, as before.
    # bias is None for layers built with bias=False, which used to raise here.
    if isinstance(m, nn.Linear) and m.bias is not None:
        nn.init.zeros_(m.bias)

In [11]:
# Original model
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Three-stage convolutional classifier that returns raw 2-class logits.

    Each stage is Conv2d (5x3 kernel, stride (3, 1), dilation (2, 1),
    padding (12, 1)) -> BatchNorm2d -> LeakyReLU -> MaxPool2d((2, 1)).
    The flattened features go through Dropout(0.5) and a single Linear layer.

    Args:
        input_shape: ``(height, width)`` of the single-channel input, used only
            to size the Linear layer. Defaults to ``(40, 105)``, the shape that
            was previously hard-coded.
    """

    def __init__(self, input_shape=(40, 105)):
        super(Net, self).__init__()
        self.layer1 = self._conv_block(1, 64)
        self.layer2 = self._conv_block(64, 128)
        self.layer3 = self._conv_block(128, 256)

        # Size the FC layer by pushing one dummy sample through the conv stack.
        # The probe runs in eval mode: in training mode BatchNorm updates its
        # running statistics on every forward pass (torch.no_grad does not stop
        # that), so the random probe would otherwise leak into the buffers.
        was_training = self.training
        self.eval()
        try:
            probe = torch.rand(1, 1, *input_shape)
            self._temp_size = self._get_conv_output(probe).nelement()
        finally:
            self.train(was_training)

        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(self._temp_size, 2),
        )
        # Not applied in forward(); kept so the module's attributes are unchanged.
        self.softmax = nn.Softmax(dim=1)

    @staticmethod
    def _conv_block(in_channels, out_channels):
        """Build one Conv -> BatchNorm -> LeakyReLU -> MaxPool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=(5, 3), stride=(3, 1),
                      dilation=(2, 1), padding=(12, 1)),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )

    def _get_conv_output(self, input_tensor):
        """Return the conv-stack output for ``input_tensor`` without tracking gradients."""
        with torch.no_grad():
            output = self._forward_features(input_tensor)
        return output

    def _forward_features(self, x):
        """Apply the three convolutional stages in order."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return x

    def forward(self, x):
        """Map a ``(batch, 1, H, W)`` tensor to ``(batch, 2)`` logits.

        No softmax is applied here; the training cell pairs these logits with
        ``nn.CrossEntropyLoss``.
        """
        x = self._forward_features(x)
        x = x.view(x.size(0), -1)  # Flatten everything except the batch axis
        x = self.fc1(x)
        return x


In [None]:
## test model
#import torch
#import torch.nn as nn
#import torch.nn.functional as F
#
#class Net(nn.Module):
#    def __init__(self):
#        super(Net, self).__init__()
#        # Adjusted the first convolutional layer
#        self.layer1 = nn.Sequential(
#            nn.Conv2d(1, 64, kernel_size=(11, 11), stride=(1, 1), padding=(5, 5)),  # Reduced stride
#            nn.BatchNorm2d(64),
#            nn.LeakyReLU(negative_slope=0.01, inplace=True),
#            nn.MaxPool2d((2, 2), stride=(2, 2)),
#        )
#        # Adjusted the second convolutional layer
#        self.layer2 = nn.Sequential(
#            nn.Conv2d(64, 128, kernel_size=(11, 11), stride=(1, 1), padding=(5, 5)),  # Reduced stride
#            nn.BatchNorm2d(128),
#            nn.LeakyReLU(negative_slope=0.01, inplace=True),
#            nn.MaxPool2d((2, 2), stride=(2, 2)),
#        )
#        # Adjusted the third convolutional layer
#        self.layer3 = nn.Sequential(
#            nn.Conv2d(128, 256, kernel_size=(11, 11), stride=(1, 1), padding=(5, 5)),  # Reduced stride
#            nn.BatchNorm2d(256),
#            nn.LeakyReLU(negative_slope=0.01, inplace=True),
#            nn.MaxPool2d((2, 2), stride=(2, 2)),
#        )
#
#        # Dynamically calculate the size of the FC layer
#        self._temp_size = self._get_conv_output(torch.rand(1, 1, 40, 105)).nelement()
#
#        # Define the fully connected layer
#        self.fc1 = nn.Sequential(
#            nn.Dropout(p=0.5),
#            nn.Linear(self._temp_size, 2),
#        )
#
#    def _get_conv_output(self, input_tensor):
#        """Dynamically computes the output size of the convolutional layers."""
#        with torch.no_grad():
#            output = self._forward_features(input_tensor)
#        return output
#
#    def _forward_features(self, x):
#        """Passes the input tensor through convolutional layers."""
#        x = self.layer1(x)
#        x = self.layer2(x)
#        x = self.layer3(x)
#        return x
#
#    def forward(self, x):
#        """Defines the forward pass of the model."""
#        x = self._forward_features(x)
#        x = x.view(x.size(0), -1)  # Flatten the output for the fully connected layer
#        x = self.fc1(x)
#        return x
#

In [13]:
#import baseline_model

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
export_onnx = True
net = Net().to(device)
net.apply(init_weights)  # init_weights is defined in an earlier cell

if export_onnx:
    import torch.onnx
    onnx_path = "/home/ucloud/UCloud_output/content/cnn_baseline.onnx"
    # Create the target directory first so the export does not fail on a fresh
    # workspace (the training cell does the same for its checkpoint directory).
    os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
    # Dummy single-sample input; given its own name so the global `x` used by the
    # data-split cell is not overwritten.
    dummy_input = torch.randn([1, 1, 40, 105]).to(device)
    torch.onnx.export(net,               # model being run
                      dummy_input,               # model input (or a tuple for multiple inputs)
                      onnx_path,                 # where to save the model (can be a file or file-like object)
                      export_params=False,        # False: export the graph without the parameter values
                      opset_version=10,          # the ONNX version to export the model to
                      do_constant_folding=False,  # whether to execute constant folding for optimization
                      input_names = ['input_images'],   # the model's input names
                      output_names = ['output_prob'], # the model's output names (the network returns logits)
                      dynamic_axes={'input_images' : {0 : 'batch_size'},    # variable length axes
                                     'output_prob' : {0 : 'batch_size'}})




In [14]:
# List every named parameter with its shape, then report the total element count.
named_params = list(net.named_parameters())
for name, parameters in named_params:
    print(name, ':', parameters.size())
count = sum(parameters.numel() for _, parameters in named_params)
print('total_parameters : {}'.format(count))

layer1.0.weight : torch.Size([64, 1, 5, 3])
layer1.0.bias : torch.Size([64])
layer1.1.weight : torch.Size([64])
layer1.1.bias : torch.Size([64])
layer2.0.weight : torch.Size([128, 64, 5, 3])
layer2.0.bias : torch.Size([128])
layer2.1.weight : torch.Size([128])
layer2.1.bias : torch.Size([128])
layer3.0.weight : torch.Size([256, 128, 5, 3])
layer3.0.bias : torch.Size([256])
layer3.1.weight : torch.Size([256])
layer3.1.bias : torch.Size([256])
fc1.1.weight : torch.Size([2, 80640])
fc1.1.bias : torch.Size([2])
total_parameters : 777986


In [15]:
#for images, labels in train_dataloader:
#    # Add a channel dimension to make it [batch_size, 1, height, width]
#    images = images.unsqueeze(1)  # This changes shape from [128, 40, 105] to [128, 1, 40, 105]
#    # Now you can forward pass this through your network
#    outputs = net(images.to(device))
#    # Rest of your training loop...
#    break  # This break is just to stop the loop here for demonstration

In [16]:
def train_loop(dataloader, net, loss_fn, optimizer):
    """Train ``net`` for one pass over ``dataloader``.

    Uses the module-level ``device``. Each batch of images gets a channel axis
    inserted at position 1 before it is fed to the network; labels are cast to
    ``long`` for the loss.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        net: Model to optimise; switched to training mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.
        optimizer: Optimiser stepped once per batch.

    Returns:
        float: Sample-weighted mean of the per-batch losses (0.0 if the
        dataloader is empty).
    """
    running_loss = 0.0
    current = 0  # number of samples seen so far
    net.train()

    with tqdm(dataloader) as t:
        for X, y in t:
            X = X.to(device).unsqueeze(1)  # add the channel axis
            y = y.to(device)
            y_pred = net(X)
            loss = loss_fn(y_pred, y.long())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Incremental sample-weighted mean of the batch losses.
            running_loss = (len(X) * loss.item() + running_loss * current) / (len(X) + current)
            current += len(X)
            t.set_postfix({'running_loss':running_loss})

    return running_loss


In [17]:
def val_loop(dataloader, net, loss_fn):
    """Evaluate ``net`` on ``dataloader`` without gradient tracking.

    Uses the module-level ``device``. Each batch of images gets a channel axis
    inserted at position 1 before it is fed to the network; labels are cast to
    ``long`` for the loss.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches.
        net: Model to evaluate; switched to eval mode.
        loss_fn: Callable ``loss_fn(predictions, targets)`` returning a scalar tensor.

    Returns:
        float: Sample-weighted mean of the per-batch losses (0.0 if the
        dataloader is empty).
    """
    running_loss = 0.0
    current = 0  # number of samples seen so far
    net.eval()

    with torch.no_grad():
        with tqdm(dataloader) as t:
            for X, y in t:
                X = X.to(device).unsqueeze(1)  # add the channel axis
                y = y.to(device)
                y_pred = net(X)
                loss = loss_fn(y_pred, y.long())

                # Incremental sample-weighted mean, same formula as train_loop.
                # The previous version first added the batch loss to the running
                # value and then swapped the two weights, which inflated the result.
                running_loss = (len(X) * loss.item() + running_loss * current) / (len(X) + current)
                current += len(X)

    return running_loss

In [18]:
# Wrap the network in nn.DataParallel when both switches are on. The type-name
# check makes the cell safe to re-run: an already wrapped network is left alone.
if use_gpu and use_dataparallel:
    if 'DataParallel' not in str(type(net)):
        net = nn.DataParallel(net.to(device))

In [None]:
import os
import time
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms

# Assuming other necessary imports, variable initializations, and net definition are done elsewhere

# Initialize TensorBoard
# TensorBoard writer (default log directory)
tb = SummaryWriter()

# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)

start_epoch = 0
min_val_loss = 1e9           # best validation loss seen so far
last_min_ind = -1            # epoch at which that best loss was reached
early_stopping_epoch = 5     # stop after this many epochs without improvement

base_save_path = '/home/ucloud/UCloud_output/pt/'
best_model_path = ''

epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t}\n-------------------------------")
    # Brief pause before the progress bar starts.
    # NOTE(review): presumably lets the header above flush first — confirm it is still needed.
    time.sleep(0.2)

    # Training and validation loops
    train_loss = train_loop(train_dataloader, net, loss_fn, optimizer)
    val_loss = val_loop(val_dataloader, net, loss_fn)

    # Log losses to TensorBoard
    tb.add_scalar("Loss/Train", train_loss, t)
    tb.add_scalar("Loss/Validation", val_loss, t)

    # Log parameter and gradient histograms. A parameter that took no part in
    # the last backward pass has grad None, which add_histogram cannot handle.
    for name, weight in net.named_parameters():
        if weight.grad is not None:
            tb.add_histogram(f"{name}.grad", weight.grad, t)
        tb.add_histogram(name, weight, t)

    if val_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = val_loss

        best_model_filename = f'best_model_epoch_{t}_train_{train_loss:.5f}_val_{val_loss:.5f}.pt'
        best_model_path = os.path.join(base_save_path, best_model_filename)

        os.makedirs(base_save_path, exist_ok=True)
        # NOTE(review): this pickles the whole module object (including the
        # DataParallel wrapper when one was applied), so loading needs the same
        # class definitions. Saving a state_dict would be more portable, but
        # would change the file format that downstream loaders expect.
        torch.save(net, best_model_path)

        print(f"New best model saved at epoch {t} with validation loss {val_loss:.5f}")

    elif t - last_min_ind >= early_stopping_epoch:
        print("Early stopping triggered.")
        break

# After training, add graph visualization.
# The dataloader yields images without a channel axis, so add it here exactly as
# train_loop/val_loop do; without it the network receives a 3-D batch and the
# call fails before the writer is closed. The DataParallel wrapper (if any) is
# removed so the underlying module itself is traced.
sample_inputs = next(iter(train_dataloader))[0].unsqueeze(1)
graph_model = net.module if isinstance(net, nn.DataParallel) else net
tb.add_graph(graph_model, sample_inputs.to(next(graph_model.parameters()).device))

# Close the TensorBoard writer
tb.close()

print('Done!')
print(f'Best epoch: {last_min_ind}, val_loss: {min_val_loss}')
if best_model_path:
    print(f'Best model saved at: {best_model_path}')
else:
    print("No model was saved.")
