In [28]:
#create 1d cnn model
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class CNN1d(nn.Module):
    #create a 1d cnn regression model
    #the ouput is a target value of 60 minutes later, is a scalar
    def __init__(self, input_len, input_dim, kernel_size, layer_num, hidden_size,
                  dropout=0.5, batch_norm=True):
        #input shape: (batch_size, input_len, input_dim)
        super(CNN1d, self).__init__()
        self.input_len = input_len
        self.input_dim = input_dim
        self.kernel_size = kernel_size
        self.layer_num = layer_num
        self.hidden_size = hidden_size
        self.dropout = dropout
        self.batch_norm = batch_norm
        self.convs = nn.ModuleList([nn.Conv1d(self.input_dim, self.hidden_size, self.kernel_size, padding=self.kernel_size // 2)])
        self.bns = nn.ModuleList([nn.BatchNorm1d(self.hidden_size)])
        for i in range(self.layer_num - 1):
            self.convs.append(nn.Conv1d(self.hidden_size, self.hidden_size, self.kernel_size, padding=self.kernel_size // 2))
            self.bns.append(nn.BatchNorm1d(self.hidden_size))

        self.fc1 = nn.Linear(self.input_len * self.hidden_size, 1)
        self.dropout = nn.Dropout(self.dropout)
        self.init_weights()

    def init_weights(self):
        #initialize weights
        for conv in self.convs:
            torch.nn.init.xavier_uniform_(conv.weight)
        torch.nn.init.xavier_uniform_(self.fc1.weight)

    def forward(self, x):
        #forward pass
        #input shape: (batch_size, input_len, input_dim)
        #output shape: (batch_size, 1)
        x = x.transpose(1, 2)
        for i, conv in enumerate(self.convs):
            if self.batch_norm:
                x = F.relu(self.bns[i](conv(x)))
            else:
                x = F.relu(conv(x))
            x = self.dropout(x)
        print(x.shape)
        x = x.view(-1, (self.input_len * self.hidden_size))
        x = self.fc1(x)
        print(x.shape)
        return x
    
    def predict(self, x):
        #predict the target value
        return self.forward(x)
    
    def loss(self, x, y):
        #calculate loss
        return F.mse_loss(self.forward(x), y)




In [29]:
#positional encoding
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class PositionalEncoding(nn.Module):
    #add positional encoding to the input
    def __init__(self, input_len, input_dim, dropout=0.5):
        super(PositionalEncoding, self).__init__()
        self.input_len = input_len
        self.input_dim = input_dim
        self.dropout = nn.Dropout(dropout)
        self.pe = torch.zeros(self.input_len, self.input_dim)
        position = torch.arange(0, self.input_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.input_dim, 2).float() * (-math.log(10000.0) / self.input_dim))
        self.pe[:, 0::2] = torch.sin(position * div_term)
        self.pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = self.pe.unsqueeze(0)
        self.pe.requires_grad = False

    def forward(self, x):
        #forward pass
        #input shape: (batch_size, input_len, input_dim)
        #output shape: (batch_size, input_len, input_dim)
        x = x + self.pe
        return self.dropout(x)


In [43]:
class Attention(nn.Module):
    #create MULTIHEADATTENTION model
    #the ouput is a target value of 60 minutes later, is a scalar
    def __init__(self, input_len, input_dim, hidden_size, num_heads, layer_num, dropout=0.5):
        super(Attention, self).__init__()
        self.input_len = input_len
        self.input_dim = input_dim
        self.hidden_size = hidden_size
        self.num_heads = num_heads
        self.layer_num = layer_num
        self.dropout = dropout
        #embedding
        self.embedding = nn.Linear(self.input_dim, self.hidden_size)
        self.q_linears = nn.ModuleList([nn.Linear(self.hidden_size, self.hidden_size) for i in range(self.layer_num)])
        self.k_linears = nn.ModuleList([nn.Linear(self.hidden_size, self.hidden_size) for i in range(self.layer_num)])
        self.v_linears = nn.ModuleList([nn.Linear(self.hidden_size, self.hidden_size) for i in range(self.layer_num)])
        self.multihead_attns = nn.ModuleList([nn.MultiheadAttention(self.hidden_size, self.num_heads) for i in range(self.layer_num)])
        self.norms = nn.ModuleList([nn.LayerNorm(self.hidden_size) for i in range(self.layer_num)])
        self.out = nn.Linear(self.hidden_size * self.input_len, 1)
        self.dropout = nn.Dropout(dropout)
        self.pe = torch.zeros(self.input_len, self.hidden_size)
        position = torch.arange(0, self.input_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, self.hidden_size, 2).float() * (-math.log(10000.0) / self.hidden_size))
        self.pe[:, 0::2] = torch.sin(position * div_term)
        self.pe[:, 1::2] = torch.cos(position * div_term)
        self.pe = self.pe.unsqueeze(0)
        self.pe.requires_grad = False
        self.init_weights()

    def init_weights(self):
        #initialize weights
        torch.nn.init.xavier_uniform_(self.embedding.weight)
        torch.nn.init.xavier_uniform_(self.out.weight)
        
    def positional_encoding(self, x):
        #use positional encoding
        x = x + self.pe.to(x.device)
        return self.dropout(x)
    def forward(self, x):
        #use positional encoding
        x = self.positional_encoding(x)
        #forward pass
        bs = x.size(0)
        x = self.embedding(x)
        for i in range(self.layer_num):
            q = self.q_linears[i](x)
            k = self.k_linears[i](x)
            v = self.v_linears[i](x)
            x, _ = self.multihead_attns[i](q, k, v)
            x = self.norms[i](x)
            x = self.dropout(x)
        x = x.view(bs, -1)
        x = self.out(x)
        return x

    def loss(self, x, y):
        #calculate loss
        return F.mse_loss(self.forward(x), y)
    


In [44]:

num_samples = 1000
#test the model
X = torch.randn(num_samples, 10)
Y = torch.randn(num_samples, 1)


In [45]:
#data loader
day = torch.randint(0, 30, (num_samples, 1))
class MyDataset(torch.utils.data.Dataset):

    def __init__(self, X, y, day, input_len=128):
        #the input data is a 1d array, indicate the minute of the day
        self.X = X
        self.y = y
        self.day = day
        self.input_len = input_len

    def __getitem__(self, index):
        #output previous self.input_len minutes data and target value
        #if the there is no enough data in the same day, pad with 0
        d = self.day[index]
        start = index - self.input_len
        if start < 0:
            start = 0
        if self.day[start] != d:
            while self.day[start] != d:
                start += 1
            #pad with 0 before start
        if index - start < self.input_len:
            x = torch.zeros(self.input_len, 10)
            x[self.input_len - index + start: self.input_len] = self.X[start: index].clone()
        else:
            x = self.X[start: index]
            
        y = self.y[index].clone()
        if x.shape != (self.input_len, 10):
            print(x.shape, index, start)
        return x, y
    def __len__(self):
        return len(self.X)
    
mydataset = MyDataset(X, Y, day, input_len=120)
train_index = range(0, int(num_samples * 0.8))
test_index = range(0, int(num_samples * 0.8))
#test_index = range(int(num_samples * 0.8), num_samples)

train_dataset = torch.utils.data.Subset(mydataset, train_index)
test_dataset = torch.utils.data.Subset(mydataset, test_index)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=False)



In [46]:
#train the model with 4 GPUs
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import DataParallel

model = Attention(input_len=120, input_dim=10, hidden_size=128, num_heads=2, layer_num=3, dropout=0.5)

model = DataParallel(model)
optimizer = optim.Adam(model.parameters(), lr=0.001)
model.train()
for epoch in range(100):
    prediction = []
    ground_truth = []
    for i, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        p = model(x)
        loss = F.mse_loss(p, y)
        prediction.append(p)
        ground_truth.append(y)
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(epoch, i, loss.item())
    print(epoch, F.mse_loss(torch.cat(prediction), torch.cat(ground_truth)).item())

#the number of gpu 
print(torch.cuda.device_count())



0 0 1.502640962600708
0 25.192827224731445
1 0 1.6645662784576416


KeyboardInterrupt: 

In [3]:
#pandas concat 
import pandas as pd
import numpy as np
list1 = [pd.DataFrame(np.random.randn(5, 4)) for _ in range(10)]
pd.concat(list1, axis=0, ignore_index=True)

Unnamed: 0,0,1,2,3
0,-0.277475,1.430054,1.223614,-1.022467
1,1.351404,-2.492247,0.97626,-0.160022
2,-0.612908,0.056709,1.291186,0.31465
3,0.629842,-0.495011,1.036628,-0.60917
4,0.098073,-0.335783,0.80976,-1.093411
5,0.33794,-0.054272,2.486985,0.989399
6,1.858411,1.98752,-1.624891,-0.104429
7,0.053372,0.238143,0.610426,0.201788
8,-0.376109,-0.442441,0.496593,0.808438
9,-0.769942,-1.282127,-0.24373,-0.470049


In [88]:
# def scores(pred, target, days):
#     pred_group, ground_group, corr = groupbyday(pred, target, days)
#     sharpes = sharpeyear(corr, list(pred_group.keys()))
#     return corr.mean(), sharpes
    
# def fitmodel(model, training_data, test_data, training_target, test_target, training_days, test_days):
#     for train, test, train_target, test_target, train_day, test_day in zip(training_data, test_data, training_target, test_target, training_days, test_days):
#         model.fit(train, train_target)
#         pred_train = model.predict(train)
#         pred_test = model.predict(test)
#         pnl_train, sharpe_train = scores(pred_train, train_target,train_day)
#         pnl_test, sharpe_test = scores(pred_test, test_target, test_day)
#         return pnl_train, sharpe_train, pnl_test, sharpe_test

In [145]:
import numpy as np

model = Attention(input_dim=10, input_len=128, hidden_size=64, num_heads=8)

def trainmodel(model, data, target, days, train_index, test_index, epochs=100):
    #get dataset
    mydataset = MyDataset(data, target, days)
    train_index = range(0, int(num_samples * 0.8))
    test_index = range(int(num_samples * 0.8), num_samples)

    train_dataset = torch.utils.data.Subset(mydataset, train_index)
    test_dataset = torch.utils.data.Subset(mydataset, test_index)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True)
    #train the model
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)
    for epoch in range(epochs):
        model.train()
        prediction = 0
        ground_truth = 0
        for i, data in enumerate(train_loader):
            x, y = data
            x = x.to(device)
            y = y.to(device)
            optimizer.zero_grad()
            p = model(x)
            prediction += p
            ground_truth += y
            correlation = torch.sum(torch.mul(model.predict(x), y)) / torch.sqrt(torch.sum(torch.pow(model.predict(x), 2)) * torch.sum(torch.pow(y, 2)))
            loss.backward()
            optimizer.step()
        scheduler.step()
        print("epoch: {}, loss: {}".format(epoch, loss.item()))

        #evaluate the model
        model.eval()
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                x, y = data
                x = x.to(device)
                y = y.to(device)
                loss = model.loss(x, y)
                print("test loss: {}".format(loss.item()))
    
    pred = torch.zeros(len(test_dataset), 1)
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            x, y = data
            x = x.to(device)
            y = y.to(device)
            pred[i * 128: i * 128 + len(x)] = model.predict(x)
    return pred


In [146]:
trainmodel(model, X, y, day, train_index, test_index, epochs=10)

IndexError: index 357 is out of bounds for dimension 0 with size 128

In [117]:
torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])

torch.Size([10, 10])

In [None]:
#create a zero tensor in gpu
x = torch.zeros(10, 10)
#to device y is current in cpu
y = torch.randn(10, 10)
y = y.to(x.device)

In [130]:
#evaluate the model with multiprocessing on cpu
import multiprocessing as mp
import time

model = Attention(input_dim=10, input_len=128, hidden_size=64, num_heads=8)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=128, shuffle=True)

#multiprocessing
pool = mp.Pool(processes=4)
start = time.time()
results = [pool.apply_async(model.predict, args=(data[0],)) for i, data in test_loader]
output = [p.get() for p in results]
print(time.time() - start)




Process SpawnPoolWorker-2:
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/queues.py", line 367, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'Attention' on <module '__main__' (built-in)>
Process SpawnPoolWorker-3:
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/lib/python3.10

KeyboardInterrupt: 

In [161]:
import pickle
import io
class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else:
            return super().find_class(module, name)

#contents = pickle.load(f) becomes...
contents = CPU_Unpickler(f).load()

ModuleNotFoundError: No module named '__builtin__'

In [167]:
#save model
with open('model.pkl', 'wb') as f:
    pickle.dump(model, f)

In [169]:
import io
import pickle

class CPU_Unpickler(pickle.Unpickler):
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        else: return super().find_class(module, name)


with open('model.pkl', 'rb') as df:
     patient_notes_agg = CPU_Unpickler(df).load()
patient_notes_agg

DataParallel(
  (module): Attention(
    (q_linear): Linear(in_features=10, out_features=64, bias=True)
    (v_linear): Linear(in_features=10, out_features=64, bias=True)
    (k_linear): Linear(in_features=10, out_features=64, bias=True)
    (multihead_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
    )
    (out): Linear(in_features=256, out_features=1, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [166]:
contents = CPU_Unpickler(f).load()

ModuleNotFoundError: No module named '__builtin__'

In [162]:
with torch.loading_context(map_location='cpu'):
    obj = pickle.load(f)  # In my case this call is buried deeper in torch-agnostic code

AttributeError: module 'torch' has no attribute 'loading_context'

In [178]:
X = torch.randn(100, 10)
#save X
torch.save(X, 'X.pt')
#load X
X = torch.load('X.pt')

In [182]:
#Use multiprocessing Pool to process data
import multiprocessing as mp
import time

#split X, Y, days into 10 parts
X_split = torch.split(X, 10, dim=0
Y_split = torch.split(Y, 10, dim=0)


#X = X + Y for each part
def adddays(X, Y):
    return X + Y


#multiprocessing
pool = mp.Pool(processes=4)
start = time.time()
results = [pool.apply_async(adddays, args=(X_split[i], Y_split[i])) for i in range(10)]
output = [p.get() for p in results]
print(time.time() - start)




Process SpawnPoolWorker-26:
Process SpawnPoolWorker-27:
Process SpawnPoolWorker-28:
Process SpawnPoolWorker-29:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/opt/homebrew/anaconda3/lib/python3.10/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/opt/homebrew/anaconda3/lib/python3.10/multipro

KeyboardInterrupt: 

In [187]:
import pandas as pd
X = pd.DataFrame(np.random.randn(100, 10))
#split X into 10 parts
X_split = np.array_split(X, 10, axis=0)

In [188]:
pool = mp.Pool()

In [189]:
pool

<multiprocessing.pool.Pool state=RUN pool_size=8>