In [1]:
import math
import os

import numpy as np
import pandas as pd

In [2]:
# Load the spreadsheet, turn its unnamed first column into a nameless
# datetime index, then convert every column to period-over-period changes.
data = (
    pd.read_excel('../data/data.xlsx')
    .assign(**{'Unnamed: 0': lambda frame: pd.to_datetime(frame['Unnamed: 0'])})
    .set_index('Unnamed: 0')
    .rename_axis(None)
)
# The first row has no predecessor, so pct_change leaves it NaN; rows
# containing any NaN are dropped.
data_pct = data.pct_change(periods=1).dropna(axis='index')
data_pct.head()

Unnamed: 0,AAPL,MSFT,NVDA,AMZN,GOOG,META,BRK-B,TSLA,AVGO,WMT,...,SBER,ROSN,BTC-USD,GC%3DF,SI%3DF,PL%3DF,HG%3DF,PA%3DF,CL%3DF,NG%3DF
2016-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.002063,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2016-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,-0.007907,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2016-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.007163,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2016-01-05,-0.025059,0.004562,0.016064,-0.005024,0.000998,0.004989,0.003824,9e-05,-0.033455,0.023756,...,0.008979,0.008765,-0.002611,0.00307,0.009553,0.006114,0.008694,-0.014458,-0.021491,-0.003856
2016-01-06,-0.01957,-0.018165,-0.04135,-0.001799,0.0014,0.002336,0.00061,-0.019648,-0.030759,0.010013,...,-0.020629,-0.00316,-0.006609,0.012519,0.000573,-0.016768,-0.003831,-0.056158,-0.055602,-0.024946


In [3]:
# Data slicer: cut `data_pct` into overlapping windows of WINDOW_MONTHS
# calendar months, each starting one month after the previous one.
N_WINDOWS = 32       # number of rolling windows kept in `dfs`
WINDOW_MONTHS = 12   # calendar months covered by one window

# First and last available date of every calendar month, computed in a single
# pass over the index instead of two groupby-apply passes over the full frame.
month_bounds = (
    data_pct.index.to_series()
    .groupby(data_pct.index.to_period('M'))
    .agg(['min', 'max'])
)
indmin = month_bounds['min'].values
indmax = month_bounds['max'].values

# Fail early with a readable message instead of an IndexError mid-loop.
months_needed = N_WINDOWS + WINDOW_MONTHS - 1
if len(indmin) < months_needed:
    raise ValueError(
        'need at least %d months of data to build %d windows, got %d'
        % (months_needed, N_WINDOWS, len(indmin))
    )

# Resulting list of window frames. Only the windows that are actually used are
# built (previously 61 were built and the last 29 were thrown away).
dfs = []

for i in range(N_WINDOWS):
    # Label-based slice: first day of month i .. last day of month i + 11.
    pre = data_pct.loc[indmin[i]:indmax[i + WINDOW_MONTHS - 1]]
    # A window with 366 rows loses its first row so it ends up with 365.
    if len(pre) == 366:
        pre = pre[1:]
    dfs.append(pre)

In [4]:
# Build (input, target) pairs: one window of returns -> the 30 rows that
# immediately FOLLOW that window.
#
# The target used to be `dfs[i+1][:30]`, i.e. the first 30 rows of the next
# window. Because consecutive windows are shifted by only one month, those
# rows lie inside the input window `dfs[i]` itself, so the model was asked to
# "forecast" values it had already been given (target leakage).
TARGET_LEN = 30

x = []
y = []
# `len(dfs) - 1` is kept so the number of samples (and therefore the
# train/test split that follows) is unchanged.
for i in range(0, len(dfs) - 1):
    window = dfs[i]
    # Position of the first row strictly after the end of the input window
    # (relies on data_pct having a sorted index, as the .loc slicing that
    # produced the windows already does).
    start = data_pct.index.searchsorted(window.index[-1], side='right')
    target = data_pct.iloc[start:start + TARGET_LEN]
    if len(target) != TARGET_LEN:
        raise ValueError(
            'only %d rows available after window %d, need %d'
            % (len(target), i, TARGET_LEN)
        )
    x.append(window.values.astype(np.float32))
    y.append(target.values.astype(np.float32))

In [5]:
# Ordered split without shuffling: the first 21 samples are used for
# training, everything after them for testing.
X_train, X_test = x[:21], x[21:]
y_train, y_test = y[:21], y[21:]

In [6]:
# Sanity check: number of samples in each part of the split.
tuple(len(part) for part in (X_train, X_test, y_train, y_test))

(21, 10, 21, 10)

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [8]:
class CNNTimeSeriesForecaster(nn.Module):
    """1-D convolutional network mapping a series of `input_size` values to `output_size` values.

    Architecture: three Conv1d -> ReLU -> MaxPool1d(2) stages with 32, 64 and
    128 channels, then a 256-unit fully connected layer with ReLU and
    dropout (p=0.2), then a linear output layer.

    Args:
        input_size: length of the input series.
        output_size: number of values produced per input series.
    """

    def __init__(self, input_size=365, output_size=30):
        super().__init__()

        # Stage 1: 1 -> 32 channels; padding keeps the length, pooling halves it.
        self.conv1 = nn.Conv1d(1, 32, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool1d(2, stride=2)

        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool1d(2, stride=2)

        # Stage 3: 64 -> 128 channels.
        self.conv3 = nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool1d(2, stride=2)

        # Size after pooling: three floor-halvings shrink the length to
        # input_size // 8, with 128 channels at that point.
        self.flatten_size = (input_size // 8) * 128

        # Regression head.
        self.fc1 = nn.Linear(self.flatten_size, 256)
        self.relu4 = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        self.fc2 = nn.Linear(256, output_size)

    def forward(self, x):
        """Run the network on `x` of shape (batch, input_size); returns (batch, output_size)."""
        # Conv1d expects (batch, channels, length): add a single channel axis.
        x = x.unsqueeze(1)

        stages = (
            (self.conv1, self.relu1, self.pool1),
            (self.conv2, self.relu2, self.pool2),
            (self.conv3, self.relu3, self.pool3),
        )
        for conv, activation, pool in stages:
            x = pool(activation(conv(x)))

        # Flatten channels and length into one feature vector per sample.
        x = x.view(x.size(0), -1)
        x = self.dropout(self.relu4(self.fc1(x)))

        return self.fc2(x)

In [9]:
from tqdm import tqdm
from IPython.display import clear_output

In [10]:
# Train one CNN per asset column and record per-epoch train/test losses.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

EPOCHS = 10
LEARNING_RATE = 0.00001
N_ASSETS = 200            # number of asset columns a model is trained for
MODEL_DIR = 'cnn_models'  # one state_dict file per asset is written here
SEED = 42

# Create the output directory up front so a missing folder is not discovered
# only after the first model has finished training.
os.makedirs(MODEL_DIR, exist_ok=True)
# Make weight initialisation and dropout repeatable across runs.
torch.manual_seed(SEED)


def _asset_tensor(windows, asset, device):
    """Stack column `asset` of every 2-D array in `windows` into one float32 tensor.

    Returns a tensor of shape (len(windows), rows_per_window) placed on `device`.
    """
    columns = np.stack([window[:, asset] for window in windows])
    return torch.from_numpy(columns.astype(np.float32, copy=False)).to(device)


train_asset_losses = []
test_asset_losses = []

for i in tqdm(range(N_ASSETS)):
    # A fresh model, loss and optimizer for every asset.
    model = CNNTimeSeriesForecaster(input_size=365, output_size=30).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # Build this asset's tensors once instead of on every step of every epoch,
    # and place them on the same device as the model.
    train_inputs = _asset_tensor(X_train, i, device)
    train_targets = _asset_tensor(y_train, i, device)
    test_inputs = _asset_tensor(X_test, i, device)
    test_targets = _asset_tensor(y_test, i, device)

    train_epoch_losses = []
    test_epoch_losses = []

    for epoch in tqdm(range(EPOCHS)):
        model.train()
        period_train_losses = []

        # One optimizer step per training period (batch size 1).
        for p in range(len(X_train)):
            # Gradients must be cleared before every backward pass; clearing
            # them only once per epoch made each step use the accumulated sum
            # of all earlier samples' gradients.
            optimizer.zero_grad()
            outputs = model(train_inputs[p:p + 1])
            loss = criterion(outputs, train_targets[p:p + 1])
            loss.backward()
            optimizer.step()
            period_train_losses.append(loss.item())

        # Evaluation: dropout disabled, no gradient tracking.
        model.eval()
        period_test_losses = []
        with torch.no_grad():
            for p in range(len(X_test)):
                outputs = model(test_inputs[p:p + 1])
                loss = criterion(outputs, test_targets[p:p + 1])
                period_test_losses.append(loss.item())

        # Mean loss over periods for this epoch.
        train_epoch_losses.append(sum(period_train_losses) / len(period_train_losses))
        test_epoch_losses.append(sum(period_test_losses) / len(period_test_losses))

    train_asset_losses.append(train_epoch_losses)
    test_asset_losses.append(test_epoch_losses)
    torch.save(model.state_dict(), '%s/cnn%s.pth' % (MODEL_DIR, i))
    # Keep the notebook output to a single progress display.
    clear_output(wait=True)

100%|██████████| 200/200 [13:47<00:00,  4.14s/it]
