In [4]:
import pandas as pd
import numpy as np

In [82]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, Dataset

import pytorch_lightning as pl
from pytorch_lightning.metrics import F1

In [83]:
WINDOW_SIZE = 50 # 50 günlük veri ile tahminleme yapılacak demek

In [84]:
btc = pd.read_csv("../data/0_raw/BTC_USD_2013-10-01_2021-04-21-CoinDesk.csv")

In [85]:
btc.columns

Index(['Currency', 'Date', 'Closing Price (USD)', '24h Open (USD)',
       '24h High (USD)', '24h Low (USD)'],
      dtype='object')

In [86]:
btc.Date = btc.Date.apply(pd.Timestamp)

In [87]:
btc = btc.sort_values("Date", ascending=True)

In [88]:
btc

Unnamed: 0,Currency,Date,Closing Price (USD),24h Open (USD),24h High (USD),24h Low (USD)
0,BTC,2013-10-01,123.654990,124.304660,124.751660,122.563490
1,BTC,2013-10-02,125.455000,123.654990,125.758500,123.633830
2,BTC,2013-10-03,108.584830,125.455000,125.665660,83.328330
3,BTC,2013-10-04,118.674660,108.584830,118.675000,107.058160
4,BTC,2013-10-05,121.338660,118.674660,121.936330,118.005660
...,...,...,...,...,...,...
2754,BTC,2021-04-17,61965.782598,63225.093917,63520.325374,60033.534667
2755,BTC,2021-04-18,60574.444728,61444.232503,62534.028498,59802.889267
2756,BTC,2021-04-19,56850.830166,60191.525406,60531.988848,52148.983544
2757,BTC,2021-04-20,56224.101588,56335.389141,57609.368118,54449.245330


In [89]:
btc = btc[btc.Date >= pd.Timestamp("2018")]

In [90]:
btc[(btc.Date >= pd.Timestamp("2018") ) & (btc.Date < pd.Timestamp("2020"))]

Unnamed: 0,Currency,Date,Closing Price (USD),24h Open (USD),24h High (USD),24h Low (USD)
1553,BTC,2018-01-01,13439.417500,13062.145000,14213.441250,12587.603750
1554,BTC,2018-01-02,13337.621250,13439.417500,13892.242500,12859.802500
1555,BTC,2018-01-03,14881.545000,13337.621250,15216.756250,12955.965000
1556,BTC,2018-01-04,15104.450000,14881.545000,15394.986250,14588.595000
1557,BTC,2018-01-05,14953.852500,15104.450000,15194.406250,14225.166250
...,...,...,...,...,...,...
2277,BTC,2019-12-27,7183.706536,7212.808361,7427.472280,7105.723864
2278,BTC,2019-12-28,7227.293712,7183.706083,7251.381246,7065.278308
2279,BTC,2019-12-29,7311.560644,7227.294388,7348.789794,7217.079597
2280,BTC,2019-12-30,7385.464848,7315.151548,7520.637034,7272.593791


In [95]:
int(len(btc)*7/10)

844

In [103]:
btc = btc[['24h Open (USD)', '24h High (USD)', '24h Low (USD)', 'Closing Price (USD)']]

x = [[o, h, l, c] for o, h, l, c in btc.values]

In [104]:
btc.columns = ["open", "high", "low", "close"]

In [105]:
btc["mid"] = (btc["high"] +btc["low"]) / 2.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  btc["mid"] = (btc["high"] +btc["low"]) / 2.0


In [106]:
btc

Unnamed: 0,open,high,low,close,mid
1553,13062.145000,14213.441250,12587.603750,13439.417500,13400.522500
1554,13439.417500,13892.242500,12859.802500,13337.621250,13376.022500
1555,13337.621250,15216.756250,12955.965000,14881.545000,14086.360625
1556,14881.545000,15394.986250,14588.595000,15104.450000,14991.790625
1557,15104.450000,15194.406250,14225.166250,14953.852500,14709.786250
...,...,...,...,...,...
2754,63225.093917,63520.325374,60033.534667,61965.782598,61776.930021
2755,61444.232503,62534.028498,59802.889267,60574.444728,61168.458883
2756,60191.525406,60531.988848,52148.983544,56850.830166,56340.486196
2757,56335.389141,57609.368118,54449.245330,56224.101588,56029.306724


In [75]:
btc.mid

0         123.657575
1         124.696165
2         104.496995
3         112.866580
4         119.970995
            ...     
2754    61776.930021
2755    61168.458883
2756    56340.486196
2757    56029.306724
2758    55282.397781
Name: mid, Length: 2759, dtype: float64

In [99]:
train_index =  int(len(btc)*70/100)
val_index = int(len(btc)*85/100)
test_index = len(btc)
train_index, val_index, test_index

(844, 1025, 1206)

In [173]:
class TimeSeriesDataset(Dataset):
    def __init__(self, x: np.ndarray, seq_len = 50):
        self.x = torch.tensor(x).float()
        self.seq_len = seq_len
        
    def __len__(self):
        return len(self.x) - ( self.seq_len -1 ) #sliding window count
    
    def __getitem__(self, index):
        #return (self.x[index:index+self.seq_len], self.x[index+self.seq_len]) # regression
        
        window = self.x[index:index+self.seq_len]
        price_change = self.x[index+self.seq_len] - self.x[index+self.seq_len-1]
        price_change = 0 if price_change == 0 else 1 if price_change>0 else 2
        return (window, price_change)

In [174]:
train_dataset = TimeSeriesDataset(btc[:train_index].mid.to_numpy())
val_dataset = TimeSeriesDataset(btc[train_index:val_index].mid.to_numpy())
test_dataset = TimeSeriesDataset(btc[val_index:].mid.to_numpy())

In [179]:
class LSTM_based_regression_model(pl.LightningModule):
    def __init__(self, 
                 batch_size=16,
                 lstm_hidden_size = 256, 
                 train_dataset = train_dataset, 
                 val_dataset = val_dataset, 
                 test_dataset= test_dataset):
        super().__init__()
        
        self.batch_size = batch_size
        self.lstm_hidden_size = lstm_hidden_size
        
        self.stack_lstm = nn.LSTM(input_size = 1, 
                hidden_size = self.lstm_hidden_size, 
                num_layers= 3,
                dropout = 0,
                bidirectional = False, batch_first=True,)
        
        self.linear1 = nn.Linear(self.lstm_hidden_size, 128)
        
        
        self.linear2 = nn.Linear(128, 64)
        
        self.activation = nn.ReLU()
        self.output_layer = nn.Linear(64, 3)
        
        self.f1_score = pl.metrics.F1(num_classes=3)
        self.accuracy_score = pl.metrics.Accuracy()
        
        self.train_dl = DataLoader(train_dataset, batch_size=self.batch_size)
        self.val_dl = DataLoader(val_dataset, batch_size=self.batch_size)
        self.test_dl = DataLoader(test_dataset, batch_size=self.batch_size)
    
    def forward(self, x):
        print(x.view(16,50,1).size())
        x, _=  self.stack_lstm(x.view(16,50,1))
        x = self.linear1(x)
        x = self.activation(x)
        
        x = self.linear2(x)
        x = self.activation(x)
        
        output = self.output_layer(x)
        output = F.log_softmax(x, dim = 1)
    
        return output
    
    def training_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(output, y)
        self.log('train_loss', loss, on_step=True, prog_bar=True)
        
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('train_acc', acc, on_step=True, prog_bar=True)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('train_f1', f1, on_step=True, prog_bar=True)
        
        return loss
    
    def validation_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(x, y)
        self.log('val_loss', loss, on_epoch=True, reduce_fx=torch.mean)
        
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('val_acc', acc, on_epoch=True, reduce_fx=torch.mean)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('val_f1', f1, on_epoch=True, reduce_fx=torch.mean)
        
    def test_step(self, batch, batch_nb):
        x, y = batch
        output = self(x)
        loss = F.nll_loss(x, y)
        self.log('tes_loss', loss, on_epoch=True, reduce_fx=torch.mean)
        
        acc = self.accuracy_score(torch.max(output, dim=1)[1], y)
        self.log('test_acc', acc, on_epoch=True, reduce_fx=torch.mean)
        
        f1 = self.f1_score(torch.max(output, dim=1)[1], y)
        self.log('test_f1', f1, on_epoch=True, reduce_fx=torch.mean)

    def configure_optimizers(self):
        optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.7)
        return [optimizer], [scheduler]
    
    def train_dataloader(self):
        return self.train_dl

    def val_dataloader(self):
        return self.val_dl

    def test_dataloader(self):
        return self.test_dl

In [180]:
model = LSTM_based_regression_model()

In [181]:
trainer = pl.Trainer()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


In [182]:
trainer.fit(model)


  | Name           | Type     | Params
--------------------------------------------
0 | stack_lstm     | LSTM     | 1.3 M 
1 | linear1        | Linear   | 32.9 K
2 | linear2        | Linear   | 8.3 K 
3 | activation     | ReLU     | 0     
4 | output_layer   | Linear   | 195   
5 | f1_score       | F1       | 0     
6 | accuracy_score | Accuracy | 0     
--------------------------------------------
1.4 M     Trainable params
0         Non-trainable params
1.4 M     Total params
5.437     Total estimated model params size (MB)


HBox(children=(HTML(value='Validation sanity check'), FloatProgress(value=1.0, bar_style='info', layout=Layout…

torch.Size([16, 50, 1])


ValueError: If `preds` have one dimension more than `target`, `preds` should be a float tensor.

In [124]:
# data
dataset = MNIST('', train=True, download=True, transform=transforms.ToTensor())
mnist_train, mnist_val = random_split(dataset, [55000, 5000])

train_loader = DataLoader(mnist_train, batch_size=32)
val_loader = DataLoader(mnist_val, batch_size=32)

# model
model = LitAutoEncoder()

# training
trainer = pl.Trainer(gpus=4, num_nodes=8, precision=16, limit_train_batches=0.5)
trainer.fit(model, train_loader, val_loader)

NameError: name 'MNIST' is not defined

In [None]:
class LitPredictor(pl.LightningModule):
    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(28 * 28, 64),
            nn.ReLU(),
            nn.Linear(64, 3))
        
        self.decoder = nn.Sequential(
            nn.Linear(3, 64),
            nn.ReLU(),
            nn.Linear(64, 28 * 28))

    def forward(self, x):
        embedding = self.encoder(x)
        return embedding

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def training_step(self, train_batch, batch_idx):
        x, y = train_batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)    
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, val_batch, batch_idx):
        x, y = val_batch
        x = x.view(x.size(0), -1)
        z = self.encoder(x)
        x_hat = self.decoder(z)
        loss = F.mse_loss(x_hat, x)
        self.log('val_loss', loss)