In [None]:
import torch 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from torch import nn, optim
from torch.utils.data import DataLoader, Dataset



# Hyperparameters and runtime settings read by the rest of the notebook.
args = dict(
    batch_size=128,     # samples per mini-batch
    num_workers=2,      # DataLoader worker processes
    device='cpu',       # overridden below when CUDA is available
    lr=9e-7,            # optimizer learning rate
    weight_decay=1e-4,  # optimizer weight decay
    num_epochs=100,     # number of train/validation rounds
)

# Switch to the GPU when PyTorch can see one.
if torch.cuda.is_available():
    args.update(device=torch.device('cuda'))

# Echo the two settings that matter most when reading the training log.
for key in ('device', 'lr'):
    print(args[key])

# Read the raw price table, report its row count and preview the first rows.
df = pd.read_csv(filepath_or_buffer='../input/btc-2017to2021/BTC.csv')
print(df.shape[0])
df.head(n=5)

In [None]:
# Fix the RNG so the shuffled row order (and therefore the split) is repeatable.
torch.manual_seed(1)
indices = torch.randperm(len(df)).tolist()

# The first 80% of the shuffled positions form the training split,
# the remainder the test split.
train_size = int(0.8 * len(df))
df_train, df_test = df.iloc[indices[:train_size]], df.iloc[indices[train_size:]]

print(f'{len(df_train)} {len(df_test)}')

# Persist both splits without the pandas index column.
df_train.to_csv(path_or_buf='bitcoin_train.csv', index=False)
df_test.to_csv(path_or_buf='bitcoin_test.csv', index=False)

In [None]:
class BitcoinDataset(Dataset):
  """Map-style dataset serving ``(sample, label)`` tensor pairs from a CSV.

  The CSV is read once at construction time and kept in memory as a NumPy
  array. Each item selects the feature and label columns of one row by
  position and converts them to ``float32`` tensors.

  Args:
    csv_path: Path or file-like object accepted by ``pandas.read_csv``.
    feature_cols: Positional column indices used as the model input.
      Defaults to ``(3, 4, 5)``.
    label_cols: Positional column indices used as the target.
      Defaults to ``(6,)``.
  """

  def __init__(self, csv_path, feature_cols=(3, 4, 5), label_cols=(6,)):
    self.data = pd.read_csv(csv_path).to_numpy()
    # Stored as lists: NumPy treats a tuple index as a multi-dimensional
    # index, whereas a list selects several positions along one axis.
    self.feature_cols = list(feature_cols)
    self.label_cols = list(label_cols)

  def __getitem__(self, index):
    """Return the ``(sample, label)`` float32 tensors for row ``index``."""
    row = self.data[index]

    # Cast explicitly: the in-memory array is not guaranteed to be float32
    # (e.g. it may be an object array if the CSV also holds text columns).
    sample = torch.from_numpy(row[self.feature_cols].astype(np.float32))
    label = torch.from_numpy(row[self.label_cols].astype(np.float32))

    return sample, label

  def __len__(self):
    """Return the number of rows read from the CSV."""
    return len(self.data)

In [None]:
# One dataset per split, each backed by its own CSV file.
train_set = BitcoinDataset(csv_path='bitcoin_train.csv')
test_set = BitcoinDataset(csv_path='bitcoin_test.csv')

# Sanity check: fetch the first training item and show its two tensors.
first_item = train_set[0]
dado, rotulo = first_item
print(dado, rotulo)

In [None]:
# Mini-batch iterator over the training split; samples are reshuffled
# every epoch.
train_loader = DataLoader(
    train_set,
    batch_size=args['batch_size'],
    shuffle=True,
    num_workers=args['num_workers'],
)

# Evaluation gains nothing from shuffling. A fixed order keeps the batch
# composition, and therefore the averaged per-batch loss, identical from one
# epoch to the next, so changes in the curve reflect the model only.
test_loader = DataLoader(
    test_set,
    batch_size=args['batch_size'],
    shuffle=False,
    num_workers=args['num_workers'],
)

In [None]:
class MLP(nn.Module):
  """Fully connected network: two ReLU hidden layers and a linear output layer.

  Args:
    input_size: Number of input features per sample.
    hidden_size: Width of both hidden layers.
    output_size: Number of values produced per sample.
  """

  def __init__(self, input_size, hidden_size, output_size):
    super().__init__()

    # Hidden stack. The attribute names ``features`` and ``out`` determine
    # the state_dict keys, so they are part of the saved-model format.
    hidden_stack = [
        nn.Linear(input_size, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, hidden_size),
        nn.ReLU(),
    ]
    self.features = nn.Sequential(*hidden_stack)

    # Linear read-out with no activation applied afterwards.
    self.out = nn.Linear(hidden_size, output_size)

  def forward(self, x):
    """Apply the hidden stack and then the output layer to ``x``."""
    return self.out(self.features(x))

# Size the network from the data: one input unit per value in a sample
# tensor, and a single output unit for the one-element label tensor.
input_size = len(train_set[0][0])
hidden_size = 250
output_size = 1  # one predicted value per sample (regression target, not classes)

net = MLP(input_size, hidden_size, output_size).to(args['device'])

In [None]:
# Loss: mean absolute error between prediction and label.
criterium = nn.L1Loss()
criterium = criterium.to(args['device'])

# Adam over every trainable parameter of the network, configured from args.
optimizer = optim.Adam(
    params=net.parameters(),
    lr=args['lr'],
    weight_decay=args['weight_decay'],
)

In [None]:
def train(net, epoch, train_loader):
  """Run one optimisation pass over ``train_loader`` and report the loss.

  Relies on the notebook-level ``args`` (for the device), ``criterium``
  (the loss function) and ``optimizer``.

  Args:
    net: Model to optimise; it is switched to training mode.
    epoch: Epoch number, used only in the printed summary line.
    train_loader: Iterable yielding ``(dado, rotulo)`` batches, i.e.
      input tensor and label tensor.

  Returns:
    The mean of the per-batch loss values for this pass.
  """
  net.train()
  epoch_loss = []

  for dado, rotulo in train_loader:
    dado = dado.to(args['device'])
    rotulo = rotulo.to(args['device'])

    # backward() adds to the existing .grad buffers, so they must be cleared
    # before each batch; otherwise every step would apply the running sum of
    # all previous batches' gradients.
    optimizer.zero_grad()

    pred = net(dado)
    loss = criterium(pred, rotulo)
    # Keep a plain Python float so no tensor (or graph) is retained.
    epoch_loss.append(loss.item())

    loss.backward()
    optimizer.step()

  epoch_loss = np.asarray(epoch_loss)
  print("Epoca: %d Loss: %.4f +/- %.4f" % (epoch, epoch_loss.mean(), epoch_loss.std()))
  return epoch_loss.mean()

In [None]:
def validation(net, epoch, test_loader):
  """Measure the loss of ``net`` on ``test_loader`` without updating weights.

  Relies on the notebook-level ``args`` (for the device) and ``criterium``
  (the loss function).

  Args:
    net: Model to evaluate; it is switched to evaluation mode.
    epoch: Epoch number, used only in the printed summary line.
    test_loader: Iterable yielding ``(dado, rotulo)`` batches, i.e.
      input tensor and label tensor.

  Returns:
    The mean of the per-batch loss values over ``test_loader``.
  """
  net.eval()
  # No gradients are needed for evaluation.
  with torch.no_grad():
    epoch_loss = []
    # Iterate the loader that was passed in, so the reported number is
    # computed on the caller's evaluation data rather than on any
    # notebook-level loader.
    for dado, rotulo in test_loader:
      dado = dado.to(args['device'])
      rotulo = rotulo.to(args['device'])

      pred = net(dado)
      loss = criterium(pred, rotulo)
      epoch_loss.append(loss.item())

    epoch_loss = np.asarray(epoch_loss)
    print("Epoca: %d Loss: %.4f +/- %.4f" % (epoch, epoch_loss.mean(), epoch_loss.std()))
    return epoch_loss.mean()

In [None]:
# Per-epoch loss history, later used for the convergence plot.
train_losses = []
test_losses = []

for epoch in range(args['num_epochs']):
  # One optimisation pass, then one evaluation pass; record both means.
  train_losses += [train(net, epoch, train_loader)]
  test_losses += [validation(net, epoch, test_loader)]
  print('----------------------------')

In [None]:
# Fetch every test item once and split the pairs into inputs and labels.
test_pairs = [test_set[i] for i in range(len(test_set))]
Xtest = torch.stack([sample for sample, _ in test_pairs]).to(args['device'])
ytest = torch.stack([label for _, label in test_pairs])

# Pure inference: evaluation mode, and no autograd graph for the forward pass.
net.eval()
with torch.no_grad():
  ypred = net(Xtest).cpu()

# Labels first, predictions second; the column names below must follow
# this concatenation order.
data = torch.cat((ytest, ypred), dim=1)

df_results = pd.DataFrame(data.numpy(), columns=['ytest', 'ypred'])
df_results.head(20)

In [None]:
# Training and test loss per epoch on a single pair of axes.
fig, ax = plt.subplots(figsize=(20, 9))
ax.plot(train_losses, label='Train')
ax.plot(test_losses, label='Test', linewidth=3, alpha=0.5)
ax.set_xlabel('Epochs', fontsize=16)
ax.set_ylabel('Loss', fontsize=16)
ax.set_title('Convergence', fontsize=16)
ax.legend()
plt.show()

In [None]:
# Move the weights to the CPU before exporting.
net = net.to('cpu')

# Export 1: TorchScript archive. Despite the variable name, the module is
# produced by scripting (torch.jit.script), not by tracing.
traced_cell = torch.jit.script(net)
torch.jit.save(traced_cell, 'bitcoin_model.zip')

# Export 2: plain state_dict holding only the parameters.
torch.save(obj=net.state_dict(), f='bitcoin_model.pt')

In [None]:
# !ls -a

In [None]:
# Disabled manual check: build a single hand-typed input vector as float32.
# NOTE(review): presumably the three values line up with the three feature
# columns the dataset feeds the model; confirm before re-enabling.
# dados_hoje = np.array([37642, 38676,37397])
# dados_hoje = torch.from_numpy(dados_hoje.astype(np.float32))
# print(dados_hoje)

In [None]:
# Disabled manual check: run the model on the hand-typed vector `dados_hoje`
# and print the resulting prediction.
# pred_hoje = net(dados_hoje)
# print(pred_hoje)