<a href="https://colab.research.google.com/github/dongminkim0220/pytorch_tutorial/blob/master/bitmagic_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bitmagic v2

v2.1: Using DQN with LSTM

v2.2: Stochastic Period Taking

## Setting Environment for Colab

In [1]:
# Colab only: mount Google Drive at /content/drive so files stored on Drive
# are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/RL_bitmagic_Trader/bitmagic/

/content/drive/MyDrive/RL_bitmagic_Trader/bitmagic


In [None]:
#!git clone https://github.com/dongminkim0220/bitmagic

In [7]:
#!git config --global user.email "tommy.dm.kim@gmail.com"
#!git config --global user.name "tommy @ colab"

In [20]:
#!git pull origin master

From https://github.com/dongminkim0220/bitmagic
 * branch            master     -> FETCH_HEAD
Already up to date.


In [16]:
#!git commit -am "deleted unnessary"

[master 9bb9794] deleted unnessary


In [19]:
#!git push origin master

Counting objects: 25, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (24/24), done.
Writing objects: 100% (25/25), 2.34 MiB | 5.81 MiB/s, done.
Total 25 (delta 5), reused 0 (delta 0)
remote: Resolving deltas: 100% (5/5), completed with 4 local objects.[K
To https://github.com/dongminkim0220/bitmagic.git
   d5cb0d0..9bb9794  master -> master


## Import & GPU setting

In [None]:
!pip install pyupbit



In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch

from Agent import Agent
from Environment import DataEnvironment, RealTimeEnvironment
from Model import Model

# Workaround for the following matplotlib error raised while drawing plots:
#   OverflowError: Exceeded cell block limit (set 'agg.path.chunksize' rcparam)
# Setting a non-zero 'agg.path.chunksize' is the remedy the message itself suggests.
import matplotlib as mpl
mpl.rcParams['agg.path.chunksize'] = 10000

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
device

device(type='cuda')

## Data Handling

In [None]:
btcdata = pd.read_csv("./data/KRW-BTC(20170925-20210517).csv")

# Contiguous split in file order: the first 70% of rows for training, the
# next 10% for validation, and everything after that for testing.
# TEST_RATIO is declared for readability only; the test set is simply the
# remainder after the train and validation slices.
TRAIN_RATIO, VALIDATION_RATIO, TEST_RATIO = 0.7, 0.1, 0.2
TOTAL = len(btcdata)

# Row positions where the training and validation slices end.
train_end = int(TOTAL * TRAIN_RATIO)
validation_end = train_end + int(TOTAL * VALIDATION_RATIO)

traindata = btcdata[:train_end]
validationdata = btcdata[train_end:validation_end]
testdata = btcdata[validation_end:]

## Hyperparameters

In [None]:
# Number of rows in one training period; passed to the training environment.
# NOTE(review): 4320 = 3 * 24 * 60, presumably three days of one-minute
# candles — confirm against the data's sampling interval.
PERIOD_LENGTH = 4320

# Ten episodes per full period that fits in the training data; fall back to a
# fixed 100 episodes when the training data is not longer than one period.
if len(traindata) > PERIOD_LENGTH:
    EPISODES = (len(traindata) // PERIOD_LENGTH) * 10
else:
    EPISODES = 100

In [None]:
PERIOD_LENGTH

4320

In [None]:
EPISODES

430

## Model, Agent, Environment

In [None]:
# Build the network (7 input features, 256 hidden units, 1 output) and place
# it on the selected device; the bare name on the last line displays its layout.
model = Model(input_size=7, hidden_size=256, output_size=1).to(device)
model

Model(
  (lstm): LSTM(7, 256, batch_first=True)
  (relu): ReLU()
  (fc): Linear(in_features=256, out_features=1, bias=True)
)

In [None]:
# Starting balances handed to the validation and test environments.
VALIDATION_KRW_SEED, VALIDATION_BTC_SEED = 1000000, 0.01
TEST_KRW_SEED, TEST_BTC_SEED = 1000000, 0.01

# Training samples random periods of PERIOD_LENGTH rows; no seed balances are
# passed here, so the environment's own defaults apply.
trainenv = DataEnvironment(
    data=traindata,
    pick_random_period=True,
    PERIOD_LENGTH=PERIOD_LENGTH,
)

# Validation and test run with random period picking disabled and explicit
# starting balances.
validationenv = DataEnvironment(
    data=validationdata,
    pick_random_period=False,
    KRW_SEED=VALIDATION_KRW_SEED,
    BTC_SEED=VALIDATION_BTC_SEED,
)
testenv = DataEnvironment(
    data=testdata,
    pick_random_period=False,
    KRW_SEED=TEST_KRW_SEED,
    BTC_SEED=TEST_BTC_SEED,
)

In [None]:
# Bundle the model with the train / validation / test environments.
# NOTE(review): the saved output of this cell is a TypeError, so this call did
# not succeed in the recorded run — confirm the argument list against
# Agent.__init__ before relying on it.
agent = Agent(model, trainenv, validationenv, testenv)

TypeError
TypeError


## Train

In [None]:
# Validation and diagnostic plots are produced on episode 1 and then on every
# LOG_INTERVAL-th episode.
LOG_INTERVAL = 100
ASSET_PLOT_DIR = "./plots/asset/"
TRADES_PLOT_DIR = "./plots/trades/"

# savefig does not create missing directories, so make sure they exist before
# the first report instead of failing after a full training episode.
os.makedirs(ASSET_PLOT_DIR, exist_ok=True)
os.makedirs(TRADES_PLOT_DIR, exist_ok=True)


def _save_series_plot(series, path):
    """Draw `series` as a single line plot and write it to `path` at 300 dpi."""
    fig, ax = plt.subplots()
    ax.plot(series)
    fig.savefig(path, dpi = 300)
    # Close explicitly: several figures are created per report.
    plt.close(fig)


state = None
action = None

# One validation return per report (episode 1, then every LOG_INTERVAL episodes).
validation_return_history = []

for e in range(1, EPISODES + 1):

    agent.TrainDataEnvironment.reset()

    # Per-step histories are kept as plain Python scalars so plotting does not
    # depend on the tensors' shape or device.
    asset_history = []   # reward returned by each environment step
    action_history = []  # action taken at each step
    initial_state = None

    # Reinforcement Learning
    while True:
        # The current observation is always re-read from the environment, so
        # there is no need to carry next_state over between iterations.
        state = agent.TrainDataEnvironment.render()
        state = torch.FloatTensor([state]).unsqueeze(0).to(device)

        if initial_state is None:
            initial_state = state

        action = agent.act(state = state)
        action_value = action.item()
        action_history.append(action_value)

        next_state, reward = agent.TrainDataEnvironment.step(action_value)
        if next_state is None:
            # The environment signals the end of the period with next_state=None.
            break

        asset_history.append(float(reward))

        action = action.to(device)
        next_state = torch.FloatTensor([next_state]).unsqueeze(0).to(device)
        reward = torch.FloatTensor([reward]).unsqueeze(0).to(device)

        agent.memorize(state, action, reward, next_state)
        agent.learn()

    # save plots
    if e == 1 or e % LOG_INTERVAL == 0:
        print("")
        print(f"=== episode {e} ===")

        # validation
        agent.ValidationDataEnvironment.reset(KRW_SEED=VALIDATION_KRW_SEED, BTC_SEED=VALIDATION_BTC_SEED)
        r, val_trades_history, val_asset_history = agent.eval(agent.ValidationDataEnvironment)
        print(f"Validation Return: {r}")
        validation_return_history.append(r)

        # Snapshot of the last training step of this episode.
        print("")
        print("-training data-")
        print(f"initial state: {initial_state}")
        print(f"state: {state}")
        print(f"action: {action}")
        print(f"reward: {reward}")

        _save_series_plot(asset_history, ASSET_PLOT_DIR + "epoch" + str(e) + ".png")
        _save_series_plot(val_asset_history, ASSET_PLOT_DIR + "epoch" + str(e) + "(val).png")
        _save_series_plot(action_history, TRADES_PLOT_DIR + "epoch" + str(e) + ".png")
        _save_series_plot(val_trades_history, TRADES_PLOT_DIR + "epoch" + str(e) + "(val).png")
    else:
        # Progress marker for episodes without a report. The line break between
        # batches of dots comes from the print("") at the start of each report.
        print(".", end = "")

Q:  tensor([[[0.0387]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.0742, device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3824519.7500]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.0319]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.0470, device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3808027.]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.1285]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.1140, device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3801305.]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.0346]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.0587, device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3837213.7500]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.0457]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.06

  loss = F.mse_loss(current_Q, expected_Q)


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
expected = reward + max_next:  tensor([[3453791.]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.2919]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0.0291, device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3483822.7500]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0., device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3387840.2500]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0., device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3402737.5000]], device='cuda:0', grad_fn=<AddBackward0>)
Q:  tensor([[[0.0030]]], device='cuda:0', grad_fn=<MulBackward0>)
max_next:  tensor(0., device='cuda:0', grad_fn=<MaxBackward1>)
expected = reward + max_next:  tensor([[3462331.]], de

KeyboardInterrupt: ignored

## Plot Training Images

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as img

epoch 1

In [None]:
# Display the asset and trades plots saved for episode 1, one figure each.
for plot_path in ('./plots/asset/epoch1.png', './plots/trades/epoch1.png'):
    image = img.imread(plot_path)
    plt.imshow(image)
    plt.show()

epoch 100

In [None]:
# Display the asset and trades plots saved for episode 100, one figure each.
for plot_path in ('./plots/asset/epoch100.png', './plots/trades/epoch100.png'):
    image = img.imread(plot_path)
    plt.imshow(image)
    plt.show()

epoch 200

In [None]:
# Display the asset and trades plots saved for episode 200, one figure each.
for plot_path in ('./plots/asset/epoch200.png', './plots/trades/epoch200.png'):
    image = img.imread(plot_path)
    plt.imshow(image)
    plt.show()

epoch 300

In [None]:
# Display the asset and trades plots saved for episode 300, one figure each.
for plot_path in ('./plots/asset/epoch300.png', './plots/trades/epoch300.png'):
    image = img.imread(plot_path)
    plt.imshow(image)
    plt.show()

epoch 400

In [None]:
# Display the asset and trades plots saved for episode 400, one figure each.
for plot_path in ('./plots/asset/epoch400.png', './plots/trades/epoch400.png'):
    image = img.imread(plot_path)
    plt.imshow(image)
    plt.show()

### Plot Validation Return 

In [None]:
# One point per training report; the x axis is the report index, not the
# episode number.
fig, ax = plt.subplots()
ax.plot(validation_return_history)
plt.show()

## Save and Reload


Save model

In [None]:
# torch.save does not create missing parent directories, so make sure
# ./models exists before writing the checkpoint.
os.makedirs("./models", exist_ok=True)
# Persist only the weights (state_dict), not the whole Model object.
torch.save(model.state_dict(), "./models/lstm1.pth")

Reload model

In [None]:
# Rebuild the network with the same constructor arguments used for training,
# then restore the saved weights into it.
model = Model(input_size = 7, hidden_size = 256, output_size = 1)
# map_location remaps tensors that were saved from a CUDA run onto the current
# device, so the checkpoint also loads on a CPU-only runtime.
model.load_state_dict(torch.load("./models/lstm1.pth", map_location=device))
# Keep the reloaded model on the same device as the tensors built in the rest
# of the notebook, matching how the original model was set up.
model.to(device)
model.state_dict()