In [None]:
import numpy as np
import pandas as pd
import gym
import warnings
warnings.filterwarnings('ignore')

In [7]:
df = pd.read_csv('/content/crypto_data.csv')

In [8]:
df.head()

Unnamed: 0,date,symbol,open,high,low,close,volume usdt,tradecount,token,hour,day
0,2020-12-25 05:00:00,1INCHUSDT,0.2,3.0885,0.2,2.5826,35530516,48768,1INCH,5,Friday
1,2020-12-25 06:00:00,1INCHUSDT,2.5824,2.69,2.2249,2.5059,22440875,31099,1INCH,6,Friday
2,2020-12-25 07:00:00,1INCHUSDT,2.5152,2.887,2.3609,2.6237,21300426,33001,1INCH,7,Friday
3,2020-12-25 08:00:00,1INCHUSDT,2.6318,2.8247,2.465,2.6134,17491813,30459,1INCH,8,Friday
4,2020-12-25 09:00:00,1INCHUSDT,2.6104,2.7498,2.5629,2.6365,9919400,21023,1INCH,9,Friday


In [9]:
df.columns

Index(['date', 'symbol', 'open', 'high', 'low', 'close', 'volume usdt',
       'tradecount', 'token', 'hour', 'day'],
      dtype='object')

In [10]:
start_date = '2020-08-17 04:00:00'
end_date = '2023-10-19 23:00:00'

In [11]:
data_df = df.copy(deep = True)

In [12]:
data_df = data_df[(data_df['token'] == 'BTC') & (data_df['date'] >= start_date) & (data_df['date'] <= end_date)]

In [13]:
data_df.columns

Index(['date', 'symbol', 'open', 'high', 'low', 'close', 'volume usdt',
       'tradecount', 'token', 'hour', 'day'],
      dtype='object')

In [14]:
day_mapping = {
    'Monday': 1,
    'Tuesday': 2,
    'Wednesday': 3,
    'Thursday': 4,
    'Friday': 5,
    'Saturday': 6,
    'Sunday': 7
}

data_df['day'] = data_df['day'].apply(lambda x: day_mapping[x])

In [15]:
data_df['ema_13'] = data_df['close'].ewm(span=13).mean()
data_df['ema_25'] = data_df['close'].ewm(span=25).mean()
data_df['ema_32'] = data_df['close'].ewm(span=32).mean()
data_df['ema_100'] = data_df['close'].ewm(span=100).mean()
data_df['ema_200'] = data_df['close'].ewm(span=200).mean()

In [16]:
data_df['vol_close'] = (data_df['high'] - data_df['low']) / data_df['close']

In [17]:
# EMA for 2 hours (candles)
data_df['vol_close_ema_3'] = data_df['vol_close'].ewm(span=3, adjust=False).mean()

# EMA for 4 hours (candles)
data_df['vol_close_ema_6'] = data_df['vol_close'].ewm(span=6, adjust=False).mean()

# EMA for 8 hours ((candles)
data_df['vol_close_ema_12'] = data_df['vol_close'].ewm(span=12, adjust=False).mean()

In [18]:
data_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 27812 entries, 343607 to 371418
Data columns (total 20 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   date              27812 non-null  object 
 1   symbol            27812 non-null  object 
 2   open              27812 non-null  float64
 3   high              27812 non-null  float64
 4   low               27812 non-null  float64
 5   close             27812 non-null  float64
 6   volume usdt       27812 non-null  int64  
 7   tradecount        27812 non-null  int64  
 8   token             27812 non-null  object 
 9   hour              27812 non-null  int64  
 10  day               27812 non-null  int64  
 11  ema_13            27812 non-null  float64
 12  ema_25            27812 non-null  float64
 13  ema_32            27812 non-null  float64
 14  ema_100           27812 non-null  float64
 15  ema_200           27812 non-null  float64
 16  vol_close         27812 non-null  float

In [19]:
data_df['date'] = pd.to_datetime(data_df['date'])

In [20]:
columns_to_drop = ['symbol', 'token']
data_df = data_df.drop(columns=columns_to_drop)

In [21]:
data_df.to_csv('data_featured.csv', index=False)

In [22]:
train_df = data_df.copy(deep=True)
train_df.reset_index(drop=True, inplace=True)

In [23]:
train_df.head()

Unnamed: 0,date,open,high,low,close,volume usdt,tradecount,hour,day,ema_13,ema_25,ema_32,ema_100,ema_200,vol_close,vol_close_ema_3,vol_close_ema_6,vol_close_ema_12
0,2020-08-17 04:00:00,11844.72,11858.91,11802.35,11809.38,17196539,28800,4,1,11809.38,11809.38,11809.38,11809.38,11809.38,0.004789,0.004789,0.004789,0.004789
1,2020-08-17 05:00:00,11809.39,11836.9,11790.0,11800.01,19958274,28571,5,1,11804.334615,11804.5076,11804.548594,11804.64815,11804.671575,0.003975,0.004382,0.004557,0.004664
2,2020-08-17 06:00:00,11800.0,11846.74,11785.23,11806.37,19327915,29762,6,1,11805.119921,11805.178699,11805.194058,11805.233617,11805.243387,0.00521,0.004796,0.004743,0.004748
3,2020-08-17 07:00:00,11806.37,11843.01,11792.32,11807.21,22698672,33111,7,1,11805.768697,11805.749021,11805.746245,11805.742633,11805.74244,0.004293,0.004545,0.004615,0.004678
4,2020-08-17 08:00:00,11806.94,11885.0,11806.91,11868.77,28031116,37458,8,1,11822.518351,11820.447106,11819.974205,11818.857316,11818.601316,0.006579,0.005562,0.005176,0.004971


In [24]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import gym
from gym import spaces

In [25]:
class ActorCritic(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(ActorCritic, self).__init__()
        self.actor = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, action_dim),
            nn.Softmax(dim=-1)
        )
        self.critic = nn.Sequential(
            nn.Linear(state_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self):
        raise NotImplementedError

    def act(self, state):
        action_probs = self.actor(state)
        dist = torch.distributions.Categorical(action_probs)
        action = dist.sample()
        return action.item(), dist.log_prob(action)

    def evaluate(self, state, action):
        action_probs = self.actor(state)
        dist = torch.distributions.Categorical(action_probs)
        action_logprobs = dist.log_prob(action)
        dist_entropy = dist.entropy()
        state_value = self.critic(state)
        return action_logprobs, torch.squeeze(state_value), dist_entropy

In [26]:
class PPO:
    def __init__(self, state_dim, action_dim, lr, gamma, K_epochs, eps_clip):
        self.lr = lr
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs

        self.policy = ActorCritic(state_dim, action_dim)
        self.optimizer = torch.optim.Adam(self.policy.parameters(), lr=lr)

        self.policy_old = ActorCritic(state_dim, action_dim)
        self.policy_old.load_state_dict(self.policy.state_dict())

        self.MseLoss = nn.MSELoss()

    def update(self, memory):
        rewards = []
        discounted_reward = 0
        for reward, is_terminal in zip(reversed(memory.rewards), reversed(memory.is_terminals)):
            if is_terminal:
                discounted_reward = 0
            discounted_reward = reward + (self.gamma * discounted_reward)
            rewards.insert(0, discounted_reward)

        rewards = torch.tensor(rewards, dtype=torch.float32)
        rewards = (rewards - rewards.mean()) / (rewards.std() + 1e-5)

        old_states = torch.squeeze(torch.stack(memory.states).detach()).float()
        old_actions = torch.squeeze(torch.stack(memory.actions).detach()).float()
        old_logprobs = torch.squeeze(torch.stack(memory.logprobs)).detach()

        for _ in range(self.K_epochs):
            logprobs, state_values, dist_entropy = self.policy.evaluate(old_states, old_actions)

            ratios = torch.exp(logprobs - old_logprobs.detach())

            advantages = rewards - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1-self.eps_clip, 1+self.eps_clip) * advantages
            loss = -torch.min(surr1, surr2) + 0.5*self.MseLoss(state_values, rewards) - 0.01*dist_entropy

            self.optimizer.zero_grad()
            loss.mean().backward()
            self.optimizer.step()

        self.policy_old.load_state_dict(self.policy.state_dict())

In [27]:
# Memory
class Memory:
    def __init__(self):
        self.actions = []
        self.states = []
        self.logprobs = []
        self.rewards = []
        self.is_terminals = []

    def clear_memory(self):
        del self.actions[:]
        del self.states[:]
        del self.logprobs[:]
        del self.rewards[:]
        del self.is_terminals[:]

In [28]:
# Crypto Trading Environment
class CryptoTradingEnv(gym.Env):
    def __init__(self, data, take_profit_position_range=(0.10, 0.80), stop_loss_position_range=(0.00, 0.15), max_stop_loss_position=0.30):
        super(CryptoTradingEnv, self).__init__()

        self.data = data
        self.n_steps = len(data)
        self.current_step = 0
        self.initial_balance = 10000
        self.balance = self.initial_balance
        self.position = 0
        self.position_open = 0
        self.num_trades = 0
        self.profit_loss = 0
        self.max_stop_loss_position = max_stop_loss_position

        self.take_profit_position_range = take_profit_position_range
        self.stop_loss_position_range = stop_loss_position_range

        self.action_space = spaces.Discrete(3)
        n_features = 15
        self.observation_space = spaces.Box(low=0, high=1, shape=(n_features,))

        self.episode_returns = []
        self.cumulative_returns = 0
        self.winning_trades = 0
        self.losing_trades = 0
        self.overall_rewards = 0

    def step(self, action):
        self.current_step += 1
        done = False

        reward = 0
        trade_outcome = 0

        if action == 1:  # Open Position
            if self.position == 0:
                position_size = 0.05 * self.balance
                self.position_open = self.data.loc[self.current_step, 'open']
                self.position = 1
                self.num_trades += 1
                print(f"Opened trade at step {self.current_step} with position size: {position_size:.2f}")
        elif action == 2:  # Close Position
            if self.position == 1:
                position_close = self.data.loc[self.current_step, 'open']
                trade_outcome = position_close - self.position_open
                self.profit_loss += trade_outcome
                self.position = 0
                self.position_open = 0
                print(f"Closed trade at step {self.current_step}")
                print(f"------------------------------> Trade outcome: {trade_outcome}")

                self.balance += trade_outcome

                if trade_outcome > 0:
                    if trade_outcome >= self.take_profit_position_range[0] * self.position:
                        reward = 1
                        self.winning_trades += 1
                        print("Trade reached take profit")
                elif trade_outcome < 0:
                    if abs(trade_outcome) >= self.max_stop_loss_position * self.position:
                        reward = -1.7
                        self.losing_trades += 1
                        print("Trade reached the maximum stop loss")
                    elif abs(trade_outcome) >= self.stop_loss_position_range[0] * self.position:
                        reward = -1
                        self.losing_trades += 1
                        print("Trade reached stop loss")
                    else:
                        reward = 0
                        print("Trade closed with no loss")

        if trade_outcome > 0:
            reward += 0.5
        print(f"Reward: {reward}")

        self.episode_returns.append(reward)
        self.cumulative_returns += reward
        self.overall_rewards += reward

        if self.current_step >= self.n_steps - 1:
            done = True

        next_state = self.get_observation()
        return next_state, reward, done, {}

    def reset(self):
        self.current_step = 0
        self.position = 0
        self.position_open = 0
        self.balance = self.initial_balance
        self.num_trades = 0
        self.profit_loss = 0
        self.episode_returns = []
        self.winning_trades = 0
        self.losing_trades = 0
        self.overall_rewards = 0
        return self.get_observation()

    def render(self, mode='human'):
        if mode == 'human':
            print(f"Step: {self.current_step}")
            print(f"Open Position: {self.position}")
            print(f"Trades: {self.num_trades} | Profit/Loss: {self.profit_loss:.2f}")
            print(f"Balance: {self.balance:.2f}")
            print(f"Winning Trades: {self.winning_trades} | Losing Trades: {self.losing_trades}")
            print(f"Overall Rewards: {self.overall_rewards:.2f}")
            print(self.data.loc[self.current_step])

    def get_observation(self):
        obs = self.data.loc[self.current_step, [
            'open', 'high', 'low', 'close', 'volume usdt', 'hour', 'day',
            'ema_13', 'ema_25', 'ema_32', 'ema_100', 'ema_200', 'vol_close_ema_3', 'vol_close_ema_6', 'vol_close_ema_12'
        ]].values.astype(np.float32)
        return obs / obs.max()

In [29]:
# Training loop
def train(env, agent, num_episodes, max_steps, print_interval):
    memory = Memory()

    for episode in range(num_episodes):
        state = env.reset()
        for t in range(max_steps):
            action, log_prob = agent.policy_old.act(torch.FloatTensor(state))
            next_state, reward, done, _ = env.step(action)

            memory.states.append(torch.FloatTensor(state))
            memory.actions.append(torch.tensor(action))
            memory.logprobs.append(log_prob)
            memory.rewards.append(reward)
            memory.is_terminals.append(done)

            state = next_state

            if done:
                break

        agent.update(memory)
        memory.clear_memory()

        if (episode + 1) % print_interval == 0:
            print(f"Episode {episode + 1}: Final Rewards: {env.cumulative_returns:.2f} | Total Trades: {env.num_trades}")

    return agent

In [30]:
data = train_df
data['date'] = pd.to_datetime(data['date'])

In [31]:
# Create environment and agent
env = CryptoTradingEnv(data)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

lr = 0.002
gamma = 0.99
K_epochs = 4
eps_clip = 0.2

agent = PPO(state_dim, action_dim, lr, gamma, K_epochs, eps_clip)

In [32]:
# Train the agent
num_episodes = 5
max_steps = 500
print_interval = 100

trained_agent = train(env, agent, num_episodes, max_steps, print_interval)

# Save the trained model
torch.save(trained_agent.policy.state_dict(), 'ppo_crypto_model.pth')

print("Training completed and model saved.")

Opened trade at step 1 with position size: 500.00
Reward: 0
Reward: 0
Reward: 0
Closed trade at step 4
------------------------------> Trade outcome: -2.4499999999989086
Trade reached the maximum stop loss
Reward: -1.7
Opened trade at step 5 with position size: 499.88
Reward: 0
Reward: 0
Reward: 0
Reward: 0
Reward: 0
Closed trade at step 10
------------------------------> Trade outcome: 250.53000000000065
Trade reached take profit
Reward: 1.5
Reward: 0
Reward: 0
Reward: 0
Reward: 0
Opened trade at step 15 with position size: 512.40
Reward: 0
Reward: 0
Reward: 0
Closed trade at step 18
------------------------------> Trade outcome: 8.909999999999854
Trade reached take profit
Reward: 1.5
Reward: 0
Reward: 0
Reward: 0
Opened trade at step 22 with position size: 512.85
Reward: 0
Reward: 0
Reward: 0
Closed trade at step 25
------------------------------> Trade outcome: -81.05999999999949
Trade reached the maximum stop loss
Reward: -1.7
Opened trade at step 26 with position size: 508.80
Rewa

In [35]:
# Validation phase
val_data = data.copy()  # You might want to use a separate validation dataset
val_env = CryptoTradingEnv(val_data)

state = val_env.reset()
done = False
total_reward = 0

while not done:
    action, _ = trained_agent.policy_old.act(torch.FloatTensor(state))
    state, reward, done, _ = val_env.step(action)
    total_reward += reward
    val_env.render()
print(f"Validation Phase - Total Reward: {total_reward:.2f}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Step: 27624
Open Position: 1
Trades: 4334 | Profit/Loss: 19941.92
Balance: 29941.92
Winning Trades: 2246 | Losing Trades: 2087
Overall Rewards: -178.90
date                2023-10-12 04:00:00
open                           26847.07
high                           26849.99
low                            26815.01
close                           26821.1
volume usdt                    12777077
tradecount                        22648
hour                                  4
day                                   4
ema_13                     26848.068878
ema_25                     26944.249886
ema_32                      27002.45525
ema_100                    27341.514681
ema_200                     27430.49581
vol_close                      0.001304
vol_close_ema_3                0.002751
vol_close_ema_6                0.003404
vol_close_ema_12               0.004006
Name: 27624, dtype: object
Reward: 0
Step: 27625
Open Position: