In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ExponentialLR
from time import time
import json
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from Historic_Crypto import HistoricalData
from Historic_Crypto import Cryptocurrencies
from Historic_Crypto import LiveCryptoData
import yfinance as yf

In [None]:
# Environment check: report the PyTorch build and what CUDA support is visible.
print(f"PyTorch version {torch.__version__}")
print("Num GPUs Available: ", torch.cuda.device_count())
print(torch.cuda.is_available())

In [None]:
class DDDQN(torch.nn.Module):
    """Dueling Q-network: a shared MLP trunk feeding a value head and an advantage head.

    The input is flattened to ``input_features * window_size`` values per sample,
    passed through four Linear -> LeakyReLU -> BatchNorm stages (Dropout after the
    last three), and then split into a scalar state value ``V`` and per-action
    advantages ``A``. ``forward`` combines them as ``Q = V + (A - mean(A))``.

    Args:
        input_features: Number of features per time step.
        window_size: Number of time steps in one state window.
        num_actions: Size of the advantage/Q output (defaults to the 9 actions
            used by the rest of this notebook).
    """

    def __init__(self, input_features, window_size, num_actions=9):
        super().__init__()
        self.input_size = input_features * window_size
        self.leaky_relu = torch.nn.LeakyReLU(negative_slope=0.1)
        # Shared trunk. Attribute names are kept stable so saved state_dicts still load.
        self.d1 = torch.nn.Linear(self.input_size, 256)
        self.bn1 = torch.nn.BatchNorm1d(256)
        self.d2 = torch.nn.Linear(256, 512)
        self.bn2 = torch.nn.BatchNorm1d(512)
        self.drop1 = torch.nn.Dropout(0.3)
        self.d3 = torch.nn.Linear(512, 512)
        self.bn3 = torch.nn.BatchNorm1d(512)
        self.drop2 = torch.nn.Dropout(0.3)
        self.d4 = torch.nn.Linear(512, 256)
        self.bn4 = torch.nn.BatchNorm1d(256)
        self.drop3 = torch.nn.Dropout(0.3)
        # Dueling heads.
        self.dv1 = torch.nn.Linear(256, 128)  # value hidden layer
        self.da1 = torch.nn.Linear(256, 128)  # actions hidden layer
        self.dv2 = torch.nn.Linear(128, 1)  # value output
        self.da2 = torch.nn.Linear(128, num_actions)  # actions output

    def _trunk(self, input_data):
        """Flatten each sample and run it through the shared feature layers."""
        x = input_data.reshape(input_data.size(0), -1)
        x = self.bn1(self.leaky_relu(self.d1(x)))
        x = self.drop1(self.bn2(self.leaky_relu(self.d2(x))))
        x = self.drop2(self.bn3(self.leaky_relu(self.d3(x))))
        x = self.drop3(self.bn4(self.leaky_relu(self.d4(x))))
        return x

    def forward(self, input_data):
        """Return Q-values of shape ``(batch, num_actions)`` for a batch of states."""
        x = self._trunk(input_data)
        v = self.dv2(self.leaky_relu(self.dv1(x)))
        a = self.da2(self.leaky_relu(self.da1(x)))
        # Subtracting the mean advantage keeps V and A identifiable.
        return v + (a - torch.mean(a, dim=1, keepdim=True))

    def advantage(self, state):
        """Return only the raw advantage-head output, shape ``(batch, num_actions)``.

        The previous implementation stopped mid-way and returned ``None``; this
        version reuses the same trunk as ``forward`` and applies the advantage head.
        """
        x = self._trunk(state)
        return self.da2(self.leaky_relu(self.da1(x)))


In [None]:
class ExpReplay():
    """Fixed-size circular experience-replay buffer backed by NumPy arrays.

    State buffers are allocated with shape ``(buffer_size, num_features, window_size)``;
    every state passed to ``add_exp`` must be assignable to one
    ``(num_features, window_size)`` slot.

    Args:
        num_features: First per-sample dimension of a stored state.
        window_size: Second per-sample dimension of a stored state.
        device: Torch device (or device string) that sampled batches are moved to.
        buffer_size: Maximum number of transitions kept; older ones are overwritten.
    """

    def __init__(self, num_features, window_size, device, buffer_size=1000000):
        self.num_features = num_features
        self.device = device
        self.buffer_size = buffer_size
        self.state_mem = np.zeros((self.buffer_size, self.num_features, window_size), dtype=np.float32)
        self.action_mem = np.ones((self.buffer_size), dtype=np.int32)
        # Rewards are real-valued; an integer buffer would truncate fractional
        # rewards (e.g. -0.01 becomes 0) when they are written.
        self.reward_mem = np.zeros((self.buffer_size), dtype=np.float32)
        self.next_state_mem = np.zeros((self.buffer_size, self.num_features, window_size), dtype=np.float32)
        # Holds the NOT-done mask (see add_exp), despite the name.
        self.done_mem = np.zeros((self.buffer_size), dtype=bool)
        self.counter = 0  # total number of transitions ever added

    def add_exp(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest slot once the buffer is full."""
        pointer = self.counter % self.buffer_size
        self.state_mem[pointer] = state
        self.action_mem[pointer] = action
        self.reward_mem[pointer] = reward
        self.next_state_mem[pointer] = next_state
        # Stored inverted: True means the episode continues, so the value can be
        # multiplied directly into the bootstrapped part of a TD target.
        self.done_mem[pointer] = 1 - int(done)
        self.counter += 1

    def sample_exp(self, batch_size=64):
        """Sample ``batch_size`` distinct transitions uniformly at random.

        Returns:
            ``(states, actions, rewards, next_states, dones)`` as tensors on
            ``self.device``. ``dones`` is the boolean not-done mask written by
            ``add_exp``.

        Raises:
            ValueError: If fewer than ``batch_size`` transitions are stored
                (raised by ``np.random.choice`` with ``replace=False``).
        """
        max_mem = min(self.counter, self.buffer_size)
        batch = np.random.choice(max_mem, batch_size, replace=False)
        states = torch.tensor(self.state_mem[batch], dtype=torch.float32).to(self.device)
        actions = torch.tensor(self.action_mem[batch], dtype=torch.int64).to(self.device)
        rewards = torch.tensor(self.reward_mem[batch], dtype=torch.float32).to(self.device)
        next_states = torch.tensor(self.next_state_mem[batch], dtype=torch.float32).to(self.device)
        dones = torch.tensor(self.done_mem[batch], dtype=torch.bool).to(self.device)
        return states, actions, rewards, next_states, dones


In [None]:
class Agent():
    """Double dueling DQN trading agent over a 9-action space.

    Actions 0-3 sell 25/50/75/100 % of the ETH held, action 4 holds, and
    actions 5-8 buy ETH with 25/50/75/100 % of the cash held.

    Args:
        data_shape: ``(num_rows, num_features)`` of the training frame.
        num_episodes: Planned number of training episodes (drives the epsilon
            and learning-rate decay horizons).
        window_size: Number of consecutive rows that make up one state.
        gamma: Discount factor.
        update_interval: Number of train steps between target-network syncs.
        lr: Initial Adam learning rate.
        min_epsilon: Floor for the exploration rate.
    """

    def __init__(self, data_shape, num_episodes, window_size=48, gamma=0.99, update_interval=96, lr=0.01, min_epsilon=0.02):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.window_size = window_size
        self.data_shape = data_shape
        self.portfolio = [0, 0, 0]  # [total eth, cash_held, total_portfolio_value (eth value + cash held - initial investment)]
        self.gamma = gamma
        self.num_episodes = num_episodes
        self.epsilon = 1.0
        self.min_epsilon = min_epsilon
        self.update_interval = update_interval
        self.trainstep = 0
        # The positional order (window_size, n_features) is intentional: get_state
        # returns frames of shape (window_size, n_features), and the buffer slots
        # must have that same shape.
        self.memory = ExpReplay(self.window_size, data_shape[1], self.device)
        self.batch_size = 64
        self.online_net = DDDQN(self.data_shape[1], window_size).to(self.device)
        self.target_net = DDDQN(self.data_shape[1], window_size).to(self.device)
        self.target_net.load_state_dict(self.online_net.state_dict())
        # The target network is only ever evaluated, never optimised.
        self.target_net.eval()

        # Learning rate is multiplied by `decay_rate` once every `decay_steps`
        # train steps (a tenth of the planned run), applied smoothly per step.
        decay_steps = max(1, self.num_episodes * self.data_shape[0] // 10)
        decay_rate = 0.9
        self.optimizer = optim.Adam(self.online_net.parameters(), lr=lr)
        self.scheduler = ExponentialLR(self.optimizer, gamma=decay_rate ** (1.0 / decay_steps))
        self.criterion = nn.MSELoss()

    def get_action(self, state, cash_balance):
        """Choose an action epsilon-greedily, restricted to currently feasible actions.

        Selling (0-3) is only offered while more than 0.01 ETH is held; with no
        ETH but positive cash the choice is limited to hold/buy (4-8); otherwise
        the agent holds.

        Args:
            state: DataFrame window as returned by ``get_state``.
            cash_balance: Cash currently available.

        Returns:
            The chosen action as a plain ``int``.
        """
        can_sell = self.portfolio[0] > 0.01
        if np.random.rand() <= self.epsilon:
            if can_sell:
                return int(np.random.choice([0, 1, 2, 3, 4, 5, 6, 7, 8]))
            if cash_balance > 0:
                return int(np.random.choice([4, 5, 6, 7, 8]))
            return 4  # hold

        state_tensor = torch.FloatTensor(state.values).to(self.device).unsqueeze(0)
        with torch.no_grad():
            # Evaluation mode so BatchNorm/Dropout run in inference mode on this
            # single-sample batch; restored to training mode right after.
            self.online_net.eval()
            q_values = self.online_net(state_tensor)
            self.online_net.train()
        if can_sell:
            return torch.argmax(q_values).item()
        if cash_balance > 0:
            # Best of hold/buy only; .item() keeps the return type a plain int.
            return torch.argmax(q_values[0, 4:]).item() + 4
        return 4  # hold action

    def update_target(self):
        """Copy the online network's weights into the target network."""
        self.target_net.load_state_dict(self.online_net.state_dict())
        self.target_net.eval()

    def update_epsilon(self):
        """Decay epsilon exponentially so it reaches ``min_epsilon`` after
        ``num_episodes * data_shape[0]`` train steps, then stop decaying."""
        if self.epsilon > self.min_epsilon:
            b = self.min_epsilon**(1/(self.num_episodes*self.data_shape[0]))
            self.epsilon = b**self.trainstep

    def train(self):
        """Run one double-DQN optimisation step on a sampled minibatch.

        Returns:
            The scalar MSE loss, or ``None`` when the replay buffer holds fewer
            than ``batch_size`` transitions.
        """
        if self.memory.counter < self.batch_size:
            return

        if self.trainstep % self.update_interval == 0:
            self.update_target()

        states, actions, rewards, next_states, not_done = self.memory.sample_exp(self.batch_size)

        # Move tensors to the device and set the correct data type
        states = states.to(self.device).float()
        actions = actions.to(self.device).long()
        rewards = rewards.to(self.device).float()
        next_states = next_states.to(self.device).float()
        not_done = not_done.to(self.device).float()  # 1.0 while the episode continues

        batch_index = torch.arange(self.batch_size, dtype=torch.int64, device=self.device)

        # The TD target is a constant with respect to the optimised parameters,
        # so it is built without tracking gradients.
        with torch.no_grad():
            q_next_online = self.online_net(next_states)
            q_next_target = self.target_net(next_states)
            # Double DQN: the online net picks the action, the target net scores it.
            best_next_action = torch.argmax(q_next_online, dim=1)
            td_target = rewards + self.gamma * q_next_target[batch_index, best_next_action] * not_done

        q_predicted = self.online_net(states)
        # Only the entry of the action actually taken differs from the prediction,
        # so the other outputs contribute zero error.
        q_target = q_predicted.clone().detach()
        q_target[batch_index, actions] = td_target

        loss = self.criterion(q_predicted, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.scheduler.step()

        self.update_epsilon()
        self.trainstep += 1
        return loss.item()

    def save_model(self):
        """Write both networks' state_dicts under ``Output/{online,target}_model/model.pt``."""
        import os

        output_directory = "Output"
        online_model_directory = os.path.join(output_directory, "online_model")
        target_model_directory = os.path.join(output_directory, "target_model")

        os.makedirs(online_model_directory, exist_ok=True)
        os.makedirs(target_model_directory, exist_ok=True)

        torch.save(self.online_net.state_dict(), os.path.join(online_model_directory, 'model.pt'))
        torch.save(self.target_net.state_dict(), os.path.join(target_model_directory, 'model.pt'))

    def calculate_reward(self, t, eth_df, eth_close_price_unscaled, amount_to_sell, initial_investment, trading_fee_rate):
        """Execute a sale of ``amount_to_sell`` ETH at row ``t`` and return the reward.

        The proceeds minus a proportional trading fee are added to cash,
        ``self.portfolio`` is updated, and the reward is the change in portfolio
        value (cash + ETH value - initial investment) caused by the sale.
        ``eth_df`` is accepted for signature compatibility but not used.
        """
        ether_held, cash_held, previous_portfolio_value = self.portfolio

        unscaled_close_price = eth_close_price_unscaled["Close"].iloc[t]
        value_of_eth_sold = unscaled_close_price * amount_to_sell
        trading_fee = value_of_eth_sold * trading_fee_rate
        new_cash_held = cash_held + (value_of_eth_sold - trading_fee)
        new_ether_held = ether_held - amount_to_sell

        new_portfolio_value = new_cash_held + (new_ether_held * unscaled_close_price) - initial_investment
        reward = new_portfolio_value - previous_portfolio_value

        self.portfolio = [new_ether_held, new_cash_held, new_portfolio_value]
        return reward

    def trade(self, t, action, eth_df, eth_df_unscaled, initial_investment, trading_fee_rate):
        """Apply ``action`` at row ``t``, update ``self.portfolio`` and return the reward.

        Selling is rewarded with the resulting change in portfolio value, holding
        with a fixed -0.01, and buying with 0. A sell action while holding at most
        0.01 ETH is a no-op with reward 0.
        """
        reward = 0
        eth_held, cash_balance, _ = self.portfolio
        sell_percentages = [0.25, 0.5, 0.75, 1.0]
        buy_percentages = [0.25, 0.5, 0.75, 1.0]

        if 0 <= action <= 3 and eth_held > 0.01:
            amount_to_sell = eth_held * sell_percentages[action]
            reward = self.calculate_reward(t, eth_df, eth_df_unscaled, amount_to_sell, initial_investment, trading_fee_rate)
        elif action == 4:
            price = eth_df_unscaled["Close"].iloc[t]
            self.portfolio[2] = cash_balance + (eth_held * price) - initial_investment
            reward = -0.01
        elif action >= 5 and cash_balance >= 0:
            # NOTE(review): purchases are not charged trading_fee_rate, unlike sales.
            price = eth_df_unscaled["Close"].iloc[t]
            eth_to_buy = (cash_balance * buy_percentages[action - 5]) / price
            self.portfolio[0] += eth_to_buy
            self.portfolio[1] -= price * eth_to_buy
            self.portfolio[2] = self.portfolio[1] + (self.portfolio[0] * price) - initial_investment
        return reward

    def get_state(self, t, eth_df):
        """Return the ``window_size`` rows of ``eth_df`` that end at row ``t``.

        Near the start, where fewer than ``window_size`` rows exist, the window is
        left-padded with copies of the first row (and gets a fresh integer index).
        """
        first_row = t - self.window_size + 1
        if first_row >= 0:
            return eth_df.iloc[first_row : t + 1]
        padding = pd.DataFrame(np.repeat(eth_df.iloc[[0]].values, -first_row, axis=0), columns=eth_df.columns)
        return pd.concat([padding, eth_df.iloc[0 : t + 1]], ignore_index=True)

In [None]:
def get_crypto_data(start_date, end_date, split_date):
    """Return ``(train, test)`` ETH-USD frames, using the CSV cache when available.

    If ``Datasets/train_eth.csv`` and ``Datasets/test_eth.csv`` can be read they
    are returned as-is. Otherwise the data is fetched through ``HistoricalData``,
    every column is min-max scaled, the frame is split at the row whose index
    equals ``split_date``, and both parts are written to the cache.

    Note: a later cell defines another ``get_crypto_data`` with a two-argument
    signature; once that cell has run, it replaces this function.

    Args:
        start_date: Start of the requested range, passed to ``HistoricalData``.
        end_date: End of the requested range, passed to ``HistoricalData``.
        split_date: Index label of the first test row.

    Returns:
        Tuple ``(train, test)`` of DataFrames.
    """
    import os

    try:
        train = pd.read_csv("Datasets/train_eth.csv", index_col=0)
        test = pd.read_csv("Datasets/test_eth.csv", index_col=0)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Cache missing or unreadable: rebuild it. Catching only these errors
        # lets KeyboardInterrupt and genuine bugs propagate.
        df = HistoricalData('ETH-USD', 3600, start_date, end_date).retrieve_data()  # 3600 is the interval in seconds which is 1 hour
        split_row = df.index.get_loc(split_date)
        cols = df.columns.tolist()
        # NOTE(review): the scaler is fit on the full range, so test rows influence
        # the scaling of train rows.
        df = pd.DataFrame(MinMaxScaler().fit_transform(df), columns=cols)
        train = df[:split_row]
        test = df[split_row:]
        os.makedirs("Datasets", exist_ok=True)
        train.to_csv("Datasets/train_eth.csv")
        test.to_csv("Datasets/test_eth.csv")
    return train, test

In [None]:
def get_crypto_data(start_date, end_date):
    """Fetch ETH-USD history, min-max scale every column and cache it as CSV.

    The result is written to ``Datasets/train_eth.csv`` and returned. The scaled
    frame has a fresh integer index; the original index is not carried over.

    Args:
        start_date: Start of the requested range, passed to ``HistoricalData``.
        end_date: End of the requested range, passed to ``HistoricalData``.

    Returns:
        DataFrame of scaled values with the original column names.
    """
    # Granularity is 21600 seconds, i.e. 6 hours per row.
    raw_history = HistoricalData('ETH-USD', 21600, start_date, end_date).retrieve_data()
    column_names = raw_history.columns.tolist()
    scaled_values = MinMaxScaler().fit_transform(raw_history)
    scaled_history = pd.DataFrame(scaled_values, columns=column_names)
    scaled_history.to_csv("Datasets/train_eth.csv")
    return scaled_history

In [None]:
# pre-collected data
def get_precollected_data(split_ratio=0.8):
    """Load the pre-collected ETH history and return scaled and unscaled splits.

    Reads ``Datasets/eth_historical_data_simple.csv``, makes ``Date`` the index
    if it is not already, drops rows with NaNs, and splits chronologically (no
    shuffling). The min-max scaler is fit on the training split only and then
    applied to both splits, so no information from the test period leaks into
    the features; scaled test values may therefore fall outside [0, 1].

    All four frames are also written as CSV files under ``Output/``.

    Args:
        split_ratio: Fraction of rows assigned to the training split.

    Returns:
        ``(train_scaled, test_scaled, train_unscaled, test_unscaled)``. The scaled
        frames have a fresh integer index and all columns; the unscaled frames
        keep the date index and contain only the ``Close`` column.

    Raises:
        FileNotFoundError: If the pre-collected CSV is missing (from ``pd.read_csv``).
    """
    import os

    historic_data = pd.read_csv("Datasets/eth_historical_data_simple.csv", index_col=0)
    print('Pre-collected data loaded successfully.')
    # convert date to datetime if it isn't the index
    if historic_data.index.name != 'Date':
        historic_data['Date'] = pd.to_datetime(historic_data['Date'])
        print('Date converted to datetime.')
        historic_data = historic_data.set_index('Date')
        print('Date set as index.')
    historic_data = historic_data.dropna()
    print('NaN values dropped.')

    cols = historic_data.columns.tolist()
    train_unscaled, test_unscaled = train_test_split(historic_data, train_size=split_ratio, shuffle=False)
    # Fit on the training split only; reuse the same scaling for the test split.
    scaler = MinMaxScaler().fit(train_unscaled)
    train_scaled = pd.DataFrame(scaler.transform(train_unscaled), columns=cols)
    test_scaled = pd.DataFrame(scaler.transform(test_unscaled), columns=cols)
    print('Data split.')

    # Only the unscaled closing price is needed downstream.
    train_unscaled = train_unscaled[['Close']]
    test_unscaled = test_unscaled[['Close']]

    os.makedirs("Output", exist_ok=True)
    train_unscaled.to_csv("Output/train_unscaled.csv")
    train_scaled.to_csv("Output/train_scaled.csv")
    print('Train data saved.')
    test_unscaled.to_csv("Output/test_unscaled.csv")
    test_scaled.to_csv("Output/test_scaled.csv")
    print('Test data saved.')
    return train_scaled, test_scaled, train_unscaled, test_unscaled

In [None]:
# Raw pre-collected ETH history, kept as a notebook-level variable, followed by a quick preview.
historic_data = pd.read_csv(
    filepath_or_buffer="Datasets/eth_historical_data_simple.csv",
    index_col=0,
)

historic_data.head()

In [None]:
def main():
    """Train the trading agent, evaluate it greedily on the test split, and save results.

    Writes per-episode summaries to ``Output/training_scores.out``, the full
    training trace to ``Output/episode_mem.json``, the test summary and trace to
    ``Output/testing_scores.out`` / ``Output/testing_mem.json``, and finally the
    model weights via ``Agent.save_model``.
    """
    INITIAL_INVESTMENT = 1000
    NUM_EPISODES = 1000
    TRADING_FEE_RATE = 0.05

    train_df, test_df, train_close, test_close = get_precollected_data(split_ratio=0.8)
    trading_agent = Agent(train_df.shape, NUM_EPISODES, window_size=48, gamma=0.95, update_interval=96, lr=0.01, min_epsilon=0.02)

    episode_mem = [{"Actions": [], "Eth Held": [], "Cash Held": [], "Portfolio Value": [], "Reward": [], "Done": [], "Epsilon": [], "MSE Loss": []} for _ in range(NUM_EPISODES)]
    t0 = time()

    ######################## Training ########################
    for s in range(NUM_EPISODES):
        print(f"\n===== Episode {s + 1} / {NUM_EPISODES} =====")
        state = trading_agent.get_state(0, train_df)
        cash_balance = INITIAL_INVESTMENT
        portfolio_value_usd = INITIAL_INVESTMENT
        # Reset the agent's portfolio at the beginning of each episode
        trading_agent.portfolio = [0, INITIAL_INVESTMENT, portfolio_value_usd]

        done = False
        for t in range(len(train_df) - 1):
            if done:
                break
            action = trading_agent.get_action(state, cash_balance)
            next_state = trading_agent.get_state(t + 1, train_df)
            reward = trading_agent.trade(t, action, train_df, train_close, INITIAL_INVESTMENT, trading_fee_rate=TRADING_FEE_RATE)
            eth_held, cash_held, new_portfolio_value = trading_agent.portfolio
            cash_balance = cash_held  # update cash balance
            portfolio_value_usd = new_portfolio_value  # update portfolio value

            if t != 0:  # if not the first trade
                # The episode ends once the agent is effectively out of cash and ETH.
                done = cash_balance <= 1 and eth_held <= 0.01
            trading_agent.memory.add_exp(state, action, reward, next_state, done)
            loss = trading_agent.train()
            if not loss:
                # train() returns None until the replay buffer holds a full batch.
                loss = 0
            state = next_state

            episode_mem[s]["Actions"].append(int(action))
            episode_mem[s]["Eth Held"].append(float(eth_held))
            episode_mem[s]["Cash Held"].append(round(float(cash_balance), 2))
            episode_mem[s]["Portfolio Value"].append(float(portfolio_value_usd))
            episode_mem[s]["Reward"].append(float(reward))
            episode_mem[s]["Done"].append(bool(done))
            episode_mem[s]["Epsilon"].append(trading_agent.epsilon)
            episode_mem[s]["MSE Loss"].append(float(loss))

            if t % 100 == 0:
                print(f"Time step {t} / {len(train_df)}   |  Eth Held: {round(episode_mem[s]['Eth Held'][t], 7)}  |  Cash Held: {round(episode_mem[s]['Cash Held'][t], 2)}  |  Portfolio Value: {round(episode_mem[s]['Portfolio Value'][t], 3)}  |   MSE Loss: {round(episode_mem[s]['MSE Loss'][t], 3)}")

        # One summary line per episode (previously only the last episode was logged).
        if episode_mem[s]["Portfolio Value"]:
            with open('Output/training_scores.out', 'a') as f:
                f.write(f"EPISODE {s} (runtime: {time() - t0})   | Portfolio Value is {round(episode_mem[s]['Portfolio Value'][-1], 3)} Epsilon is {round(trading_agent.epsilon, 3)}   |   MSE Loss is {round(episode_mem[s]['MSE Loss'][-1], 3)}\n")

    with open('Output/episode_mem.json', 'w') as f:
        json.dump(episode_mem, f)

    ######################## Testing ########################
    t0 = time()
    testing_mem = {"Actions": [], "Eth Held": [], "Cash Held": [], "Portfolio Value": [], "Reward": [], "Done": []}
    trading_agent.epsilon = 0  # always act greedily during evaluation
    state = trading_agent.get_state(0, test_df)
    cash_balance = INITIAL_INVESTMENT
    portfolio_value_usd = INITIAL_INVESTMENT
    # Start the evaluation run from a fresh portfolio
    trading_agent.portfolio = [0, INITIAL_INVESTMENT, portfolio_value_usd]

    done = False
    for t in range(len(test_df) - 1):
        if done:
            break
        action = trading_agent.get_action(state, cash_balance)
        next_state = trading_agent.get_state(t + 1, test_df)
        reward = trading_agent.trade(t, action, test_df, test_close, INITIAL_INVESTMENT, trading_fee_rate=TRADING_FEE_RATE)
        eth_held, cash_held, new_portfolio_value = trading_agent.portfolio
        cash_balance = cash_held  # update cash balance
        portfolio_value_usd = new_portfolio_value  # update portfolio value
        if t != 0:  # if not the first trade
            done = cash_balance <= 1 and eth_held <= 0.01
        state = next_state

        testing_mem["Actions"].append(int(action))
        testing_mem["Eth Held"].append(float(eth_held))
        testing_mem["Cash Held"].append(round(float(cash_balance), 2))
        testing_mem["Portfolio Value"].append(float(portfolio_value_usd))
        testing_mem["Reward"].append(float(reward))
        testing_mem["Done"].append(bool(done))

        print(f"Time step {t} / {len(test_df)}   |   Eth Held: {round(testing_mem['Eth Held'][t], 7)}  |  Cash Held: {round(testing_mem['Cash Held'][t], 2)}  |  Portfolio Value: {round(testing_mem['Portfolio Value'][t], 3)}")

    if testing_mem["Portfolio Value"]:
        with open('Output/testing_scores.out', 'a') as f:
            f.write(f"TESTING (runtime: {time() - t0})   |  Portfolio Value is {round(testing_mem['Portfolio Value'][-1], 3)}\n")

    with open('Output/testing_mem.json', 'w') as f:
        json.dump(testing_mem, f)

    trading_agent.save_model()

if __name__ == "__main__":
    main()

In [None]:
import matplotlib.pyplot as plt

# Reload the scaled/unscaled splits at notebook level for the plots below.
# Note: this call also rewrites the CSV files under Output/.
train_df, test_df, train_close, test_close = get_precollected_data(split_ratio=0.8)

def plot_data(data, title):
    """Plot the notebook-level ``train_df`` closing price next to a run's portfolio value.

    Args:
        data: Mapping with a ``"Portfolio Value"`` sequence.
        title: Figure title.
    """
    _, ax = plt.subplots(figsize=(12, 6))
    ax.plot(train_df["Close"], label="Ethereum Price")
    ax.plot(data["Portfolio Value"], label="Portfolio Value")
    ax.set_xlabel("Time Steps")
    ax.set_ylabel("Value")
    ax.set_title(title)
    ax.legend()
    ax.grid()
    plt.show()
# visualize training data
with open('Output/episode_mem.json', 'r') as f:
    episode_mem = json.load(f)

with open("Output/testing_mem.json", "r") as f:
    testing_mem = json.load(f)

if not episode_mem:
    raise ValueError("Output/episode_mem.json contains no episodes")

# NUM_EPISODES only exists inside main(), so derive the count from the saved trace.
num_recorded_episodes = len(episode_mem)
# Episode 29 is the default showcase; fall back to the last one for shorter runs.
sample_episode = min(29, num_recorded_episodes - 1)
# Traces written by main() have no "Realized Profit" series; use the portfolio
# value (cash + ETH value - initial investment) when it is absent.
profit_key = "Realized Profit" if "Realized Profit" in episode_mem[0] else "Portfolio Value"


def plot_episode_metric(episodes, metric, title):
    """Overlay one recorded metric for each of the given episodes in a single figure."""
    for episode in episodes:
        plt.plot(episode[metric])
    plt.title(title)
    plt.show()


plot_data(episode_mem[sample_episode], "Training Data")

# Test run: labels are attached to the lines so the legend cannot get out of order.
plt.plot(testing_mem["Portfolio Value"], label="Average Portfolio Value")
plt.plot(test_df['Close'], label="Ethereum Price")
plt.legend()
plt.xlabel("Time")
plt.ylabel("Price")
plt.show()

# plot ethereum price history
plt.figure(figsize=(16, 8))
plt.plot(historic_data["Close"])
plt.title("Ethereum Price History")
plt.xlabel("Time")
plt.ylabel("Price")
plt.show()

# Profit curves: all episodes, the first episode, and the showcase episode.
# The same helper works for the other recorded series ("Reward", "Epsilon", "MSE Loss").
plot_episode_metric(episode_mem, profit_key, profit_key)
plot_episode_metric(episode_mem[:1], profit_key, profit_key)
plot_episode_metric([episode_mem[sample_episode]], profit_key, profit_key)