<a href="https://colab.research.google.com/github/icarus3/4castr/blob/master/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensortrade.env.default as default
from tensortrade.feed.core import Stream, DataFeed
from tensortrade.oms.exchanges import Exchange, ExchangeOptions
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.instruments import USD, BTC
from tensortrade.oms.wallets import Wallet, Portfolio
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from stable_baselines3 import A2C, PPO
import torch, random, optuna
from ray import tune
import ray, pickle
from ray.tune.search.optuna import OptunaSearch
from ray.tune.search import ConcurrencyLimiter
from stable_baselines3.common.env_util import make_vec_env
import copy
# from sb3_contrib import RecurrentPPO
# from stable_baselines3.common.evaluation import evaluate_policy

# One shared seed for the three random number generators used in this file
# (NumPy, the stdlib `random` module and PyTorch).
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)


def ada_df(path="data/ada_hourly.csv"):
    """Load the hourly price/indicator CSV and return the cleaned columns.

    Args:
        path: Location of the CSV file. Defaults to the path that used to be
            hard-coded, so a bare ``ada_df()`` call behaves as before.

    Returns:
        A DataFrame containing only ``date``, the OHLCV columns and the two
        indicator columns listed below. Rows with a missing value in *any*
        CSV column and exact duplicate rows are removed first. The row index
        of the surviving rows is left untouched (it is not reset).

    Raises:
        KeyError: If the CSV lacks one of the expected columns.
    """
    columns_to_copy = ['date', 'open', 'high', 'low', 'close', 'volume', 'trend_macd_diff', 'momentum_rsi']

    # Clean on the full frame before projecting, so a NaN in a column that is
    # not returned still drops the row (same as the previous in-place version).
    df = pd.read_csv(path).dropna().drop_duplicates()

    return df[columns_to_copy]


def get_feed(df, scale_features=False):
    """Build the observation feed and the close-price stream for the env.

    Args:
        df: DataFrame whose first column is skipped and whose remaining
            columns are turned into float streams; it must contain a
            ``close`` column.
        scale_features: When True, additionally append min-max scaled
            ``open``/``high``/``low``/``volume``/``trend_macd_diff``/
            ``momentum_rsi`` streams to the feed. Defaults to False, which
            yields the same feed as before (log-return only) without paying
            for a scaler fit whose result was previously discarded.

    Returns:
        ``(feed, p)`` where ``feed`` is a compiled ``DataFeed`` and ``p`` is
        the raw ``close`` stream.
    """
    # One float stream per column after the first one.
    streams = [
        Stream.source(list(df[c]), dtype="float").rename(c)
        for c in df.columns[1:]
    ]

    # The log-return feature and the returned price stream both derive from
    # the same "close" stream, so it is selected only once.
    p = Stream.select(streams, lambda s: s.name == "close")

    new_features = [
        p.log().diff().fillna(0).rename("lr"),
    ]

    if scale_features:
        selected_feature_name = ['open', 'high', 'low', 'volume', 'trend_macd_diff', 'momentum_rsi']

        # NOTE(review): the scaler is fitted on every row of `df`, so the
        # scaled values depend on the whole series, including later rows.
        selected_feature_scaled = pd.DataFrame(
            MinMaxScaler().fit_transform(df[selected_feature_name]),
            columns=selected_feature_name,
        )
        new_features.extend(
            Stream.source(list(selected_feature_scaled[k]), dtype="float").rename(k)
            for k in selected_feature_name
        )

    feed = DataFeed(new_features)
    feed.compile()

    return feed, p


def get_env(df, feed, p):
    """Assemble the TensorTrade environment used for training and replay.

    Args:
        df: DataFrame providing ``date``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` columns.
        feed: Observation ``DataFeed`` handed to the environment.
        p: Price stream handed to the PBR reward scheme.

    Returns:
        The environment built by ``default.create`` with a 24-step window,
        a maximum allowed loss of 0.5 and a Plotly chart renderer that saves
        HTML.
    """
    # Simulated exchange quoting the close series under the "USD-BTC" name,
    # charging a 0.003 commission.
    price_stream = Stream.source(list(df["close"]), dtype="float").rename("USD-BTC")
    exchange_options = ExchangeOptions(commission=0.003)
    bitstamp = Exchange("bitstamp", service=execute_order, options=exchange_options)(price_stream)

    # Start with 10000 USD and no BTC.
    cash = Wallet(bitstamp, 10000 * USD)
    asset = Wallet(bitstamp, 0 * BTC)
    portfolio = Portfolio(USD, [cash, asset])

    reward_scheme = default.rewards.PBR(price=p)
    action_scheme = default.actions.BSH(cash=cash, asset=asset).attach(reward_scheme)

    # Streams for the chart renderer: the date column is passed through
    # without a dtype, the OHLCV columns as floats.
    chart_streams = [Stream.source(list(df["date"])).rename("date")]
    chart_streams += [
        Stream.source(list(df[column]), dtype="float").rename(column)
        for column in ("open", "high", "low", "close", "volume")
    ]
    renderer_feed = DataFeed(chart_streams)

    return default.create(
        portfolio=portfolio,
        action_scheme=action_scheme,
        reward_scheme=reward_scheme,
        feed=feed,
        renderer_feed=renderer_feed,
        renderer=default.renderers.PlotlyTradingChart(save_format="html"),
        window_size=24,
        max_allowed_loss=0.5,
    )


# The train/hold-out split is currently switched off: the whole cleaned frame
# is used. All three names refer to that same DataFrame -- note that
# `train_end` is therefore the frame itself, not a row index.
raw_df = train_end = df_train = ada_df()

def sample_steps_batch_size():
    """Return a Ray Tune search space in which ``batch_size`` divides ``n_steps``.

    ``tune.randint`` returns a search-space object rather than an integer, so
    its divisors cannot be enumerated when the space is built (the previous
    code raised ``TypeError`` on ``n_steps_int + 1``). The dependent
    ``batch_size`` is instead resolved per trial through ``tune.sample_from``,
    once ``n_steps`` has a concrete value.

    Returns:
        A dict with the keys ``"n_steps"`` and ``"batch_size"``, meant to be
        used as (or merged into) the top level of a Tune ``param_space``.
    """

    def _pick_divisor(spec):
        # `spec.config` exposes the values already resolved for this trial.
        # NOTE(review): assumes the returned dict sits at the top level of the
        # param space; adjust the attribute path if it is nested.
        n_steps = spec.config.n_steps
        divisors = [i for i in range(1, n_steps + 1) if n_steps % i == 0]
        return random.choice(divisors)

    # NOTE(review): `sample_from` callables are resolved by Tune's own variant
    # generator; confirm searcher support before combining with OptunaSearch.
    return {
        "n_steps": tune.randint(64, 2048),
        "batch_size": tune.sample_from(_pick_divisor),
    }

def objective2_test(config):
    """Replay the saved PPO policy for one episode and print its mean reward.

    The policy is read from ``sb3-exp.ppo`` (the file written by
    ``objective2``), run deterministically over ``raw_df`` until the episode
    terminates or is truncated, and the episode is then rendered.

    Args:
        config: Hyperparameter dict. Not used for evaluation, because the
            saved model carries its own hyperparameters; the parameter is
            kept so existing ``objective2_test(params)`` calls still work.
    """
    feed, p = get_feed(raw_df)
    env = get_env(raw_df, feed, p)

    # `load` is a classmethod that RETURNS the restored model. Calling it on a
    # freshly built instance and ignoring the result (as before) left the
    # evaluation running on untrained weights.
    model = PPO.load("sb3-exp.ppo")

    obs, _ = env.reset()
    done = False
    rewards_array = []
    while not done:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        # The reward of the final (terminating) step is left out of the mean.
        if not done:
            rewards_array.append(rewards)

    # An episode that ends on its very first step collects no rewards; report
    # NaN instead of dividing by zero.
    if rewards_array:
        mean_reward = sum(rewards_array) / len(rewards_array)
    else:
        mean_reward = float("nan")

    print({"mean_reward": mean_reward})
    env.render()

def objective2(config):
    """Train a PPO agent on 25 copies of the trading env and save it.

    Args:
        config: Dict providing ``batch_size``, ``n_epochs``, ``gae_lambda``,
            ``clip_range``, ``max_grad_norm``, ``learning_rate``, ``gamma``,
            ``ent_coef`` and ``vf_coef``. ``n_steps`` is derived as twice
            ``batch_size``.

    The trained model is written to ``sb3-exp.ppo``; nothing is returned.
    """
    feed, p = get_feed(raw_df)
    template_env = get_env(raw_df, feed, p)
    _, _ = template_env.reset()

    # Every worker of the vectorised env is a deep copy of the template.
    vec_env = make_vec_env(lambda: copy.deepcopy(template_env), n_envs=25)

    tuned_keys = (
        "batch_size",
        "n_epochs",
        "gae_lambda",
        "clip_range",
        "max_grad_norm",
        "learning_rate",
        "gamma",
        "ent_coef",
        "vf_coef",
    )
    hyperparams = {key: config[key] for key in tuned_keys}

    model = PPO(
        "MlpPolicy",
        vec_env,
        verbose=1,
        seed=42,
        n_steps=int(config["batch_size"] * 2),
        **hyperparams,
    )

    model.learn(total_timesteps=100_000_000)
    model.save("sb3-exp.ppo")

# Fixed PPO hyperparameters used for the train-then-replay run below
# (presumably the best trial of an earlier search -- TODO confirm).
params = {
    'learning_rate': 0.006140282577637256,
    'batch_size': 1024,
    'n_epochs': 29,
    'gamma': 0.9432934745064481,
    'gae_lambda': 0.9475058611459596,
    'ent_coef': 0.10282313764715967,
    'vf_coef': 0.2803686116565862,
    'clip_range': 0.12024175041027786,
    'max_grad_norm': 0.15292983461339837,
}

# Only train/replay when executed directly (a notebook cell or a script runs
# as "__main__"); merely importing this file no longer starts the
# 100M-timestep training run.
if __name__ == "__main__":
    objective2(params)
    objective2_test(params)

# Disabled code, kept as an inert string literal: a Ray Tune search over
# `objective2` driven by OptunaSearch (concurrency capped at 30, 200 samples)
# whose results are pickled, followed by a plain Optuna study.
# NOTE(review): the tuner maximises "mean_reward", but `objective2` neither
# returns nor reports that metric, and `objective` is not defined in the
# visible part of this file -- both need fixing before re-enabling.
"""
ray.init()

search_space = {
    "learning_rate": tune.uniform(1e-5, 1e-2),
    "batch_size": tune.choice([64, 128, 256, 512, 1024, 2048]),
    "n_epochs": tune.randint(5, 30),
    "gamma": tune.uniform(0.94, 0.99),
    "gae_lambda": tune.uniform(0.94, 0.99),
    "ent_coef": tune.uniform(0.01, 0.3),
    "vf_coef": tune.uniform(0.01, 0.3),
    "clip_range": tune.uniform(0.01, 0.3),
    "max_grad_norm": tune.uniform(0.1, 0.9),
}

algo = OptunaSearch()
algo = ConcurrencyLimiter(algo, max_concurrent=30)
num_samples = 200

tuner = tune.Tuner(
    objective2,
    tune_config=tune.TuneConfig(
        metric="mean_reward",
        mode="max",
        search_alg=algo,
        num_samples=num_samples,
    ),
    param_space=search_space,
)
results = tuner.fit()
print(results)

with open("results_sb3_ppo_pbr_exp.pkl", "wb") as f:
    pickle.dump(results, f)


study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

best_params = study.best_params
print("Best hyperparameters:", best_params)
"""

# Disabled code, kept as an inert string literal: an A2C run (1,000,000
# timesteps) on the same environment followed by one deterministic replay
# and a render.
"""
raw_df = ada_df()
feed, p = get_feed(raw_df)
env = get_env(raw_df, feed, p)

obs, _ = env.reset()

model = A2C("MlpPolicy", env=env, verbose=1, seed=seed, learning_rate=0.00007, ent_coef=0.01)
model.learn(total_timesteps=1000000)

obs, _ = env.reset()

done = False
while not done:
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, terminated, truncated, info = env.step(action)
    done = terminated or truncated

env.render()
"""


ModuleNotFoundError: No module named 'tensortrade'