In [4]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px

# matplotlib.use('Agg')
import datetime

%matplotlib inline
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.logger import configure
from finrl.meta.data_processor import DataProcessor

from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline
from pprint import pprint

import sys
sys.path.append("../FinRL")

import itertools



<a id='1.4'></a>
## 2.4. Create Folders

In [5]:
from finrl import config
from finrl import config_tickers
import os
from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)
check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])



In [6]:
TRAIN_START_DATE = '2001-01-01'
TRAIN_END_DATE = '2019-05-31'
TRADE_START_DATE = '2019-06-01'
TRADE_END_DATE = '2019-11-30'

In [7]:
#selected_companies_list = ["EC", "CIB", "ARGO"]
selected_companies_list = ["ARGO"]


df = YahooDownloader(
    start_date = TRAIN_START_DATE,
    end_date = TRADE_END_DATE,
    ticker_list = selected_companies_list, 
).fetch_data()

[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (4758, 8)


In [8]:
df.sort_values(['date','tic'],ignore_index=True).head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2001-01-02,20.477472,20.845331,20.109613,14.046654,45874,ARGO,1
1,2001-01-03,20.017649,20.600092,19.680445,14.003822,48626,ARGO,2
2,2001-01-04,19.7111,20.232233,19.189966,13.661223,51480,ARGO,3
3,2001-01-05,19.006039,19.741755,19.006039,13.318616,20694,ARGO,4
4,2001-01-08,19.619137,19.619137,19.067347,13.618401,6524,ARGO,0


In [36]:

prices = pd.DataFrame(
    {
        "Bancolombia": df.query("tic == 'CIB'")["close"],
        "Ecopetrol": df.query("tic == 'EC'")["close"],
        "Cementos Argos": df.query("tic == 'ARGO'")["close"]

    }
)

prices.index = pd.to_datetime(df.date)
fig = px.area(
    data_frame=prices,
    title = "Precio Cierre Acciones "
)
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=list([
            dict(count=10, label="10 Years", step="year", stepmode="backward"),
            dict(count=5, label="5 Years", step="year", stepmode="backward"),
            dict(count=1, label="1 Year", step="year", stepmode="backward"),
            dict(step="all")
        ])
    )
)
fig.show()

# Part 4: Preprocess Data
We need to check for missing data and do feature engineering to convert the data point into a state.
* **Adding technical indicators**. In practical trading, various information needs to be taken into account, such as historical prices, current holding shares, technical indicators, etc. Here, we demonstrate two trend-following technical indicators: MACD and RSI.
* **Adding turbulence index**. Risk-aversion reflects whether an investor prefers to protect the capital. It also influences one's trading strategy when facing different market volatility level. To control the risk in a worst-case scenario, such as financial crisis of 2007–2008, FinRL employs the turbulence index that measures extreme fluctuation of asset price.

In [10]:
fe = FeatureEngineer(
                    use_technical_indicator=True,
                    tech_indicator_list = INDICATORS,
                    use_vix=True,
                    use_turbulence=True,
                    user_defined_feature = False)

processed = fe.preprocess_data(df)

Successfully added technical indicators
[*********************100%***********************]  1 of 1 completed
Shape of DataFrame:  (4757, 8)
Successfully added vix
Successfully added turbulence index


In [11]:
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))

processed_full = pd.DataFrame(combination,columns=["date","tic"]).merge(processed,on=["date","tic"],how="left")
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date','tic'])

processed_full = processed_full.fillna(0)

processed_full.sort_values(['date','tic'],ignore_index=True).head()

Unnamed: 0,date,tic,open,high,low,close,volume,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,2001-01-02,ARGO,20.477472,20.845331,20.109613,14.046654,45874.0,1.0,0.0,14.085811,13.964665,0.0,-66.666667,100.0,14.046654,14.046654,29.99,0.0
1,2001-01-03,ARGO,20.017649,20.600092,19.680445,14.003822,48626.0,2.0,-0.000961,14.085811,13.964665,0.0,-66.666667,100.0,14.025238,14.025238,26.6,0.0
2,2001-01-04,ARGO,19.7111,20.232233,19.189966,13.661223,51480.0,3.0,-0.011848,14.326404,13.481395,0.0,-100.0,100.0,13.9039,13.9039,26.969999,0.0
3,2001-01-05,ARGO,19.006039,19.741755,19.006039,13.318616,20694.0,4.0,-0.02888,14.436964,13.078194,0.0,-99.451408,100.0,13.757579,13.757579,28.67,0.0
4,2001-01-08,ARGO,19.619137,19.619137,19.067347,13.618401,6524.0,0.0,-0.025597,14.331133,13.128354,30.287366,-67.141332,100.0,13.729743,13.729743,29.84,0.0


In [12]:
mvo_df = processed_full.sort_values(['date','tic'],ignore_index=True)[['date','tic','close']]

<a id='4'></a>
# Part 5. Build A Market Environment in OpenAI Gym-style
The training process involves observing stock price change, taking an action and reward's calculation. By interacting with the market environment, the agent will eventually derive a trading strategy that may maximize (expected) rewards.

Our market environment, based on OpenAI Gym, simulates stock markets with historical market data.

## Data Split
We split the data into training set and testing set as follows:

Training data period: 2009-01-01 to 2020-07-01

Trading data period: 2020-07-01 to 2021-10-31


In [13]:
train = data_split(processed_full, TRAIN_START_DATE,TRAIN_END_DATE)
trade = data_split(processed_full, TRADE_START_DATE,TRADE_END_DATE)
print(len(train))
print(len(trade))

4630
126


In [14]:
stock_dimension = len(train.tic.unique())
state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 1, State Space: 11


In [15]:
buy_cost_list = sell_cost_list = [0.001] * stock_dimension
num_stock_shares = [0] * stock_dimension

env_kwargs = {
    "hmax": 100_000,
    "initial_amount": 1_000_000,
    "num_stock_shares": num_stock_shares,
    "buy_cost_pct": buy_cost_list,
    "sell_cost_pct": sell_cost_list,
    "state_space": state_space,
    "stock_dim": stock_dimension,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimension,
    "reward_scaling": 1e-4
}

timesteps_value = 10_000

e_train_gym = StockTradingEnv(df = train, **env_kwargs)

In [16]:
num_stock_shares

[0]

## Environment for Training



In [17]:
env_train, _ = e_train_gym.get_sb_env()
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


<a id='5'></a>
# Part 6: Train DRL Agents
* The DRL algorithms are from **Stable Baselines 3**. Users are also encouraged to try **ElegantRL** and **Ray RLlib**.
* FinRL includes fine-tuned standard DRL algorithms, such as DQN, DDPG, Multi-Agent DDPG, PPO, SAC, A2C and TD3. We also allow users to
design their own DRL algorithms by adapting these DRL algorithms.

### Agent Training: 5 algorithms (A2C, DDPG, PPO, TD3, SAC)


### Agent 1: A2C


In [18]:
agent = DRLAgent(env = env_train)
model_a2c = agent.get_model("a2c")

# set up logger
tmp_path = RESULTS_DIR + '/a2c'
new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
# Set new logger
model_a2c.set_logger(new_logger_a2c)

trained_a2c = agent.train_model(model=model_a2c,
                             tb_log_name='a2c',
                             total_timesteps=timesteps_value)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to results/a2c
--------------------------------------
| time/                 |            |
|    fps                | 640        |
|    iterations         | 100        |
|    time_elapsed       | 0          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -1.45      |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | 2.98       |
|    reward             | 0.07475842 |
|    std                | 1.03       |
|    value_loss         | 13.6       |
--------------------------------------
--------------------------------------
| time/                 |            |
|    fps                | 711        |
|    iterations         | 200        |
|    time_elapsed       | 1          |
|    total_timesteps    | 1000       |
| train/                |            |
|    

### Agent 2: DDPG

### Agent 3: PPO

Testing

In [20]:
e_trade_gym = StockTradingEnv(df = trade, turbulence_threshold = 70,risk_indicator_col='vix', **env_kwargs)

df_account_value_a2c, df_actions_a2c = DRLAgent.DRL_prediction(
    model=trained_a2c,
    environment = e_trade_gym)



hit end!


In [22]:
df_result_a2c = df_account_value_a2c.set_index(df_account_value_a2c.columns[0])
#df_result_ddpg = df_account_value_ddpg.set_index(df_account_value_ddpg.columns[0])
#df_result_ppo = df_account_value_ppo.set_index(df_account_value_ppo.columns[0])

#result = pd.merge(df_result_a2c, df_result_ddpg, left_index=True, right_index=True)
#result = pd.merge(result, df_result_ppo, left_index=True, right_index=True)
#result.columns = ['A2C', 'DDPG', 'PPO']
result = df_result_a2c
print("result: ", result.head())

result:              account_value
date                     
2019-06-03   1.000000e+06
2019-06-04   1.014236e+06
2019-06-05   1.015517e+06
2019-06-06   1.031892e+06
2019-06-07   1.044706e+06


In [23]:
import plotly.express as px

fig = px.line(
  data_frame=result,
  title = "Resultado Portafolio Trading Bot Argos"
)
fig.show()

In [31]:
print(len(df_account_value_a2c))
print(len(df_actions_a2c))

126
125


In [30]:
df_actions_a2c.actions.values

array([array([15815]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), array([0]),
       array([0]), array([0]), array([0]), array([0]), arr