## Installing Requirements

In [None]:
# Install FinRL from GitHub together with the packages installed alongside it here
# (swig, wrds, pyportfolioopt).
!pip install swig
!pip install wrds
!pip install pyportfolioopt
!pip install git+https://github.com/AI4Finance-Foundation/FinRL.git

# Install TA-Lib from prebuilt conda-forge archives: first the native library, then the
# Python bindings.
# NOTE(review): both archives are linux-64 builds and the bindings are unpacked into a
# Python 3.10 dist-packages directory - confirm the runtime matches before running.
url = 'https://anaconda.org/conda-forge/libta-lib/0.4.0/download/linux-64/libta-lib-0.4.0-h166bdaf_1.tar.bz2'
!curl -L $url | tar xj -C /usr/lib/x86_64-linux-gnu/ lib --strip-components=1
url = 'https://anaconda.org/conda-forge/ta-lib/0.4.19/download/linux-64/ta-lib-0.4.19-py310hde88566_4.tar.bz2'
!curl -L $url | tar xj -C /usr/local/lib/python3.10/dist-packages/ lib/python3.10/site-packages/talib --strip-components=3

# Install the Alpha Vantage client (used for fundamental data).
!pip install alpha_vantage

## Importing necessary libraries and dependencies

In [None]:
# Standard libraries
import pandas as pd
import numpy as np
import datetime
import yfinance as yf

#FinRl requirements
from finrl import config_tickers
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.main import check_and_make_directories
from finrl.meta.env_stock_trading.env_stocktrading import StockTradingEnv
from stable_baselines3.common import utils
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

check_and_make_directories([TRAINED_MODEL_DIR])
import itertools
import talib

# Other dependencies
from alpha_vantage.fundamentaldata import FundamentalData
from stable_baselines3 import DDPG, PPO
from plotnine import *
import seaborn as sns
from pypfopt.efficient_frontier import EfficientFrontier
from stable_baselines3.common.logger import configure
from stable_baselines3 import DDPG, PPO
import plotly.graph_objs as go
from finrl import config_tickers

#Portfolio environment requirements
import os
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv

## Data Preprocessor Class:
* Download the required data for the train and test periods for all the DOW_30 tickers.
* Calculate the corresponding technical indicators, and fetch fundamental data using Alpha Vantage.
* Preprocess and normalize the data.

In [None]:
class DataProcessor:
    """Download, enrich and normalize daily price data for the DOW 30 tickers.

    ``process_data`` is the entry point. It downloads prices with FinRL's
    ``YahooDownloader``, adds FinRL and TA-Lib indicators, fetches book value
    and revenue per share from Alpha Vantage, derives price ratios and finally
    scales every float64 column per ticker by its standard deviation.
    """

    # Environment variable consulted for the Alpha Vantage API key.
    API_KEY_ENV_VAR = 'ALPHAVANTAGE_API_KEY'
    # NOTE(security): this key used to be inlined in fetch_fundamental_data. It is kept
    # only as a last-resort fallback so existing runs keep working; prefer the
    # ``api_key`` argument or the environment variable and rotate this value.
    _LEGACY_API_KEY = 'AI9IKPBRI8DXDXPI'

    def __init__(self, Train_start, Test_start, Train_end, Test_end, config_tickers, api_key=None):
        """
        Initialize DataProcessor with start and end dates for training and testing periods,
        along with a configuration of tickers.

        Args:
        - Train_start (str): Start date for the training period.
        - Test_start (str): Start date for the testing period.
        - Train_end (str): End date for the training period.
        - Test_end (str): End date for the testing period.
        - config_tickers (object): Object exposing a ``DOW_30_TICKER`` list.
        - api_key (str, optional): Alpha Vantage API key. When omitted, the
          ``ALPHAVANTAGE_API_KEY`` environment variable is used, then the legacy key.
        """
        self.Train_start = Train_start
        self.Test_start = Test_start
        self.Train_end = Train_end
        self.Test_end = Test_end
        self.config_tickers = config_tickers
        if api_key is None:
            api_key = os.environ.get(self.API_KEY_ENV_VAR, self._LEGACY_API_KEY)
        self.api_key = api_key
        # A single download spans from the start of training to the end of testing.
        self.yahoo_downloader = YahooDownloader(start_date=self.Train_start,
                                                end_date=self.Test_end,
                                                ticker_list=self.config_tickers.DOW_30_TICKER)
        self.df_temp = None
        self.fundamental_data = None
        self.df_new = None

    @staticmethod
    def _per_ticker_feature(df, compute):
        """Evaluate ``compute`` on each ticker's rows and return row-aligned values.

        ``compute`` receives the sub-frame of one ticker (rows in their original
        order) and must return one value per row. The results are written back by
        row position, so the returned array lines up with ``df`` regardless of
        how its rows are ordered.
        """
        values = np.full(len(df), np.nan, dtype='float64')
        for positions in df.groupby('tic', sort=False).indices.values():
            values[positions] = np.asarray(compute(df.iloc[positions]), dtype='float64')
        return values

    @staticmethod
    def _true_range(group):
        """Return the true range of one ticker's rows (NaN for its first row)."""
        high = group['high']
        low = group['low']
        close_prev = group['close'].shift(1)
        # np.maximum propagates NaN, so the first row (no previous close) stays NaN.
        return np.maximum.reduce([
            high - low,
            abs(high - close_prev),
            abs(low - close_prev)
        ])

    @staticmethod
    def _to_float(value):
        """Convert an API field to float, mapping missing or non-numeric values to NaN."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return float('nan')

    def fetch_data(self):
        """
        Fetch data from Yahoo Finance API and calculate volatility.

        ``volatility`` is the standard deviation of each ticker's 14 most recent
        daily returns; the single value per ticker is repeated on all of its rows.
        """
        self.df_temp = self.yahoo_downloader.fetch_data()
        self.df_temp['return'] = self.df_temp.groupby('tic')['close'].pct_change()
        df_sorted = self.df_temp.sort_values(by='date', ascending=False)
        recent_returns = df_sorted.groupby('tic').head(14)
        volatility_per_symbol = recent_returns.groupby('tic')['return'].std()
        # The merged-in series is also named 'return', hence the suffix and the rename.
        self.df_temp = self.df_temp.merge(volatility_per_symbol, on='tic', suffixes=('', '_volatility'))
        self.df_temp.rename(columns={'return_volatility': 'volatility'}, inplace=True)

    def preprocess_data(self):
        """
        Preprocess data including feature engineering and technical indicators calculation.

        Adds the FinRL indicators (with VIX and turbulence) and then the TA-Lib
        parabolic SAR, ADX and the true range, each computed per ticker.
        """
        fe = FeatureEngineer(
            use_technical_indicator=True,
            tech_indicator_list=INDICATORS,
            use_vix=True,
            use_turbulence=True,
            user_defined_feature=False
        )
        self.df_new = fe.preprocess_data(self.df_temp)

        # Assigning ``groupby('tic').apply(...).values`` directly is only correct when the
        # result comes back in the frame's own row order; depending on the pandas version
        # it is returned grouped by ticker, which silently attaches values to the wrong
        # rows. Writing back by row position avoids relying on that ordering.
        self.df_new['sar'] = self._per_ticker_feature(
            self.df_new, lambda group: talib.SAR(group['high'], group['low']))
        self.df_new['adx'] = self._per_ticker_feature(
            self.df_new, lambda group: talib.ADX(group['high'], group['low'], group['close']))
        self.df_new['true_range'] = self._per_ticker_feature(self.df_new, self._true_range)

    def fetch_fundamental_data(self):
        """
        Fetch fundamental data using Alpha Vantage API.

        Stores, per ticker, the book value per share and the trailing revenue per
        share. A field that is absent or not numeric is stored as NaN so the
        derived ratios become NaN instead of aborting the whole run.
        """
        fd = FundamentalData(key=self.api_key)
        fundamental_data = {}
        for ticker in self.config_tickers.DOW_30_TICKER:
            overview_data, _meta_data = fd.get_company_overview(symbol=ticker)
            fields = {}
            for name, source_key in (('book_value_per_share', 'BookValue'),
                                     ('sales_per_share', 'RevenuePerShareTTM')):
                try:
                    raw_value = overview_data[source_key]
                except KeyError:
                    raw_value = None
                fields[name] = self._to_float(raw_value)
            fundamental_data[ticker] = fields
        self.fundamental_data = fundamental_data

    def calculate_ratios(self, row):
        """
        Calculate price-to-book and price-to-sales ratios for each row.

        The row is updated in place and returned; tickers without fundamental
        data get NaN for both ratios.
        """
        ticker_symbol = row['tic']
        if ticker_symbol in self.fundamental_data:
            book_value_per_share = self.fundamental_data[ticker_symbol]['book_value_per_share']
            sales_per_share = self.fundamental_data[ticker_symbol]['sales_per_share']
            row['pb_ratio'] = row['close'] / book_value_per_share
            row['ps_ratio'] = row['close'] / sales_per_share
        else:
            row['pb_ratio'] = float('nan')
            row['ps_ratio'] = float('nan')
        return row

    def _add_ratio_columns(self):
        """Add ``pb_ratio`` and ``ps_ratio`` to ``df_new`` in one vectorized pass.

        Produces the same values as applying ``calculate_ratios`` to every row,
        without the per-row Python overhead.
        """
        book_values = {tic: data['book_value_per_share'] for tic, data in self.fundamental_data.items()}
        sales_values = {tic: data['sales_per_share'] for tic, data in self.fundamental_data.items()}
        tickers = self.df_new['tic']
        # Tickers absent from the mapping become NaN, as in calculate_ratios.
        self.df_new['pb_ratio'] = self.df_new['close'] / tickers.map(book_values).astype('float64')
        self.df_new['ps_ratio'] = self.df_new['close'] / tickers.map(sales_values).astype('float64')

    @staticmethod
    def _normalize_by_std(column):
        """Scale a float64 column by its standard deviation; leave other dtypes untouched."""
        if column.dtype == 'float64':
            return column / column.std()
        return column

    def process_data(self):
        """
        Process data including fetching, preprocessing, and calculating ratios.

        Returns:
        - processed_full (DataFrame): the enriched data re-laid on a full
          (date, ticker) grid restricted to dates present in the data, sorted by
          date and ticker, with missing values replaced by 0.
        - df_normalized (DataFrame): the enriched data with every float64 column
          divided by its per-ticker standard deviation.
        """
        self.fetch_data()
        self.preprocess_data()
        self.fetch_fundamental_data()
        self._add_ratio_columns()
        self.df_new['pb_ratio'] = np.abs(self.df_new['pb_ratio'])

        list_ticker = self.df_new["tic"].unique().tolist()
        list_date = list(pd.date_range(self.df_new['date'].min(), self.df_new['date'].max()).astype(str))
        combination = list(itertools.product(list_date, list_ticker))
        processed_full = pd.DataFrame(combination, columns=["date", "tic"]).merge(
            self.df_new, on=["date", "tic"], how="left")
        # The calendar grid contains days with no data at all; keep only dates that
        # actually occur in the downloaded data.
        processed_full = processed_full[processed_full['date'].isin(self.df_new['date'])]
        processed_full = processed_full.sort_values(['date', 'tic'])
        processed_full = processed_full.fillna(0)

        df_normalized = self.df_new.copy()
        cols_to_normalize = df_normalized.columns.difference(['date', 'tic'])
        df_normalized[cols_to_normalize] = (
            self.df_new.groupby('tic')[cols_to_normalize].transform(self._normalize_by_std))

        return processed_full, df_normalized

## Function to add the covariance matrices for the StockPortfolio environment and clean the preprocessed, normalized data

In [None]:
def preprocess_dataframe(df_normalized, lookback=252):
    """Clean the normalized data and attach a rolling covariance matrix per date.

    Infinite values and NaNs are replaced by 0, rows are sorted by date and
    ticker, and for every date that has at least ``lookback`` earlier dates the
    covariance matrix of the tickers' daily close-to-close returns over the
    preceding window is stored in a ``cov_list`` column. Dates without a full
    window are dropped by the final merge.

    Args:
    - df_normalized (DataFrame): long-format data with ``date``, ``tic`` and ``close`` columns.
    - lookback (int): number of trading days in the window (252 is one year).

    Returns:
    - DataFrame sorted by date and ticker with the additional ``cov_list`` column.

    Raises:
    - ValueError: if ``lookback`` is smaller than 1.
    """
    if lookback < 1:
        raise ValueError("lookback must be at least 1, got {}".format(lookback))

    df_normalized = df_normalized.replace([np.inf, -np.inf], np.nan).fillna(0)
    df_normalized = df_normalized.sort_values(['date', 'tic'], ignore_index=True)
    # Index every row by the ordinal of its date so a label slice selects whole days.
    df_normalized.index = df_normalized.date.factorize()[0]

    unique_dates = df_normalized.date.unique()
    cov_list_normalized = []

    # Calculate covariance matrix for each date
    for i in range(lookback, len(unique_dates)):
        # Label slicing is inclusive on both ends: lookback + 1 days, i.e. lookback returns.
        data_lookback = df_normalized.loc[i - lookback:i, :]
        price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
        return_lookback = price_lookback.pct_change().dropna()
        cov_list_normalized.append(return_lookback.cov().values)

    # Create dataframe for covariance list
    df_cov_normalized = pd.DataFrame({'date': unique_dates[lookback:], 'cov_list': cov_list_normalized})

    # Merge covariance dataframe with original dataframe
    df_normalized = df_normalized.merge(df_cov_normalized, on='date')
    df_normalized = df_normalized.sort_values(['date', 'tic']).reset_index(drop=True)

    return df_normalized

## StockPortfolioEnv: portfolio reallocation and reward calculation take place here

In [None]:
class StockPortfolioEnv(gym.Env):
    """Gym environment in which each action is a set of portfolio weights.

    On every step the ``top_k`` largest action entries are kept, rescaled to sum
    to one, and applied to the close-to-close return of the next day. The
    observation stacks the day's covariance matrix (``cov_list``) on top of one
    row per technical indicator.

    ``df`` must be indexed by the ordinal of the trading day (all tickers of the
    same day share one index value) and provide ``date``, ``tic``, ``close``,
    ``cov_list`` and the indicator columns.

    ``hmax``, ``transaction_cost_pct``, ``reward_scaling`` and
    ``turbulence_threshold`` are stored but not used by this class.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self,
                df,
                stock_dim,
                hmax,
                initial_amount,
                transaction_cost_pct,
                reward_scaling,
                state_space,
                action_space,
                tech_indicator_list,
                turbulence_threshold=None,
                lookback=252,
                day=0,
                top_k=6):
        """Store the configuration and load the data of the starting day.

        Args:
        - df (DataFrame): market data, see the class description.
        - stock_dim (int): number of tickers.
        - initial_amount (float): starting portfolio value.
        - state_space (int): width of the observation (number of tickers).
        - action_space (int): number of action entries (one weight per ticker).
        - tech_indicator_list (list[str]): indicator columns appended to the observation.
        - day (int): index of the starting day.
        - top_k (int): number of largest weights kept on each step (default 6).

        Raises:
        - ValueError: if ``top_k`` is smaller than 1.
        """
        if top_k < 1:
            raise ValueError("top_k must be at least 1, got {}".format(top_k))

        # Initialize environment parameters
        self.day = day
        self.lookback = lookback
        self.df = df
        self.stock_dim = stock_dim
        self.hmax = hmax
        self.initial_amount = initial_amount
        self.transaction_cost_pct = transaction_cost_pct
        self.reward_scaling = reward_scaling
        self.state_space = state_space
        self.tech_indicator_list = tech_indicator_list
        self.turbulence_threshold = turbulence_threshold
        self.top_k = top_k

        # Define action space as a Box
        self.action_space = spaces.Box(low=0, high=1, shape=(action_space,))

        # Define observation space as a Box: covariance rows followed by indicator rows
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(self.state_space + len(self.tech_indicator_list), self.state_space))

        # Get initial data for the environment
        self._load_day()
        self.terminal = False
        self.portfolio_value = self.initial_amount
        # Defined up front so a terminal first step cannot hit a missing attribute.
        self.reward = 0
        self.asset_memory = [self.initial_amount]
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]

        # Create results directory if it doesn't exist
        os.makedirs("results", exist_ok=True)

    def _load_day(self):
        """Load the rows of ``self.day`` and rebuild the covariance and state from them."""
        self.data = self.df.loc[self.day, :]
        self.covs = self.data['cov_list'].values[0]
        indicator_rows = [self.data[tech].values.tolist() for tech in self.tech_indicator_list]
        self.state = np.append(np.array(self.covs), indicator_rows, axis=0)

    def _report_episode(self):
        """Save the reward plots and print the end-of-episode summary."""
        df_daily_return = pd.DataFrame(self.portfolio_return_memory)
        df_daily_return.columns = ['daily_return']

        # Calculate and plot cumulative rewards
        plt.plot(df_daily_return.daily_return.cumsum(), 'r')
        plt.savefig('results/cumulative_reward.png')
        plt.close()

        # Plot rewards
        plt.plot(self.portfolio_return_memory, 'r')
        plt.savefig('results/rewards.png')
        plt.close()

        # Print statistics
        print("=================================")
        print("begin_total_asset:{}".format(self.asset_memory[0]))
        print("end_total_asset:{}".format(self.portfolio_value))
        if df_daily_return['daily_return'].std() != 0:
            sharpe = (252**0.5)*df_daily_return['daily_return'].mean() / df_daily_return['daily_return'].std()
            print("Sharpe: ", sharpe)
        print("=================================")

    def step(self, actions):
        """Apply ``actions`` for one day and return ``(state, reward, terminal, info)``.

        Once the last day has been reached the call only reports the episode and
        returns the last state and reward unchanged. The reward is the portfolio
        value after the step.
        """
        self.terminal = self.day >= len(self.df.index.unique())-1
        if self.terminal:
            self._report_episode()
            return self.state, self.reward, self.terminal, {}

        # Keep only the top_k largest weights and rescale them to sum to one.
        top_indices = np.argsort(actions)[-self.top_k:]
        top_weights = actions[top_indices]
        total_weight = np.sum(top_weights)
        if total_weight != 0:
            top_weights = top_weights / total_weight
        weights = np.zeros_like(actions)
        weights[top_indices] = top_weights
        self.actions_memory.append(weights)

        # Advance one day and measure the weighted close-to-close return.
        last_day_memory = self.data
        self.day += 1
        self._load_day()
        portfolio_return = sum(((self.data.close.values / last_day_memory.close.values)-1)*weights)
        new_portfolio_value = self.portfolio_value*(1+portfolio_return)
        self.portfolio_value = new_portfolio_value

        self.portfolio_return_memory.append(portfolio_return)
        self.date_memory.append(self.data.date.unique()[0])
        self.asset_memory.append(new_portfolio_value)
        self.reward = new_portfolio_value
        return self.state, self.reward, self.terminal, {}

    def reset(self):
        """Rewind to day 0, clear the episode memories and return the initial state."""
        self.asset_memory = [self.initial_amount]
        self.day = 0
        self._load_day()
        self.portfolio_value = self.initial_amount
        self.reward = 0
        self.terminal = False
        self.portfolio_return_memory = [0]
        self.actions_memory = [[1 / self.stock_dim] * self.stock_dim]
        self.date_memory = [self.data.date.unique()[0]]
        return self.state

    def render(self, mode='human'):
        """Return the current state."""
        return self.state

    def softmax_normalization(self, actions):
        """Return the softmax of ``actions``.

        The maximum is subtracted before exponentiating; this leaves the result
        unchanged mathematically but keeps ``np.exp`` from overflowing.
        """
        shifted = np.exp(actions - np.max(actions))
        return shifted / np.sum(shifted)

    def save_asset_memory(self):
        """Return a DataFrame with the date and daily portfolio return of each step."""
        return pd.DataFrame({'date': self.date_memory, 'daily_return': self.portfolio_return_memory})

    def save_action_memory(self):
        """Return the applied weights as a DataFrame indexed by date, one column per ticker."""
        df_date = pd.DataFrame(self.date_memory)
        df_date.columns = ['date']
        df_actions = pd.DataFrame(self.actions_memory)
        df_actions.columns = self.data.tic.values
        df_actions.index = df_date.date
        return df_actions

    def _seed(self, seed=None):
        """Seed the environment's random generator and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def get_sb_env(self):
        """Wrap this environment in a ``DummyVecEnv`` and return it with its first observation."""
        e = DummyVecEnv([lambda: self])
        obs = e.reset()
        return e, obs


## StockTradingTrainer class: trains the three models - A2C (as a baseline), DDPG and PPO (the two mentioned in the paper) - and returns each model's actions (weights) and daily returns

In [None]:
class StockTradingTrainer:
    """Train A2C, DDPG and PPO agents on the portfolio environment and evaluate them."""

    # Training and evaluation order; also fixes the order of the returned frames.
    MODEL_NAMES = ("a2c", "ddpg", "ppo")

    def __init__(self, df_train, df_test, model_params):
        """
        Args:
        - df_train (DataFrame): data used to build the training environment.
        - df_test (DataFrame): data used to build the evaluation environment.
        - model_params (dict): optional ``A2C_PARAMS`` / ``DDPG_PARAMS`` / ``PPO_PARAMS``
          entries holding the keyword arguments of each model.
        """
        self.df_train = df_train
        self.df_test = df_test
        self.model_params = model_params

    def train_models(self, total_timesteps=60000):
        """Train the three models, then run each of them on the test data.

        Args:
        - total_timesteps (int): training budget given to every model.

        Returns:
        - tuple: ``(daily_return, actions)`` DataFrames for A2C, DDPG and PPO,
          flattened in that order into six elements.
        """
        stock_dimension = len(self.df_train.tic.unique())
        state_space = stock_dimension

        env_kwargs = {
            "hmax": 100,
            "initial_amount": 1000000,
            "transaction_cost_pct": 0.001,
            "state_space": state_space,
            "stock_dim": stock_dimension,
            "tech_indicator_list": INDICATORS,
            "action_space": stock_dimension,
            "reward_scaling": 1e-4
        }

        # Create training environment
        e_train_gym = StockPortfolioEnv(df=self.df_train, **env_kwargs)
        env_train, _ = e_train_gym.get_sb_env()

        # Train every model with the same agent wrapper and training environment.
        agent = DRLAgent(env=env_train)
        trained_models = {}
        for model_name in self.MODEL_NAMES:
            model_kwargs = self.model_params.get("{}_PARAMS".format(model_name.upper()), {})
            model = agent.get_model(model_name=model_name, model_kwargs=model_kwargs)
            trained_models[model_name] = agent.train_model(
                model=model, tb_log_name=model_name, total_timesteps=total_timesteps)

        # Create testing environment (shared by the three prediction runs)
        e_test_gym = StockPortfolioEnv(df=self.df_test, **env_kwargs)

        results = []
        for model_name in self.MODEL_NAMES:
            df_daily_return, df_actions = DRLAgent.DRL_prediction(
                model=trained_models[model_name], environment=e_test_gym)
            results.extend((df_daily_return, df_actions))

        return tuple(results)

## MVO class (used as a baseline)


In [None]:
class MVOAnalyzer:
    """Mean-variance optimization baseline: fit weights on the training prices, hold them over the test prices."""

    def __init__(self, df_train, df_test):
        """
        Initialize the MVOAnalyzer.

        Args:
        - df_train (DataFrame): DataFrame containing training data.
        - df_test (DataFrame): DataFrame containing test data.

        Both frames are long-format with ``date``, ``tic`` and ``close`` columns.
        """
        self.df_train = df_train
        self.df_test = df_test

        # Wide price tables: one row per date, one column per ticker.
        self.df_mvo_train = self.df_train.pivot(index="date", columns="tic", values="close")
        self.df_mvo_test = self.df_test.pivot(index="date", columns="tic", values="close")

    @staticmethod
    def _percent_returns(df_prices):
        """Return day-over-day returns in percent, shape ``(num_days - 1, num_assets)``."""
        prices = df_prices.to_numpy(dtype=float)
        return (prices[1:] - prices[:-1]) / prices[:-1] * 100

    def calculate_mvo_results(self, initial_capital=1_000_000):
        """
        Calculate Mean-Variance Optimization (MVO) results.

        The maximum-Sharpe weights (each capped at 50%) are estimated from the
        training returns, converted into share counts at the last training
        price and then held unchanged over the test period.

        Args:
        - initial_capital (float): amount invested at the end of the training period.

        Returns:
        - df_mvo_results (DataFrame): DataFrame containing MVO daily portfolio values.
        """
        portfolio_daily_returns = self._percent_returns(self.df_mvo_train)
        mean_returns = np.mean(portfolio_daily_returns, axis=0)
        cov_returns = np.cov(portfolio_daily_returns, rowvar=False)

        ef_mean = EfficientFrontier(mean_returns, cov_returns, weight_bounds=(0, 0.5))
        ef_mean.max_sharpe()  # Maximize the Sharpe Ratio
        cleaned_weights_mean = ef_mean.clean_weights()  # Round the weights neatly
        mvo_weights = initial_capital * np.array(list(cleaned_weights_mean.values()))

        # Money per asset times 1 / last training price = number of shares held.
        last_asset_prices = 1 / self.df_mvo_train.tail(1).values[0]
        initial_testing_portfolio = np.multiply(mvo_weights, last_asset_prices)

        mvo_daily_portfolio_value = self.df_mvo_test @ initial_testing_portfolio
        df_mvo_results = mvo_daily_portfolio_value.to_frame("MVO")
        df_mvo_results.index = pd.to_datetime(df_mvo_results.index)

        return df_mvo_results


In [None]:
# The DOW 30 ticker list shipped with FinRL; the bare name on the last line displays it.
tickers = config_tickers.DOW_30_TICKER
tickers

['AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CSCO',
 'CVX',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'KO',
 'JPM',
 'MCD',
 'MMM',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'TRV',
 'UNH',
 'CRM',
 'VZ',
 'V',
 'WBA',
 'WMT',
 'DIS',
 'DOW']

In [None]:
# Built-in FinRL technical indicators (imported from finrl.config); displayed for reference.
INDICATORS

['macd',
 'boll_ub',
 'boll_lb',
 'rsi_30',
 'cci_30',
 'dx_30',
 'close_30_sma',
 'close_60_sma']

## The whole analysis, using the classes above, is done below

In [None]:
# Date boundaries of the training and testing periods.
Train_start, Train_end = '2010-01-01', '2019-12-31'
Test_start, Test_end = '2020-06-01', '2021-06-01'

# Run the whole download / feature / normalization pipeline once.
processor = DataProcessor(
    Train_start=Train_start,
    Test_start=Test_start,
    Train_end=Train_end,
    Test_end=Test_end,
    config_tickers=config_tickers,
)
processed_full, df_normalized = processor.process_data()

In [None]:
# Clean the normalized data and attach the rolling covariance matrices (cov_list column).
df_normalized = preprocess_dataframe(df_normalized)



In [None]:
# Split the prepared data into the training and testing periods with FinRL's data_split.
# NOTE(review): whether the end date is inclusive depends on data_split - confirm.
df_train = data_split(df_normalized,Train_start,Train_end)
df_test = data_split(df_normalized,Test_start,Test_end)

In [None]:
# Define model parameters: one keyword-argument dict per algorithm, looked up by
# StockTradingTrainer under these exact keys.
model_params = {
    "A2C_PARAMS": {"n_steps": 5, "ent_coef": 0.005, "learning_rate": 0.0002},
    "DDPG_PARAMS": {"batch_size": 128, "buffer_size": 50000, "learning_rate": 0.0001},
    "PPO_PARAMS": {"ent_coef": 0.005, "learning_rate": 0.0002}
}

# Train the three agents and collect, per agent, its daily returns and chosen weights
# on the test period (returned in the order A2C, DDPG, PPO).
trainer = StockTradingTrainer(df_train, df_test, model_params)
(df_daily_return_a2c, df_actions_a2c,
 df_daily_return_ddpg, df_actions_ddpg,
 df_daily_return_ppo, df_actions_ppo) = trainer.train_models()



# Mean-variance optimization baseline evaluated on the same train/test split.
mvo_analyzer = MVOAnalyzer(df_train, df_test)
df_mvo_results = mvo_analyzer.calculate_mvo_results()



{'n_steps': 5, 'ent_coef': 0.005, 'learning_rate': 0.0002}
Using cpu device
-------------------------------------
| time/                 |           |
|    fps                | 300       |
|    iterations         | 100       |
|    time_elapsed       | 1         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -41.1     |
|    explained_variance | 0         |
|    learning_rate      | 0.0002    |
|    n_updates          | 99        |
|    policy_loss        | 1.5e+08   |
|    reward             | 1220219.5 |
|    std                | 0.997     |
|    value_loss         | 1.66e+13  |
-------------------------------------
-------------------------------------
| time/                 |           |
|    fps                | 299       |
|    iterations         | 200       |
|    time_elapsed       | 3         |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -41.1     |
|    explain



begin_total_asset:1000000
end_total_asset:1524121.1355317323
Sharpe:  2.3479962519881243
hit end!




begin_total_asset:1000000
end_total_asset:1494359.002181901
Sharpe:  2.350071233038423
hit end!


In [None]:
# Benchmark: daily returns of the Dow Jones Industrial Average over the test window.
ticker_symbol = '^DJI'
start_date = '2020-06-01'
end_date = '2021-05-28'

djia_data = yf.download(ticker_symbol, start=start_date, end=end_date)
djia_data = (
    djia_data.assign(daily_Return=djia_data['Close'].pct_change())[['daily_Return']]
    .reset_index()
    .fillna(0)  # the first day has no previous close
    .rename(columns={'Date': 'date', 'daily_Return': 'daily_return'})
)

[*********************100%%**********************]  1 of 1 completed


In [None]:
# Bring every date column to datetime so the frames can be joined on it.
for frame in (df_daily_return_a2c, df_daily_return_ddpg, df_daily_return_ppo, djia_data):
    frame['date'] = pd.to_datetime(frame['date'])

# A2C and DDPG collide on 'daily_return' and receive explicit suffixes; PPO joins without
# a collision and only gets the '_x' suffix when the DJIA column ('_y') is merged in.
merged_df = (
    df_daily_return_a2c
    .merge(df_daily_return_ddpg, on='date', suffixes=('_a2c', '_ddpg'))
    .merge(df_daily_return_ppo, on='date')
)
final_df = merged_df.merge(djia_data, on='date').rename(
    columns={'daily_return_x': 'daily_return_ppo', 'daily_return_y': 'daily_return_DJIA'})

In [None]:
# Turn each strategy's daily returns into the value of an initial investment over time.
initial_investment = 1000000
for strategy_label, return_column in (('a2c', 'daily_return_a2c'),
                                      ('ddpg', 'daily_return_ddpg'),
                                      ('ppo', 'daily_return_ppo'),
                                      ('djia', 'daily_return_DJIA')):
    final_df['cumulative_return_' + strategy_label] = (
        (1 + final_df[return_column]).cumprod() * initial_investment)

# The MVO series is already a portfolio value. Attach it by date rather than by row
# position: final_df only contains dates common to all merged frames, so a positional
# assignment would shift values as soon as the two date sets differ.
df_mvo_results_reset = df_mvo_results.reset_index()
final_df['cumulative_return_mvo'] = final_df['date'].map(df_mvo_results['MVO'])

In [None]:
# Display the combined table of daily returns and portfolio values.
final_df



Unnamed: 0,date,daily_return_a2c,daily_return_ddpg,daily_return_ppo,daily_return_DJIA,cumulative_return_a2c,cumulative_return_ddpg,cumulative_return_ppo,cumulative_return_djia,cumulative_return_mvo
0,2020-06-01,0.000000,0.000000,0.000000,0.000000,1.000000e+06,1.000000e+06,1.000000e+06,1.000000e+06,1.072701e+06
1,2020-06-02,0.011029,0.005023,0.005020,0.010506,1.011029e+06,1.005023e+06,1.005020e+06,1.010506e+06,1.083867e+06
2,2020-06-03,0.014913,0.018808,0.013838,0.020481,1.026107e+06,1.023925e+06,1.018927e+06,1.031202e+06,1.084865e+06
3,2020-06-04,-0.003212,0.002729,-0.000853,0.000454,1.022811e+06,1.026719e+06,1.018058e+06,1.031670e+06,1.068941e+06
4,2020-06-05,0.036635,0.017579,0.021776,0.031549,1.060282e+06,1.044768e+06,1.040227e+06,1.064218e+06,1.103846e+06
...,...,...,...,...,...,...,...,...,...,...
246,2021-05-21,-0.000801,0.003840,0.001422,0.003629,1.388324e+06,1.499153e+06,1.492818e+06,1.342799e+06,1.402433e+06
247,2021-05-24,0.003938,0.007905,0.007044,0.005441,1.393791e+06,1.511004e+06,1.503334e+06,1.350106e+06,1.408603e+06
248,2021-05-25,-0.003254,-0.000063,-0.000106,-0.002370,1.389257e+06,1.510909e+06,1.503174e+06,1.346906e+06,1.412198e+06
249,2021-05-26,0.001484,0.003958,-0.007786,0.000309,1.391318e+06,1.516889e+06,1.491470e+06,1.347322e+06,1.409402e+06


In [None]:
import plotly.graph_objects as go

# One line per strategy, in legend order.
fig = go.Figure()
for value_column, trace_name in (('cumulative_return_a2c', 'A2C'),
                                 ('cumulative_return_ddpg', 'DDPG'),
                                 ('cumulative_return_ppo', 'PPO'),
                                 ('cumulative_return_djia', 'DJIA'),
                                 ('cumulative_return_mvo', 'MVO')):
    fig.add_trace(go.Scatter(x=final_df['date'], y=final_df[value_column], mode='lines', name=trace_name))
fig.update_layout(title='Cumulative Returns',
                   xaxis_title='Date',
                   yaxis_title='Cumulative Return',
                   legend_title='Strategy')

# Show plot
fig.show()


## More analysis using the pyfolio library for all 4 strategies



In [None]:
!pip install pyfolio

In [None]:
def convert_daily_return_to_pyfolio_ts(df):
    """Return the ``daily_return`` column as a Series indexed by UTC timestamps.

    ``df`` must provide ``date`` and ``daily_return`` columns and is left
    unmodified. The resulting index is named ``date``.
    """
    frame = df.copy()
    timestamps = pd.to_datetime(frame.pop("date"))
    frame.index = pd.DatetimeIndex(timestamps, name="date").tz_localize("UTC")
    return pd.Series(frame["daily_return"].values, index=frame.index)

In [None]:
def convert_daily_return_to_pyfolio_ts(df, strategy_name):
    """Return the values of column ``strategy_name`` as a new, default-indexed Series.

    The values are copied, so the result is independent of ``df``.
    """
    column_values = df[strategy_name].values.copy()
    return pd.Series(column_values)

from pyfolio import timeseries

def calculate_performance_stats(df):
    """Compute pyfolio performance statistics for each strategy column of ``df``.

    Returns a DataFrame with one column per strategy (A2C, DDPG, PPO and DJIA
    daily returns) and one row per statistic. Each strategy's own returns are
    also passed as ``factor_returns``.
    """
    tracked_columns = ('daily_return_a2c', 'daily_return_ddpg', 'daily_return_ppo', 'daily_return_DJIA')

    def stats_for(column):
        # Convert the column to the plain return series expected by perf_stats.
        returns = convert_daily_return_to_pyfolio_ts(df, column)
        return timeseries.perf_stats(returns=returns, factor_returns=returns,
                                     positions=None, transactions=None, turnover_denom="AGB")

    return pd.DataFrame({column: stats_for(column) for column in tracked_columns})

# Compute the performance statistics of all four return series in final_df.
df_performance_stats = calculate_performance_stats(final_df)


In [None]:
# Display the statistics table (one column per strategy).
df_performance_stats

Unnamed: 0,daily_return_a2c,daily_return_ddpg,daily_return_ppo,daily_return_DJIA
Annual return,0.394347,0.526682,0.496752,0.35451
Cumulative returns,0.392508,0.524121,0.494359,0.35288
Annual volatility,0.180442,0.18785,0.178559,0.174486
Sharpe ratio,1.933925,2.347996,2.350071,1.827713
Calmar ratio,4.218839,6.201783,4.293047,3.822889
Stability,0.937966,0.960497,0.961594,0.943069
Max drawdown,-0.093473,-0.084924,-0.115711,-0.092733
Omega ratio,1.390841,1.518774,1.508641,1.376623
Sortino ratio,2.742065,3.522019,3.361386,2.542411
Skew,-0.949529,-0.484632,-1.22896,-1.238262
