In [None]:
'''
Be sure to install the latest Gym environments on your local machine. 
Navigate to .../rl-portfolio/Gym Environments/Portfolio_Management/
and run the command:
pip install -e .
'''

import numpy             as np
import multiprocessing   as mp
import matplotlib.pyplot as plt
import gym
import Portfolio_Gym
import A2C_X as A2C
import Wrapper


In [None]:
''' Settings & descriptions for the AC Agent '''

'''
    Actor_Hypers & Critic_Hypers | dict
        'Learning Rate' | float, list | The learning rate of the Actor.
        'Epoch'         | int         | The number of times each observation passes through the network.
        'Network Size'  | int, list   | The size of the Actor NN (this notebook passes a list, e.g. [8,4]).
        'Activation'    | string      | The activation of the Actor. Acceptable inputs include ['Relu', 'Sigmoid', 'Tanh', 'Softplus']
        'Batch Size'    | int         | The number of observations which pass through the network in each pass.
        'Alpha'         | float       | L2 regularization coefficient for the Actor.
 
     Gamma | float
        The discount rate for reward received by the agent.

    Sigma_Range | list
        A list of two floats: the first gives the starting sigma and the last gives the terminal sigma. Sigma here refers to the sigma of the policy.

    Sigma_Anneal | float
        The fraction of training episodes which must pass before sigma decays to its terminal value.

    Retrain_Frequency | int
        The number of episodes between refits of the Actor and Critic

    Action_Space_Clip | float
        The value at which to clip the leverage the agent can take, to prevent it from randomly acting too aggressively.

    Experiance_Mode | string
        A key which indicates the method to be used to generate experience targets. Acceptable inputs include: ['Monte_Carlo', 'TD_1', 'TD_Lambda']

    TD_Lambda | float, list
        The lambda to use if using Experiance_Mode 'TD_Lambda'. If a float is passed, lambda is constant. If a list of length 3 is passed, then lambda will fall from the 0th value to the 1st value, and will take the 2nd value fraction of training episodes to do so, i.e. by default it will fall from 1 to 0.8 across 0.5 of the training episodes. A value of 1 is equivalent to Monte_Carlo, and a value of zero is equivalent to TD_1.

    Monte_Carlo_Frac | float
        The fraction of episodes to run overwriting Experiance_Mode with 'Monte_Carlo', as this method is most stable when the critic is poorly trained at the start of the training sequence.

    Ignore_Actor_Frac | float
        The fraction of episodes to train only Critic. Prevents the Actor being trained on nonsense at the start of training.
'''


'''
Reading the plots:
In order from left to right the plots represent:
    1. The critic prediction vs wealth, taken at 5 snapshots equally spaced throughout training. 
       The black curve represents the true utility function which the critic should approximate.
       
    2. Action vs wealth, taken at 5 snapshots equally spaced throughout training. Ideally this should be a horizontal line, 
       as wealth should not impact leverage; however, this plot is generated with the factor at its stationary point, and the
       relation could change dramatically as the factor value fluctuates.
       
    3. Actor vs Factor, taken at 5 snapshots equally spaced throughout training. Ideally should see a linear relation with 
       positive gradient. 
       
    4. The Mu of the policy vs training episode. (Note this is not the action taken, but instead the action the agent thinks is optimal)
       A widening spread in this plot indicates that the agent is adjusting its action based upon variations in the state parameters, which
       is the desired behaviour. A thin line indicates the action is independent of state parameters.
       
    5. Agent terminal utility vs Merton terminal utility. Recalculated every 10,000 steps. Represents the agent's performance were the
       exploration rate set to zero.
       
    6. Fraction of actions which are within 10% of the Merton action.
'''
pass

In [None]:

# Model Parameters.
# Shared configuration read by Run_0 and Run_1; the meaning of each entry is
# described in the settings cell above.

# Number of training episodes handed to Wrapper.Train.
N_Eps = 1000

# Hyper-parameters of the Actor network.
Actor_Hypers = {
    "Learning Rate" : 0.005,
    "Epoch"         : 1,
    "Network Size"  : [8, 4],
    "Activation"    : "Sigmoid",
    "Batch Size"    : 60,
    "Alpha"         : 0.1,
}

# Hyper-parameters of the Critic network (differs from the Actor only in "Epoch").
Critic_Hypers = {
    "Learning Rate" : 0.005,
    "Epoch"         : 10,
    "Network Size"  : [8, 4],
    "Activation"    : "Sigmoid",
    "Batch Size"    : 60,
    "Alpha"         : 0.1,
}

# Discount rate applied to rewards.
Gamma = 0.999

# Policy sigma: [starting value, terminal value] and the fraction of training
# episodes over which it decays.
Sigma_Range  = [2, 0.5]
Sigma_Anneal = 1

# Episodes between refits of the Actor and Critic.
Retrain_Frequency = 20

# Leverage clip applied to the agent's actions.
Action_Space_Clip = 75

# Target-generation method and its lambda.
Experiance_Mode = 'TD_Lambda'
TD_Lambda       = 0.5

# Fraction of episodes for which Experiance_Mode is overridden with 'Monte_Carlo'.
Monte_Carlo_Frac = 0.2


In [None]:
'''
Simulated GBM Environment (With no Factors)
'''

# Function to facilitate multiprocessing.
def Run_0(seed):
    '''
    Train one Actor-Critic agent on the 'Simulated-v0' environment.

    Parameters
    ----------
        seed | int
            Passed to np.random.seed before the environment is created.

    Returns
    -------
        None. All work happens inside Wrapper.Train.
    '''
    np.random.seed(seed)

    env = gym.make('Simulated-v0')
    env.Set_Params(Max_Leverage = 100, Min_Leverage = -100)

    # Agent settings are taken from the module-level configuration cell.
    agent_settings = {'Gamma'             : Gamma,
                      'Sigma_Range'       : Sigma_Range,
                      'Sigma_Anneal'      : Sigma_Anneal,
                      'Retrain_Frequency' : Retrain_Frequency,
                      'Action_Space_Clip' : Action_Space_Clip,
                      'Experiance_Mode'   : Experiance_Mode,
                      'TD_Lambda'         : TD_Lambda,
                      'Monte_Carlo_Frac'  : Monte_Carlo_Frac}
    agent = A2C.Actor_Critic(env, Actor_Hypers, Critic_Hypers, **agent_settings)

    # Plots requested from the wrapper during training.
    plots = ['Mu', 'Merton_Benchmark', 'Percent_Merton_Action']
    Wrapper.Wrapper(agent).Train(N_Eps, Plot = plots, Validate = True)


# Run the investigation...
# with mp.Pool(mp.cpu_count()) as pool:
#     _ = pool.map(Run_0, np.random.randint(0, int(1e9), 3))


In [None]:
''' 
Simulated VAR Environment, Factor per 'Portfolio Choice Problems' by Brandt. (R2 about 0.033)     
'''

# Function to facilitate multiprocessing.
def Run_1(seed):
    '''
    Train one Actor-Critic agent on the 'Simulated-v1' environment.

    Parameters
    ----------
        seed | int
            Passed to np.random.seed before the environment is created.

    Returns
    -------
        None. All work happens inside Wrapper.Train.
    '''
    np.random.seed(seed)

    simulated_env = gym.make('Simulated-v1')
    simulated_env.Set_Params(Max_Leverage = 100, Min_Leverage = -100)

    # Every agent setting below is read from the module-level configuration cell.
    actor_critic = A2C.Actor_Critic(
        simulated_env,
        Actor_Hypers,
        Critic_Hypers,
        Gamma             = Gamma,
        Sigma_Range       = Sigma_Range,
        Sigma_Anneal      = Sigma_Anneal,
        Retrain_Frequency = Retrain_Frequency,
        Action_Space_Clip = Action_Space_Clip,
        Experiance_Mode   = Experiance_Mode,
        TD_Lambda         = TD_Lambda,
        Monte_Carlo_Frac  = Monte_Carlo_Frac,
    )

    trainer = Wrapper.Wrapper(actor_critic)
    trainer.Train(N_Eps,
                  Plot     = ['Mu', 'Merton_Benchmark', 'VAR_Benchmark'],
                  Validate = True)


# Run the investigation...
# with mp.Pool(mp.cpu_count()) as pool:
#     _ = pool.map(Run_1, np.random.randint(0, int(1e9), 3))


In [None]:
''' 
Historical Environment, Using Fama Market Average.   
'''


# 'ltr', 'corpr', 'CRSP_SPvw', 'CRSP_SPvwx', 'Mom', 'HML', 'SMB'
def Run_Hist(seed):
    '''
    Train one Actor-Critic agent on the 'Historical-v0' environment.

    The agent settings are read from the module-level configuration cell
    (Gamma, Sigma_Range, Sigma_Anneal, Retrain_Frequency, Action_Space_Clip,
    Experiance_Mode, TD_Lambda, Monte_Carlo_Frac), exactly as Run_0 and Run_1 do,
    so that editing the configuration affects every experiment consistently.

    Parameters
    ----------
        seed | int
            Passed to np.random.seed before the environment is created.

    Returns
    -------
        None. All work happens inside Wrapper.Train.
    '''
    np.random.seed(seed)

    # Parameters passed to the environment both as state parameters and as
    # first-difference parameters.
    Differenced_Params = ['DY', 'EY', 'DP', 'DE', 'svar', 'AAA', 'BAA', 'lty', 'defaultspread', 'tbl', 'b/m', 'ntis']

    # Parameters passed as state parameters only.
    Undifferenced_Params = ['ltr', 'corpr', 'CRSP_SPvw', 'CRSP_SPvwx', 'Mom', 'HML', 'SMB']

    Env = gym.make('Historical-v0')
    Env.Set_Params(Max_Leverage = 100, Min_Leverage = -100,
                   State_Parameters = Differenced_Params + Undifferenced_Params,
                   First_Difference_Params = Differenced_Params,
                   Normalise = False)

    Env.Over_Sample(Mult = 5, N_ = 5)

    myAC = A2C.Actor_Critic(Environment = Env, Actor_Hypers = Actor_Hypers, Critic_Hypers = Critic_Hypers,
                            Gamma = Gamma, Sigma_Range = Sigma_Range, Sigma_Anneal = Sigma_Anneal,
                            Retrain_Frequency = Retrain_Frequency, Action_Space_Clip = Action_Space_Clip,
                            Experiance_Mode = Experiance_Mode, TD_Lambda = TD_Lambda, Monte_Carlo_Frac = Monte_Carlo_Frac)

    myWrapper = Wrapper.Wrapper(myAC)
    myWrapper.Train(N_Eps, Plot = ['Mu', 'Merton_Benchmark'], Validate = True, Equity_Curve = True)

    return None

# Run the investigation: map Run_Hist over 4 randomly drawn integer seeds, using
# a pool with as many workers as mp.cpu_count() reports.
# NOTE(review): no seed is set in the visible cells before this draw, so the
# seeds (and therefore the results) presumably differ on every execution.
# NOTE(review): on platforms that start workers with 'spawn', functions defined
# in a notebook cell may not be importable by the child processes - confirm.
with mp.Pool(processes = mp.cpu_count()) as pool:
    _ = pool.map(Run_Hist, np.random.randint(low = 0, high = int(1e9), size = 4))


In [1]:
import HPC_Analysis      as HPC
import matplotlib.pyplot as plt
import numpy             as np

# HPC wrapper for the 'Historical-v0' environment: 1000 episodes, 16 instances.
myWrapper = HPC.HPC_Wrapper('Historical-v0', N_Eps = 1000, N_Instances = 16)

# Environment settings forwarded through the wrapper.
myWrapper.Set_Env_Params(
    Max_Leverage     = 100,
    Min_Leverage     = -100,
    State_Parameters = ['ltr', 'corpr', 'CRSP_SPvw', 'CRSP_SPvwx', 'Mom', 'HML', 'SMB'],
    Normalise        = False,
)


Instructions for updating:
non-resource variables are not supported in the long term


In [None]:

# Column 0 of the result is plotted as 'Sharpe', column 1 as 'Delta Utility'.
Results = myWrapper.Agent_Repeatability()

Sharpe_Column        = Results[:,0]
Delta_Utility_Column = Results[:,1]

plt.scatter(Sharpe_Column, Delta_Utility_Column)
plt.xlabel('Sharpe')
plt.ylabel('Delta Utility')
plt.show()

# Standard deviation of each column, rounded to 3 decimal places.
print(round(np.std(Sharpe_Column), 3), round(np.std(Delta_Utility_Column), 3))


In [2]:
# Hyper-parameter search through the HPC wrapper, called with Population = 16 and Num_Generations = 5.
# NOTE(review): the search procedure itself lives in HPC_Analysis and is not visible in this notebook.
myWrapper.Hyper_Genetic_Search(Population = 16, Num_Generations = 5)


  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
100%|██████████| 1000/1000 [00:06<00:00, 163.22it/s]
100%|██████████| 1000/1000 [00:09<00:00, 100.57it/s]
100%|██████████| 1000/1000 [00:10<00:00, 94.76it/s]
100%|██████████| 1000/1000 [00:11<00:00, 90.77it/s]
  result = entry_point.load(False)
100%|██████████| 1000/1000 [00:08<00:00, 111.86it/s]
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
100%|██████████| 1000/1000 [00:10<00:00, 96.20it/s]
100%|██████████| 1000/1000 [00:10<00:00, 97.09it/s]
100%|██████████| 1000/1000 [00:10<00:00, 96.84it/s]
100%|██████████| 1000/1000 [00:10<00:00, 99.41it/s]
100%|██████████| 1000/1000 [00:09<00:00, 102.48it/s]
100%|██████████| 1000/1000 [00:11<00:00, 88.78it/s]
100%|██████████| 1000/1000 [00:12<00:00, 82.87it/s]
100%|██████████| 1000/1000 [00:09<00:00, 100.58it/s]
100%|██████████| 1000/1000 [00:09<00:00

100%|██████████| 1000/1000 [00:09<00:00, 107.45it/s]
100%|██████████| 1000/1000 [00:10<00:00, 98.79it/s]
100%|██████████| 1000/1000 [00:09<00:00, 106.22it/s]
100%|██████████| 1000/1000 [00:09<00:00, 105.87it/s]
100%|██████████| 1000/1000 [00:09<00:00, 107.73it/s]
100%|██████████| 1000/1000 [00:09<00:00, 103.58it/s]
100%|██████████| 1000/1000 [00:09<00:00, 105.38it/s]
100%|██████████| 1000/1000 [00:09<00:00, 105.55it/s]
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
100%|██████████| 1000/1000 [00:09<00:00, 101.36it/s]
100%|██████████| 1000/1000 [00:10<00:00, 94.98it/s]
100%|██████████| 1000/1000 [00:10<00:00, 104.68it/s]
100%|██████████| 1000/1000 [00:11<00:00, 87.77it/s]
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
  result = entry_point.load(False)
100%|██████████| 1000/1000 [00:12<00:00, 83.08it/s]
100%|██████████| 1000/1000 [00:12<0