<h1><center>
    DSOR 646 - Reinforcement Learning <br>
    Brandon Hosley
</center></h1>

# On-policy Monte Carlo Control (every-visit):
- [ ] Execute at least 40 runs
- [ ] Show a scatter plot (See the Lesson 9 Handout for an example of the desired scatter plot)

In [1]:
import numpy as np
import time
from scipy.stats.qmc import LatinHypercube
from datetime import datetime
from joblib import Parallel, delayed

%rm -r __pycache__/
from mcc import MCC     # Local Python script


# Experiment-wide settings passed to MCC.train_on_policy in run_experiment.
NUM_EPISODES = 500    # first argument to train_on_policy
REPLICATIONS = 10     # At least 10 reps per run


In [2]:
def run_experiment(run_index: int, factors: np.ndarray) -> tuple[int, float, float, float, float, float, float, float]:
    """Train one on-policy MCC configuration and return its performance summary.

    Args:
        run_index: Position of this design run in the factor table; echoed back
            as the first element of the result so rows can be re-associated
            after parallel execution.
        factors: One row of the factor table. The first three entries are
            passed, in order, to the ``MCC`` constructor (logged by the
            algorithm as ``eps_a``, ``eps_b`` and ``q0``).

    Returns:
        ``(run_index, maxETDR, maxETDRhw, meanMaxTestEETDR, maxTestHW,
        meanAULC, hwAULC, secs_taken)`` -- the run index followed by the seven
        values from ``MCC.get_results()``, unchanged.
        NOTE(review): element types are annotated as float on the assumption
        that ``get_results`` returns numeric scalars -- confirm in mcc.py.
    """
    run_start_time = time.time()
    eps_a, eps_b, init_qbar = factors[0], factors[1], factors[2]
    pol = MCC(eps_a, eps_b, init_qbar)
    pol.train_on_policy(NUM_EPISODES, REPLICATIONS)
    maxETDR, maxETDRhw, meanMaxTestEETDR, maxTestHW, meanAULC, hwAULC, secs_taken = pol.get_results()
    # Same weighting used for the "Score" column in parallel_lhs; computed here
    # only so each worker can log its score as soon as it finishes.
    alg_score = 0.6*(meanMaxTestEETDR-maxTestHW) + 0.4*(meanAULC-hwAULC)
    print(f"Complete experiment run {run_index} with a score of {alg_score:.2f} ({time.time() - run_start_time:.1f}s)")
    return run_index, maxETDR, maxETDRhw, meanMaxTestEETDR, maxTestHW, meanAULC, hwAULC, secs_taken

In [4]:
def parallel_lhs(experiment, num_runs=10, n_jobs=6, rng_seed=0):
    """Execute a Latin hypercube sampling (LHS) experiment in parallel.

    Builds a 3-factor Latin hypercube design, evaluates ``experiment`` once per
    design row using joblib, scores each run, and writes the combined
    factor/results table to a timestamped CSV file in the working directory.

    Args:
        experiment: Callable invoked as ``experiment(run_index, factor_row)``.
            It must return a sequence whose first element is the run index,
            followed by seven numeric results (see ``column_names`` below).
        num_runs: Number of design runs (rows of the factor table).
            TODO: the assignment asks for at least 40 runs; the default is
            left at the value the notebook was last executed with.
        n_jobs: Number of joblib worker processes.
        rng_seed: Seed passed to the ``LatinHypercube`` sampler.

    Returns:
        dict with keys ``factor_table``, ``results_table`` (numeric, one row
        per run, columns as in ``column_names``), ``column_names``,
        ``num_alg_feats``, ``score``, ``x`` (Mean Max EETDR column) and ``y``
        (Time-Avg EETDR column), so later cells can plot and analyse the runs.
    """
    num_alg_feats = 3
    sampler = LatinHypercube(num_alg_feats, scramble=False, optimization="lloyd", seed=rng_seed)
    factor_table = sampler.random(n=num_runs)

    print(f"\nInitializing LHS experiment with {num_runs} runs...")
    experiment_start_time = time.time()
    # create an instance of the Parallel object to manage execution of our processes
    parallel_manager = Parallel(n_jobs=n_jobs)
    # one delayed call per row of the factor table; each row is an algorithm design run.
    # delayed() records the call without executing it.
    run_list = (delayed(experiment)(run_index, factor_table[run_index]) for run_index in range(num_runs))
    # execute the delayed calls in parallel
    print("\nExecuting experiment...")
    results_table = np.array(parallel_manager(run_list))
    print(f"\n\nCompleted experiment ({time.time() - experiment_start_time:.3f}s)")

    # Algorithm run score: weighted sum (0.6 / 0.4) of the value-minus-"hw"
    # differences for max and time-average performance.
    # NOTE(review): "hw" is presumably the 95% CI half-width, making these CI
    # lower bounds -- confirm against mcc.get_results.
    maxEETDR_95CI_LB = results_table[:, 3] - results_table[:, 4]
    meanEETDR_95CI_LB = results_table[:, 5] - results_table[:, 6]
    score = 0.6*maxEETDR_95CI_LB + 0.4*meanEETDR_95CI_LB

    # combine run index, factors, results and score into one numeric table
    results_table = np.column_stack((results_table[:, 0], factor_table, results_table[:, 1:], score))
    # grab data for performance scatter plot (columns "Mean Max EETDR" and "Time-Avg EETDR")
    x = results_table[:, 6]
    y = results_table[:, 8]
    column_names = ["Run Index", "eps_a", "eps_b", "Init Qbar", "Sup EETDR", "Sup EETDR hw", "Mean Max EETDR", "Mean Max EETDR hw", "Time-Avg EETDR", "Time-Avg EETDR hw", "Secs per run", "Score"]

    # Prepend the header row for the CSV only; the numeric table is kept
    # separate so it can be returned. np.vstack replaces the deprecated
    # np.row_stack alias.
    csv_table = np.vstack((column_names, results_table))
    filename_DOE = "MCC_onpolicy_results_DOE_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
    np.savetxt(filename_DOE, csv_table, delimiter=",", fmt="%s")

    return {
        "factor_table": factor_table,
        "results_table": results_table,
        "column_names": column_names,
        "num_alg_feats": num_alg_feats,
        "score": score,
        "x": x,
        "y": y,
    }


# Bind the values that the plotting and ANOVA cells read from the kernel namespace.
lhs_results = parallel_lhs(run_experiment)
factor_table = lhs_results["factor_table"]
column_names = lhs_results["column_names"]
num_alg_feats = lhs_results["num_alg_feats"]
score = lhs_results["score"]
x = lhs_results["x"]
y = lhs_results["y"]


Initializing LHS experiment wie = time.time()th 10 runs...

Executing experiment...

MCC (on-policy)(eps_a=0.35,eps_b=0.77,q0=0.6891150311324498) rep 0...
   Test... Episode:    0, EETDR CI:  13.30 +/- 2.68 New Top 10 EETDR 95CILB -- Q recorded

MCC (on-policy)(eps_a=0.47,eps_b=0.55,q0=0.336759612944694) rep 0...
   Test... Episode:    0, EETDR CI:  13.30 +/- 2.68 New Top 10 EETDR 95CILB -- Q recorded

MCC (on-policy)(eps_a=0.35,eps_b=0.05,q0=0.85) rep 0...
   Test... Episode:    0, EETDR CI:  13.30 +/- 2.68 New Top 10 EETDR 95CILB -- Q recorded

MCC (on-policy)(eps_a=0.45,eps_b=0.25,q0=0.55) rep 0...

MCC (on-policy)(eps_a=0.75,eps_b=0.65,q0=0.05) rep 0...
   Test... Episode:    0, EETDR CI:  15.30 +/- 1.86 New Top 10 EETDR 95CILB -- Q recorded

MCC (on-policy)(eps_a=0.55,eps_b=0.45,q0=0.25) rep 0...
   Test... Episode:    0, EETDR CI:  13.30 +/- 2.68 New Top 10 EETDR 95CILB -- Q recorded
   Test... Episode:    0, EETDR CI:  17.27 +/- 2.34 New Top 10 EETDR 95CILB -- Q recorded
   Tes

In [None]:
# Plot Performance Results
#
# For example, we can compare different algorithms or the same algorithm with
# large, nominal structural differences using these visual aids.
#
# Reads x (Mean Max EETDR per run) and y (Time-Avg EETDR per run) from the
# kernel namespace.

# import necessary libraries
import matplotlib.pyplot as plt

fig, ax = plt.subplots()

# Legend text is built from the configuration constants so it always matches
# the experiment that was actually run.
ax.scatter(x, y, label=f"MCC (on-policy) -- {REPLICATIONS} reps per run, {NUM_EPISODES} episodes per rep")

# Axis limits are left to autoscale: EETDR values logged during training are
# well above 1, so a fixed (0, 1) window would hide every point.
ax.set_title("MCC (on-policy) LHS DOE Performance Results")
ax.set_xlabel("Mean Maximum EETDR")
ax.set_ylabel("Mean Time-Average EETDR")
ax.grid()
ax.legend(loc='upper left', fontsize=7)
# display the plot
plt.show()

# ANOVA: fit a second-order polynomial response surface of the algorithm score
# on the design factors, then report the regression summary and ANOVA table.
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import PolynomialFeatures
from statsmodels.formula.api import ols

# Design matrix: one row per run, one column per factor
X = factor_table

# Expand to all polynomial terms up to degree 2 (constant, linear, squares, interactions)
poly = PolynomialFeatures(2)
X_poly = poly.fit_transform(X)

# Turn the generated term names into identifiers usable in a formula string
input_features = column_names[1:num_alg_feats+1]
feature_names = list(map(
    lambda raw: raw.replace(' ','_').replace('^','_pow_').replace('*','_times_'),
    poly.get_feature_names_out(input_features=input_features),
))

# Predictors plus the response variable as the last column
df = pd.DataFrame(X_poly, columns=feature_names).assign(AlgScore=score)

# Formula uses every column except the first (constant term '1') and the
# last (the response 'AlgScore')
predictors = '+'.join(df.columns[1:-1])
formula = f'AlgScore ~ {predictors}'

# Build and fit the OLS model
model = ols(formula, data=df)
results = model.fit()

# Regression summary
print("\n\n")
print(results.summary())

# Type-2 ANOVA table
anova_results = sm.stats.anova_lm(results, typ=2)
print(anova_results)

# Off-policy Monte Carlo Control (every-visit, weighted importance sampling):

# Compare

In [9]:
# Testable Params
Q1, eps_a, eps_b = None, 0, None


0


<scipy.stats._qmc.LatinHypercube at 0x1576e8f90>

In [None]:
# Example single experiment: one MCC instance built with its default
# constructor arguments, trained on-policy, then summarised.
num_episodes = 500
replications = 10     # At least 10 reps per run

pol = MCC()
pol.train_on_policy(num_episodes, replications)
pol.show_results()


In [3]:
from joblib import Parallel, delayed

def compute_square(number):
    """Return ``number`` multiplied by itself."""
    squared = number * number
    return squared

# Inputs: the integers 0 through 9
numbers = [*range(10)]

# Square every input in parallel; n_jobs=-1 lets joblib use all available cores.
results = Parallel(n_jobs=-1)(delayed(compute_square)(value) for value in numbers)
print("Squares:", results)


Squares: [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
