## Run simulation

In [1]:
import os
import itertools as it
import numpy as np
import pandas as pd
import multiprocessing as mp
import time
from tqdm.notebook import tqdm
from game_state import GameState
from blackjack import blackjack_train_agent, blackjack_test_agent

In [2]:
#trials = [10000, 50000, 100000, 250000, 500000, 1000000]
trials = [10000]
decks = range(1, 7)
etas = [np.round(i, 2) for i in np.arange(0.1, 0.9, 0.1)]
gammas = [np.round(i, 2) for i in np.arange(0.2, 1.0, 0.1)]
epss = [np.round(i, 2) for i in np.arange(0.1, 0.6, 0.1)]

### Serial

In [3]:
param_iter = it.product(decks, trials, epss, etas, gammas)
num_experiments = len(list(param_iter))
# Need to reinitialize
param_iter = it.product(decks, trials, epss, etas, gammas)

exp_data = []
for ndecks, ntrials, eps, eta, gamma in tqdm(param_iter, total=num_experiments):
    #print(ndecks, ntrials, eps, eta, gamma)
    state = GameState(ndecks, 2)
    agent, data = blackjack_train_agent(state, ndecks, ntrials, eps, eta, gamma, out_dir=None)
    exp_data.extend(data)

  0%|          | 0/1920 [00:00<?, ?it/s]

In [5]:
len(exp_data)

24336184

In [4]:
rdf = pd.DataFrame(exp_data)

In [6]:
rdf.shape

(24336184, 10)

In [7]:
rdf.head()

Unnamed: 0,num_episodes,num_decks,epsilon,eta,gamma,episode,action_num,reward,wins,winrate
0,10000,1,0.1,0.1,0.2,0,1,0.0,0,0.0
1,10000,1,0.1,0.1,0.2,0,1,-5.0,0,0.0
2,10000,1,0.1,0.1,0.2,1,1,-5.0,0,0.0
3,10000,1,0.1,0.1,0.2,2,1,-5.0,0,0.0
4,10000,1,0.1,0.1,0.2,3,1,-3.75,0,0.0


In [8]:
rdf.groupby('num_decks').winrate.mean()

num_decks
1    37.365168
2    37.215350
3    37.193729
4    37.134973
5    37.184133
6    37.155264
Name: winrate, dtype: float64

In [9]:
rdf.to_csv('training_10k_episodes2.csv.gz', index=False)

### Parallel w/progress

In [8]:
param_iter = (t for t, _ in zip(range(1000), it.product(decks, trials, epss, etas, gammas)))
cnt = mp.Value('i', 0)
num_exp = len(list(param_iter))
param_iter = it.product(decks, trials, epss, etas, gammas, [counter], [num_exp])

In [2]:
def run_exp(ndecks, ntrials, eps, eta, gamma, num_exp):
    state = GameState(ndecks, 2)
    _, data = blackjack_train_agent(state, ntrials, eps, eta, gamma)
    
    with cnt.get_lock():
        cnt.value += 1
        print(f"Progress: {cnt.value + 1} / {num_exp} ({(cnt.value + 1) / num_exp * 100:.2f} %)", end='\r')
        
    return data

In [5]:
def func(x):
    for i in range(x):
        assert i == i
        with cnt.get_lock():
            cnt.value += 1
            print(f"Progress: {(cnt.value + 1) / 100000 * 100:.2f}  %", end='\r')
            #print(f'{os.getpid()} | counter incremented to: {cnt.value}\n')

In [3]:
def init_globals(counter):
    global cnt
    cnt = counter

In [None]:
num_exp = 100000
iterable = [10000 for _ in range(10)]
with mp.Pool(initializer=init_globals, initargs=(cnt,)) as pool:
    #pool.map_async(func, iterable)
    pool.map(func, iterable)

bar = tqdm(total=num_exp)
while cnt.value < num_exp:
    time.sleep(0.1)
    bar.update(cnt.value)
bar.close()

Progress: 11.26  %rogress: 7.44  %Progress: 7.44  %

In [8]:
exp_data = []
with mp.Pool() as pool:
    results = pool.starmap(run_exp, param_iter, chunksize=1000)    

RuntimeError: Synchronized objects should only be shared between processes through inheritance

### Parallel wo/progress

In [9]:
param_iter = it.product(decks, trials, epss, etas, gammas)

In [2]:
def run_blackjack_exp(ndecks, ntrials, eps, eta, gamma, num_exp):
    state = GameState(ndecks, 2)
    _, data = blackjack_train_agent(state, ntrials, eps, eta, gamma)
    
    with cnt.get_lock():
        cnt.value += 1
        print(f"Progress: {cnt.value + 1} / {num_exp} ({(cnt.value + 1) / num_exp * 100:.2f} %)", end='\r')
        
    return data


def init_globals(counter):
    global cnt
    cnt = counter
    
episodes = [10000, 50000, 100000, 250000, 500000, 1000000]
decks = range(1, 7)
etas = np.arange(0.1, 0.9, 0.1)
gammas = np.arange(0.2, 1.0, 0.1)
epss = np.arange(0.1, 0.6, 0.1)

param_iter = (t for _, t in zip(range(100), it.product(decks, episodes, epss, etas, gammas)))
cnt = mp.Value('i', 0)
num_exp = len(list(param_iter))
param_iter = (t for _, t in zip(range(100), it.product(decks, episodes, epss, etas, gammas, [num_exp])))

exp_data = []
start = time.time()
with mp.Pool(processes=mp.cpu_count(), initializer=init_globals, initargs=(cnt,)) as pool:
    results = pool.starmap(run_blackjack_exp, param_iter, chunksize=1000)
end = time.time(n)
print(n)
print(f"Took {(end - start) / 60} mins")

Took 1.0463617364565532 mins%)


In [5]:
len(results)

100

In [6]:
results[0]

[{'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 0,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 1,
  'reward': -2.5,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 1,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 2,
  'reward': -3.3333333333333335,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 2,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 3,
  'reward': -4.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 4,
  'reward': -4.2,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 5,
  'reward': -3.5,
  'winrate': 0.0},
 {'num_tr

In [7]:
r2 = list(it.chain.from_iterable(results))

In [8]:
len(r2)

1254530

In [10]:
r2[:10]

[{'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 0,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 1,
  'reward': -2.5,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 1,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 2,
  'reward': -3.3333333333333335,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 2,
  'reward': -5.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 3,
  'reward': -4.0,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 4,
  'reward': -4.2,
  'winrate': 0.0},
 {'num_trials': 10000,
  'epsilon': 0.1,
  'eta': 0.1,
  'gamma': 0.2,
  'trial': 5,
  'reward': -3.5,
  'winrate': 0.0},
 {'num_tr

In [12]:
import random

In [13]:
episodes = [10000, 50000, 100000, 250000, 500000, 1000000]
decks = range(1, 7)
etas = np.arange(0.1, 0.9, 0.1)
gammas = np.arange(0.2, 1.0, 0.1)
epss = np.arange(0.1, 0.6, 0.1)

In [14]:
data_file = 'training_results.csv'

In [15]:
param_iter = it.product(decks, trials, epss, etas, gammas)
num_exp = len(list(param_iter))
param_iter = list(it.product(decks, trials, epss, etas, gammas, [num_exp], [data_file]))

In [16]:
len(param_iter)

11520

In [17]:
param_iter[:20]

[(1, 10000, 0.1, 0.1, 0.2, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.30000000000000004, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.4000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.5000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.6000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.7000000000000002, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.8000000000000003, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.1, 0.9000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.2, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.30000000000000004, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.4000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.5000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.6000000000000001, 11520, 'training_results.csv'),
 (1, 10000, 0.1, 0.2, 0.7000000000000002, 11520, 'training_results

In [18]:
random.shuffle(param_iter)

In [19]:
param_iter[:20]

[(6, 1000000, 0.2, 0.30000000000000004, 0.2, 11520, 'training_results.csv'),
 (1, 100000, 0.1, 0.2, 0.9000000000000001, 11520, 'training_results.csv'),
 (6,
  10000,
  0.4,
  0.7000000000000001,
  0.9000000000000001,
  11520,
  'training_results.csv'),
 (4, 50000, 0.5, 0.5, 0.30000000000000004, 11520, 'training_results.csv'),
 (2,
  50000,
  0.30000000000000004,
  0.30000000000000004,
  0.30000000000000004,
  11520,
  'training_results.csv'),
 (1, 250000, 0.1, 0.2, 0.8000000000000003, 11520, 'training_results.csv'),
 (4, 500000, 0.1, 0.2, 0.5000000000000001, 11520, 'training_results.csv'),
 (1, 250000, 0.30000000000000004, 0.1, 0.2, 11520, 'training_results.csv'),
 (4, 50000, 0.2, 0.6, 0.9000000000000001, 11520, 'training_results.csv'),
 (4, 10000, 0.1, 0.5, 0.6000000000000001, 11520, 'training_results.csv'),
 (4,
  50000,
  0.5,
  0.30000000000000004,
  0.8000000000000003,
  11520,
  'training_results.csv'),
 (6, 100000, 0.4, 0.5, 0.30000000000000004, 11520, 'training_results.csv'),
 

In [81]:
data_dir = '/stash/tlab/jc2/rl_training_results'

In [82]:
def list_files(data_dir):
    for f in os.listdir(data_dir):
        try:
            df = pd.read_csv(os.path.join(data_dir, f))
        except Exception as err:
            print(f, err)

In [83]:
list_files(data_dir)

In [84]:
rdf = pd.concat([pd.read_csv(os.path.join(data_dir, p)) for p in os.listdir(data_dir)], ignore_index=True)

In [86]:
rdf.shape

(4057916, 8)

In [87]:
rdf.to_csv('training_10k_episodes.csv.gz', index=False)

In [58]:
np.arange(0.1, 0.9, 0.1)

array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8])

In [61]:
[np.round(i, 2) for i in np.arange(0.1, 0.9, 0.1)]

[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

In [33]:
s = pd.Series(sizes)

In [67]:
f = 'abc.234'.replace('.','')

In [68]:
f

'abc234'