In [43]:
import os
import pandas as pd

from battleship.grammar import BattleshipGrammar
from battleship.scoring import compute_score, compute_score_parallel
from battleship.board import Board
from tqdm import tqdm

import matplotlib.pyplot as plt

In [44]:
# Shared grammar instance (constructed with include_lambdas=False) that
# repeated_generation() draws candidate programs from via grammar.sample().
grammar = BattleshipGrammar(include_lambdas=False)

In [45]:
import time 
from math import ceil

# Column-oriented accumulator for sampling results. sample_baseline() extends
# these lists in place, so the name and keys must stay as they are.
dataDict = {
    'program': [],
    'board_id': [],
    'score': [],
    'min_depth': [],
    'max_depth': [],
    'single_tokens': [],
}
df = pd.DataFrame(dataDict)

# Create the results file with just a header row the first time this cell runs;
# later runs append data rows to it with header=False. `header` takes a bool or
# a list of column aliases, so pass True explicitly rather than an arbitrary
# string that only works by being truthy.
if not os.path.isfile('sampling_data.csv'):
    df.to_csv('sampling_data.csv', header=True)


In [46]:
def repeated_generation(samples: int = 10000, min_depth: int = 3, max_depth: int = 16, allow_single_token: bool = False):
    """Sample from the module-level ``grammar`` until ``samples`` results are collected.

    Each draw calls ``grammar.sample`` with the given depth bounds and
    single-token flag. Draws that come back as ``None`` are discarded and
    replaced by a new draw, so the returned list never contains ``None``.

    Args:
        samples: Number of non-``None`` results to collect. Zero or a negative
            value yields an empty list.
        min_depth: Forwarded to ``grammar.sample`` as ``min_depth``.
        max_depth: Forwarded to ``grammar.sample`` as ``max_depth``.
        allow_single_token: Forwarded to ``grammar.sample`` as
            ``allow_single_token``.

    Returns:
        A list of ``samples`` results, in the order they were drawn.

    Note:
        There is no attempt limit: if ``grammar.sample`` returns ``None`` for
        every draw under the given settings, this loops forever.
    """
    generations = []
    # `<` rather than `!=` so a negative request terminates immediately.
    while len(generations) < samples:
        prog = grammar.sample(min_depth=min_depth, max_depth=max_depth, allow_single_token=allow_single_token)
        # Skip rejected draws here instead of filtering the whole list afterwards.
        if prog is not None:
            generations.append(prog)
    return generations

In [47]:
def sample_baseline(cores=int(os.cpu_count()/2), samples: int = 10000, min_depth: int = 3, max_depth: int = 16, allow_single_token: bool = False):
    """Sample and score programs for trial boards 1-18, logging one row per program.

    For every board id a fresh batch of ``samples`` programs is drawn with
    ``repeated_generation`` and scored against ``Board.from_trial_id(board_id)``.
    Element 0 of each generated entry is used as the program.

    Rows are buffered per board and committed only once that board is fully
    scored: they are appended to ``sampling_data.csv`` and to the module-level
    ``dataDict``. As a result an interrupted run keeps every finished board on
    disk, a repeated call appends only its own rows to the CSV, and the
    ``dataDict`` columns always stay the same length.

    Args:
        cores: Number of worker processes. Values above 1 score in parallel
            chunks via ``compute_score_parallel``; otherwise programs are
            scored one by one with ``compute_score``. The default is evaluated
            once, when the function is defined.
        samples: Number of programs to generate and score per board.
        min_depth: Passed to ``repeated_generation`` and recorded in each row.
        max_depth: Passed to ``repeated_generation`` and recorded in each row.
        allow_single_token: Passed to ``repeated_generation`` and recorded in
            each row under ``single_tokens``.

    Returns:
        A DataFrame built from the accumulated ``dataDict``, i.e. every row
        recorded so far in this session, including rows from earlier calls.
    """
    # Programs per parallel scoring call; kept small so a single call does not
    # overload the machine or kill the kernel.
    chunk_size = 50

    for board_id in range(1, 18 + 1):
        start_time = time.time()
        print(f"board {board_id}", end=" | ")
        acceptable_programs = repeated_generation(samples, min_depth, max_depth, allow_single_token)
        print(f"finished generating programs in {round(time.time()-start_time,2)}s", end=" | ")

        programs = [entry[0] for entry in acceptable_programs]
        board_programs = []
        board_scores = []

        if cores > 1:
            for chunk_index in tqdm(range(ceil(samples/chunk_size))):
                program_selection = programs[chunk_index*chunk_size:(chunk_index+1)*chunk_size]
                program_scores = compute_score_parallel(programs=program_selection,board=Board.from_trial_id(board_id),processes=cores,show_progress=False)
                board_programs.extend(program_selection)
                board_scores.extend(program_scores)
                print(f"pass {chunk_index} completed in {time.time() - start_time}")
        else:
            # Sequential path: record every score so it reaches the output too.
            for program in programs:
                board_programs.append(program)
                board_scores.append(compute_score(program=program, board=Board.from_trial_id(board_id)))
        print(f"finished scoring in {round(time.time()-start_time,2)}s from the start")

        row_count = len(board_scores)
        board_rows = {
            'program': board_programs,
            'board_id': [board_id] * row_count,
            'score': board_scores,
            'min_depth': [min_depth] * row_count,
            'max_depth': [max_depth] * row_count,
            'single_tokens': [allow_single_token] * row_count,
        }
        # Continue the row index from what is already recorded so indices in
        # the CSV stay unique across boards and across calls.
        first_index = len(dataDict['program'])
        board_frame = pd.DataFrame(board_rows, index=range(first_index, first_index + row_count))
        board_frame.to_csv('sampling_data.csv', mode='a', header=False)
        for column, values in board_rows.items():
            dataDict[column].extend(values)

    df = pd.DataFrame(dataDict)
    print(f"finished {samples}-shot sampling at depth {(min_depth,max_depth)}")
    return df

In [48]:
# Full baseline run: 100000 programs per board, leaving one CPU core free.
df = sample_baseline(
    cores=os.cpu_count()-1,
    samples=100000,
    min_depth=3,
    max_depth=16,
    allow_single_token=False,
)

board 1 | finished generating programs in 5.52s | 

  0%|          | 1/3125 [00:10<8:49:43, 10.17s/it]

pass 0 completed in 15.696661233901978


  0%|          | 2/3125 [00:25<11:33:44, 13.33s/it]

pass 1 completed in 31.23309087753296


  0%|          | 3/3125 [00:38<11:30:27, 13.27s/it]

pass 2 completed in 44.43284773826599


  0%|          | 3/3125 [00:39<11:22:25, 13.12s/it]


KeyboardInterrupt: 