In [1]:
import os
import pandas as pd
from nltk.parse.generate import generate
from nltk import CFG
from nltk.grammar import Nonterminal
from tqdm import tqdm
import time
from math import ceil

from eig.battleship import Parser

from battleship.grammar import BattleshipGrammar
from battleship.scoring import compute_score_parallel, compute_score
from battleship.v1.board import Board

In [2]:
# Shared grammar instance; enumeration_baseline calls grammar.generate(...) on it
# to produce the candidate programs that get scored.
# NOTE(review): include_lambdas=False presumably leaves lambda productions out of
# the grammar — confirm against BattleshipGrammar.
grammar = BattleshipGrammar(include_lambdas=False)

In [3]:
#valid, invalid = grammar.generate(n=int(1e6), depth=5, start=Nonterminal('A'))
#print('Valid programs:', len(valid))
#print('Invalid programs:', len(invalid))

In [4]:
# Column layout of the results file: one row per (program, board) pair.
dataDict = {
    'program': [],
    'board_id': [],
    'score': [],
    'max_depth': [],
}
df = pd.DataFrame(dataDict)

# On first use, create the results file containing only the header row, so that
# rows appended later with header=False land under named columns. The index is
# written too (pandas default), matching how the result rows are appended.
if not os.path.isfile('enumeration_data.csv'):
    # `header` takes a bool or a list of column aliases; True writes the frame's
    # own column names.
    df.to_csv('enumeration_data.csv', header=True)

In [5]:
def enumeration_baseline(cores: int = (os.cpu_count() or 2) // 2, samples: int = 10000, max_depth: int = 5,
                         chunk_size: int = 50, board_ids=None):
    """Generate programs from the grammar for each board, score them, and log the results.

    For every board id, programs are generated with ``grammar.generate`` and each
    accepted program is scored against ``Board.from_trial_id(board_id)``. The rows
    for a board are appended to ``enumeration_data.csv`` as soon as that board is
    finished, so completed boards survive an interrupted run.

    Args:
        cores: Number of worker processes. Values greater than 1 score programs in
            parallel batches via ``compute_score_parallel``; otherwise programs are
            scored one at a time with ``compute_score``.
        samples: Passed to ``grammar.generate`` as ``n``.
        max_depth: Passed to ``grammar.generate`` as ``depth`` and recorded with
            every row.
        chunk_size: Number of programs handed to ``compute_score_parallel`` per
            call. Small batches keep the load on the machine bounded.
        board_ids: Iterable of trial ids to evaluate. Defaults to 1 through 18.

    Returns:
        A DataFrame with columns ``program``, ``board_id``, ``score`` and
        ``max_depth`` holding only the rows produced by this call.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")
    if board_ids is None:
        board_ids = range(1, 18 + 1)

    # Accumulate into a local dict rather than a module-level one, so calling the
    # function again does not re-append rows from earlier calls to the CSV.
    records = {'program': [], 'board_id': [], 'score': [], 'max_depth': []}

    for board_id in board_ids:
        start_time = time.time()
        print(f"board {board_id}", end=" | ")
        acceptable_programs, _ = grammar.generate(n=samples, depth=max_depth, start=Nonterminal('A'))
        print(f"finished generating programs in {round(time.time()-start_time,2)}s", end=" | ")

        first_new_row = len(records['score'])

        if cores > 1:
            # Score in batches of `chunk_size`. Batches are cut from the programs
            # actually generated (which may be fewer than `samples`), so no empty
            # batch is ever sent to the worker pool.
            for offset in tqdm(range(0, len(acceptable_programs), chunk_size)):
                batch = list(acceptable_programs[offset:offset + chunk_size])
                batch_scores = compute_score_parallel(programs=batch, board=Board.from_trial_id(board_id), processes=cores, show_progress=False)
                records['program'].extend(batch)
                records['score'].extend(batch_scores)
        else:
            # Single-process path: score sequentially and record every result.
            for prog in acceptable_programs:
                # NOTE(review): this path scores prog[0] while the parallel path
                # passes each program unchanged — confirm which form the scoring
                # functions expect.
                score = compute_score(program=prog[0], board=Board.from_trial_id(board_id))
                records['program'].append(prog)
                records['score'].append(score)

        new_row_count = len(records['score']) - first_new_row
        records['board_id'].extend([board_id] * new_row_count)
        records['max_depth'].extend([max_depth] * new_row_count)
        print(f"finished scoring in {round(time.time()-start_time,2)}s from the start")

        # Persist this board's rows right away. The explicit index continues the
        # running row count, so the file matches a single write of the full frame.
        pd.DataFrame(
            {column: values[first_new_row:] for column, values in records.items()},
            index=range(first_new_row, first_new_row + new_row_count),
        ).to_csv('enumeration_data.csv', mode='a', header=False)

    df = pd.DataFrame(records)
    print(f"finished {samples}-shot sampling at depth {max_depth}")
    return df

In [6]:
# Run the enumeration on all but one logical CPU. os.cpu_count() can return None
# when the count cannot be determined, so fall back to a single core in that case.
df = enumeration_baseline(cores=max(1, (os.cpu_count() or 1) - 1), samples=100000, max_depth=5)

board 1 | 

100%|██████████| 100000/100000 [00:02<00:00, 35708.73it/s]

finished generating programs in 2.8s | 




KeyboardInterrupt: 