In [1]:
import numpy as np
import pickle

# seed NumPy's global random state: every random draw in this notebook goes
# through np.random.*, so this fixes the generated output for a given
# sequence of executed cells.
np.random.seed(31)

In [2]:
# bit flags describing the content of a field cell. a cell crossed by both
# a vertical and a horizontal stroke carries VERTICAL | HORIZONTAL.
BLANK = 0
VERTICAL = 1
HORIZONTAL = 2

def sample_field(H=24, W=64, mu_bars=4):
    """Sample a random walled field with a few interior bars.

    Parameters
    ----------
    H, W : int
        Number of rows and columns of the field.
    mu_bars : float
        Mean of the Poisson distribution the number of interior bars is
        drawn from.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(H, W)`` and dtype ``uint8`` whose entries are
        ``BLANK``, ``VERTICAL``, ``HORIZONTAL`` or the OR of the latter two.

    Notes
    -----
    All random draws use the global ``np.random`` state.
    """
    def clipped_span(center, length, limit):
        # half-open index range covered by a bar of the given length that is
        # centred on `center`, truncated so it stays within [0, limit).
        start = max(0, int(round(center - length/2)))
        stop = min(limit, int(round(center + length/2)))
        return start, stop

    field = np.full((H, W), BLANK, dtype=np.uint8)

    # outer frame: vertical walls on the left/right edges,
    # horizontal walls on the top/bottom edges.
    field[:, 0] |= VERTICAL
    field[:, -1] |= VERTICAL
    field[0, :] |= HORIZONTAL
    field[-1, :] |= HORIZONTAL

    # to keep things simple, two bars of the same orientation may never sit
    # on the same or on directly neighbouring rows/columns. these sets hold
    # the rows/columns that are no longer available.
    used_rows = set()
    used_cols = set()

    # the number of interior bars is itself random
    for _ in range(np.random.poisson(mu_bars)):
        # give each bar at most 5 attempts to land on a free row/column;
        # if all of them fail, the bar is silently dropped.
        tries_left = 5
        while tries_left:
            tries_left -= 1

            # draw the centre of the bar and its orientation
            row = np.random.randint(2, H-2)
            col = np.random.randint(2, W-2)

            if np.random.choice(2):
                # horizontal bar
                if row not in used_rows:
                    used_rows.update(range(row-1, row+2))

                    # draw the bar length, then stamp the glyph flag along it
                    length = np.random.randint(2, 3*W//4)
                    first, last = clipped_span(col, length, W)
                    field[row, first:last] |= HORIZONTAL
                    break
            elif col not in used_cols:
                # vertical bar, handled symmetrically
                used_cols.update(range(col-1, col+2))

                length = np.random.randint(2, 3*H//4)
                first, last = clipped_span(row, length, H)
                field[first:last, col] |= VERTICAL
                break
            # otherwise we landed too close to an existing bar of the same
            # orientation: loop around and resample.

    return field


In [3]:
# lookup table from a cell's flag value to its display character:
#   0 (BLANK) -> ' ', 1 (VERTICAL) -> '|', 2 (HORIZONTAL) -> '-',
#   3 (VERTICAL | HORIZONTAL) -> '+'
# indexing this array with a whole field yields the initial ASCII
# representation (spaces and walls).
CHAR_LOOKUP = np.array([' ','|','-','+'])

In [4]:
# fancy-indexing the char lookup with a field's flag values gives its ASCII
# rendering. show one field at the default size and two at custom sizes.
for size in ({}, {'H': 12, 'W': 94}, {'H': 16, 'W': 16}):
    rows = CHAR_LOOKUP[sample_field(**size)]
    print('\n'.join(''.join(line) for line in rows))

+-------------------+------------------------------------------+
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                   |                                          |
|                                               |              |
|                                               |              |
|                     |                         |              |
|                     | ------------            |              |
|                     |                                        |
|                     |  

In [5]:
from dataclasses import dataclass

# simple class to keep track of physical simulator state
@dataclass
class State:
    """State of the bouncing-point simulation; updated in place by sim_step."""
    # position along x (compared against field column indices)
    px: float
    # position along y (compared against field row indices)
    py: float
    # velocity along x
    vx: float
    # velocity along y
    vy: float
    # 2-D array of wall flags (VERTICAL / HORIZONTAL bits), indexed [row, column]
    field: np.ndarray

In [6]:
# scan one family of parallel walls for the earliest future collision.
def _first_hit_time(planes, spans, pos, vel, cross_pos, cross_vel, far_edge):
    """Earliest time at which the point hits one of a set of parallel walls.

    Parameters
    ----------
    planes : 1-D integer array
        For every wall cell, its coordinate along the axis being tested
        (column index for vertical walls, row index for horizontal ones).
    spans : 1-D integer array
        For every wall cell, its coordinate along the other axis.
    pos, vel : number
        Position and velocity of the point along the tested axis.
    cross_pos, cross_vel : number
        Position and velocity of the point along the other axis.
    far_edge : int
        Index of the last row/column. Walls at index 0 or ``far_edge`` are
        the outer frame and always count as hit, whatever the intercept.

    Returns
    -------
    number or None
        Relative time of the first collision, ``None`` if no wall lies
        ahead, or ``inf`` if the point does not move along this axis.
    """
    if vel == 0:
        # without motion along this axis no wall plane is ever crossed.
        # bail out before dividing by zero (which would otherwise produce
        # inf/nan times and NumPy RuntimeWarnings).
        return np.inf

    # a wall cell is hit on its near face: at `plane` when moving in the
    # positive direction, at `plane + 1` when moving in the negative one.
    offset = (1 - np.sign(vel)) / 2

    soonest = None
    # loop over the unique wall coordinates along the tested axis
    for plane in np.unique(planes):
        # time at which we cross the plane of this wall
        t = (plane + offset - pos) / vel
        if t < 0:
            # we're moving away from this wall, not towards it.
            continue

        # coordinate along the other axis at the moment of intercept
        cross_at_hit = cross_pos + cross_vel * t

        # range of cross-axis coordinates covered by the wall in this plane
        extent = spans[planes == plane]

        # the intercept only counts if it actually lies within the wall
        if (extent.min() <= cross_at_hit <= extent.max()) or plane in (0, far_edge):
            if soonest is None or t < soonest:
                soonest = t
    return soonest


# find the (relative) times until the next collision with vertical and horizontal walls.
def next_collisions(state: State):
    """Return ``(tx, ty)``: times until the next vertical / horizontal wall hit.

    ``tx`` refers to walls flagged ``VERTICAL`` (they reflect ``vx``) and
    ``ty`` to walls flagged ``HORIZONTAL`` (they reflect ``vy``). Both are
    measured relative to the current state.

    A value of 0 means "reflect right now": either the point sits on a wall
    plane it is moving into, or no future collision was found at all. A
    value of ``inf`` is returned for an axis whose velocity component is
    zero, since such a point can never reach a wall along that axis.
    """
    H, W = state.field.shape

    # (row, column) locations of the cells holding vertical walls
    verticals = np.argwhere(state.field & VERTICAL)
    tx = _first_hit_time(
        verticals[:, 1], verticals[:, 0],
        state.px, state.vx, state.py, state.vy, W - 1,
    )

    # ditto for the cells holding horizontal walls
    horizontals = np.argwhere(state.field & HORIZONTAL)
    ty = _first_hit_time(
        horizontals[:, 0], horizontals[:, 1],
        state.py, state.vy, state.px, state.vx, H - 1,
    )

    # if we found no future collision, set collision time to zero
    # this means we escaped the box, which can happen due to
    # numerical precision; we should treat it as a collision
    # so the velocity will be reflected immediately
    if tx is None:
        tx = 0
    if ty is None:
        ty = 0
    return tx, ty

In [7]:
# advance the simulation by (at most) a timestep of dt.
# when a wall is hit before dt has elapsed, the state is only advanced up
# to the impact and the unused part of dt is handed back to the caller.
def sim_step(state: State, dt: float):
    """Advance ``state`` in place and return the part of ``dt`` left over.

    Returns 0 when the full timestep was consumed; otherwise the state has
    been moved to the first collision, the matching velocity component has
    been reflected, and the remaining time is returned.
    """
    never = 9e9  # sentinel: nothing left to handle on this axis in this step

    hit_x, hit_y = next_collisions(state)

    # a collision time of zero means the bounce happens right now, so the
    # velocity is reflected on the spot. such collisions, like those lying
    # beyond this timestep, need no further treatment below.
    if hit_x == 0:
        state.vx *= -1
    if hit_x == 0 or hit_x > dt:
        hit_x = never

    if hit_y == 0:
        state.vy *= -1
    if hit_y == 0 or hit_y > dt:
        hit_y = never

    # easy case: nothing is hit within the timestep, so move the full
    # amount and report that no time remains.
    if min(hit_x, hit_y) > dt:
        state.px += dt * state.vx
        state.py += dt * state.vy
        return 0

    assert (hit_x, hit_y) != (0, 0)

    # otherwise move only up to the earlier of the two collisions (y wins
    # ties), reflect the velocity component belonging to the wall that was
    # hit, and return the time that has not been simulated yet.
    x_first = hit_x < hit_y
    elapsed = hit_x if x_first else hit_y

    state.px += state.vx * elapsed
    state.py += state.vy * elapsed
    if x_first:
        state.vx *= -1
    else:
        state.vy *= -1
    return dt - elapsed

In [8]:
# load the quote corpus, one quote per line. a line is kept only if
# splitting it on single spaces yields fewer than 13 pieces; trailing
# whitespace (including the newline) is stripped from the kept lines.
with open('quotes.txt') as fquotes:
    quotes = [
        line.rstrip()
        for line in fquotes
        if len(line.split(' ')) < 13
    ]

In [9]:
# spot-check one quote and count how many quotes passed the length filter
quotes[80], len(quotes)

('Beauty in things exists in the mind which contemplates them.', 1412)

In [10]:
# draw one random quote as a sanity check.
# NOTE: this consumes a draw from the global NumPy random state, so running
# (or skipping) this cell changes the samples generated further down.
np.random.choice(quotes)

'It is never too late to give up your prejudices.'

In [11]:
from tqdm import trange

def generate_samples(N):
    """Generate ``N`` bouncing-quote samples.

    For every sample a diagonal starting direction, a quote, a field of
    random size and a blank starting cell are drawn from the global
    ``np.random`` state. The quote is then written into an ASCII copy of
    the field, one character per unit timestep, along the path of a point
    that bounces off the walls.

    Returns
    -------
    list of dict
        One dict per sample with the keys ``'direction'``, ``'quote'``,
        ``'field'`` (the empty field with the start marked by ``'*'``) and
        ``'answer'`` (the field with the quote written in), the last two as
        newline-joined strings.
    """
    # initial (vx, vy) for each compass direction; y grows downwards
    velocity_of = {
        'NE': (1,-1),
        'SE': (1,1),
        'SW': (-1,1),
        'NW': (-1,-1),
    }

    def as_text(grid):
        # collapse a 2-D character array into one newline-separated string
        return '\n'.join(''.join(line) for line in grid)

    dataset = []
    for _ in trange(N):
        # random starting direction and quote
        direction = np.random.choice(['NE','SE','SW','NW'])
        quote = np.random.choice(quotes)

        # random (even) field dimensions, then the field itself
        H = 2*np.random.randint(9,13+1)
        W = 2*np.random.randint(16,43+1)
        field = sample_field(H, W)

        # character canvas the quote gets written onto
        ascii_field = CHAR_LOOKUP[field]

        # keep drawing start cells until one lands on empty space
        p0x = p0y = None
        while p0x is None or field[p0y,p0x] != BLANK:
            p0x = np.random.randint(2,W-2)
            p0y = np.random.randint(2,H-2)

        # bundle everything the simulator needs
        v0x, v0y = velocity_of[direction]
        state = State(px=p0x, py=p0y, vx=v0x, vy=v0y, field=field)

        # one character of the quote per timestep of size 1
        for ch in quote:
            # snap the physical position to the nearest grid cell
            px_, py_ = np.int_(np.round((state.px, state.py)))

            # spaces are "transparent": only other characters get written.
            # the cell index is clamped to the field in case we drifted out.
            if ch != ' ':
                row = min(max(py_, 0), H-1)
                col = min(max(px_, 0), W-1)
                ascii_field[row, col] = ch

            # sim_step stops at every collision and returns the unused time;
            # keep calling it until the whole timestep has been consumed.
            remaining = 1
            while remaining:
                remaining = sim_step(state, remaining)

        # the prompt is the untouched field with the start cell marked
        prompt_field = CHAR_LOOKUP[field]
        prompt_field[p0y, p0x] = '*'

        # save the result!
        dataset.append({
            'direction': direction,
            'quote': quote,
            'field': as_text(prompt_field),
            'answer': as_text(ascii_field),
        })
    return dataset

In [12]:
# generate the 1,000-sample dataset (draws from the global NumPy random state)
ds1k = generate_samples(1_000)

100%|██████████| 1000/1000 [00:04<00:00, 211.38it/s]


In [13]:
# persist the 1k dataset (a list of dicts) to disk as a pickle file
with open('bounce1k.pkl', 'wb') as fpkl:
    pickle.dump(ds1k, fpkl)

In [14]:
# generate the 10,000-sample dataset; it continues from the random state
# left behind by the previous cells, so it is not a superset of ds1k.
ds10k = generate_samples(10_000)

100%|██████████| 10000/10000 [00:48<00:00, 206.72it/s]


In [15]:
# persist the 10k dataset (a list of dicts) to disk as a pickle file
with open('bounce10k.pkl', 'wb') as fpkl:
    pickle.dump(ds10k, fpkl)