# Genetic Algorithm on GPU + Constraint Programming on CPU 

In [None]:
import multiprocessing
import os
import random
import time
import numpy as np
import pandas as pd
import torch

from datetime import datetime
from joblib import Parallel, delayed
from ortools.sat.python import cp_model
from scipy.signal import convolve2d
from sklearn.metrics import mean_absolute_error

### Constants

In [None]:
N = 25  # grid dimension
device = 'cuda'  # torch device string passed to every tensor created by the genetic algorithm
TEST_CSV = '../input/conways-reverse-game-of-life-2020/test.csv'  # puzzles to solve (id, delta, end-state cells)
OUTPUT_CSV = 'submission.csv'  # final merged submission
GA_OUTPUT_CSV = 'submission-ga.csv'  # written by genetic()
OR_OUTPUT_CSV = 'submission-or.csv'  # written by ortools()
TIME_BUDGET = 1.85*3600  # seconds after start_time beyond which the CP functions return None
OR_MAX_BUDGET = 10  # upper bound, in seconds, on a single CP-SAT solve

In [None]:
# Wall-clock origin against which TIME_BUDGET is measured by solve_one_step() and load_solve().
start_time = time.time()

### Genetic algorithm

Useful reading: https://www.mathworks.com/help/gads/how-the-genetic-algorithm-works.html

In [None]:
# Turn on cuDNN benchmark mode and do not request deterministic algorithms.
torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = False

In [None]:
@torch.jit.script
def loss(input, target):
    """Count, per grid, the cells where ``input`` and ``target`` disagree.

    The two tensors are XOR-ed element-wise and the result is summed over the
    last two dimensions, giving one mismatch count per leading index.
    """
    mismatches = torch.bitwise_xor(input, target)
    return torch.sum(mismatches, dim=[-1, -2])

In [None]:
# 3x3 convolution with circular (wrap-around) padding. The centre weight is 0
# and the eight surrounding weights are 1, so applying it to a 0/1 grid yields
# the number of live neighbours of every cell.
cv = torch.nn.Conv2d(1, 1, kernel_size=3, padding=1, padding_mode='circular', bias=False)
# `requires_grad_()` is the Module method that actually freezes the parameters;
# assigning `cv.requires_grad = False` would only create an unrelated attribute.
cv.requires_grad_(False)
# Replace the randomly initialised weight with the fixed neighbour-count kernel,
# stored in float16 on the module-level `device`.
cv.weight = torch.nn.Parameter(
    torch.tensor(
        [[[[ 1., 1., 1.],
           [ 1., 0., 1.],
           [ 1., 1., 1.]]]],
        device=device,
        dtype=torch.float16
    ),
    requires_grad=False,
)


@torch.jit.script
def forward(grid, delta: int):
    """Advance a batch of grids by ``delta`` Game of Life generations.

    ``grid`` is viewed as (-1, 1, N, N) so the module-level convolution ``cv``
    can be applied, and the result is returned reshaped to (-1, N, N). After at
    least one step the result is the boolean tensor produced by the rule below.
    """
    N=25
    g = grid.reshape(-1, 1, N, N)
    for _ in torch.arange(delta):
        # `cv` holds float16 weights, so the grid is cast before convolving.
        g = g.to(torch.float16)
        # Per-cell count of live neighbours (wrap-around at the edges).
        neighbor_sum = cv(g)
        # Alive next step: exactly 3 neighbours, or currently alive with exactly 2.
        g = ((neighbor_sum == 3) | ((g == 1) & (neighbor_sum == 2)))
    return g.reshape(-1, N, N)

In [None]:
@torch.jit.script
def random_parents(data, n_parents: int, device: str):
    """Draw ``n_parents`` random 25x25 boolean grids on ``device``.

    A cell is alive when its uniform sample exceeds ``1 - ALIVE_RATIO``.
    ``data`` is part of the signature but is not read.
    """
    SIDE = 25
    ALIVE_RATIO = .25
    threshold = 1 - ALIVE_RATIO
    noise = torch.rand((n_parents, SIDE, SIDE), device=device)
    return noise > threshold

In [None]:
@torch.jit.script
def select_best(parents, delta: int, target, n_best: int):
    """Keep the ``n_best`` candidates whose evolution is closest to ``target``.

    Every candidate is advanced ``delta`` steps with ``forward`` and scored
    with ``loss``; the lowest-scoring ones are returned in ascending order.

    Returns a tuple ``(survivors, best_score, best_grid)`` where the last two
    describe the first (lowest-loss) survivor.
    """
    errors = loss(forward(parents, delta), target)
    top_errors, top_idx = torch.topk(errors, n_best, dim=0, largest=False, sorted=True)
    survivors = parents[top_idx]
    return survivors, top_errors[0], survivors[0]

In [None]:
def precomputes_masks():
    """Build the boolean crossover masks sampled by ``random_combine``.

    One mask is produced per grid cell: a one-hot grid for that cell is
    convolved with an all-ones BLOCK_SIZE x BLOCK_SIZE kernel using circular
    padding, so the mask is True on the square block centred on the cell,
    wrapping around the grid edges.

    Returns:
        Boolean tensor of shape (N*N, N, N) on the module-level ``device``;
        mask ``x * N + y`` is centred on cell ``(x, y)``.
    """
    N = 25
    BLOCK_SIZE = 17

    block = torch.nn.Conv2d(1, 1, kernel_size=BLOCK_SIZE, padding=BLOCK_SIZE//2,
                            padding_mode='circular', bias=False)
    # `requires_grad_()` freezes the parameters; assigning the attribute
    # `block.requires_grad = False` has no effect on them.
    block.requires_grad_(False)
    block.weight = torch.nn.Parameter(
        torch.ones((1, 1, BLOCK_SIZE, BLOCK_SIZE),
            device=device,
            dtype=torch.float16
        ),
        requires_grad=False,
    )

    # Row k of the identity matrix, viewed as an N x N grid, has its single 1
    # at (k // N, k % N). This equals setting masks[x * N + y, 0, x, y] = 1 for
    # every cell, without N*N separate element writes on the device.
    masks = torch.eye(N * N, device=device).to(torch.float16).reshape(N * N, 1, N, N)
    masks = block(masks)

    # The convolution output is 0 or 1 per cell; threshold it into booleans.
    return masks[:, 0, ...] > .5

In [None]:
@torch.jit.script
def random_combine(parents, n_offsprings: int, device: str, pre_masks):
    """Create ``n_offsprings`` children by mixing random pairs of parents.

    For each child a "dad", a "mom" and a mask are drawn uniformly at random
    (with replacement); the child takes the dad's cells where the mask is True
    and the mom's cells elsewhere.
    """
    n_parents = parents.shape[0]

    dad_idx = torch.randint(low=0, high=n_parents, size=[n_offsprings],
                            device=device, dtype=torch.long)
    from_dad = parents[dad_idx]

    mom_idx = torch.randint(low=0, high=n_parents, size=[n_offsprings],
                            device=device, dtype=torch.long)
    from_mom = parents[mom_idx]

    mask_idx = torch.randint(low=0, high=pre_masks.shape[0], size=[n_offsprings],
                             device=device, dtype=torch.long)
    chosen_masks = pre_masks[mask_idx]

    return torch.where(chosen_masks, from_dad, from_mom)

In [None]:
@torch.jit.script
def mutate(parents, device: str):
    """Flip each cell of ``parents`` independently with a small probability.

    A uniform sample is drawn per cell; cells whose sample falls below
    ``FLIP_PROBABILITY`` are XOR-ed with True, i.e. toggled.
    """
    FLIP_PROBABILITY = .0016  # .005
    flips = torch.rand(parents.shape, device=device) < FLIP_PROBABILITY
    return torch.bitwise_xor(parents, flips)

In [None]:
@torch.jit.script
def optimize_one_puzzle(delta: int, data, device: str, pre_masks):
    """Search with a genetic algorithm for a grid that evolves into ``data``.

    Args:
        delta: number of Game of Life steps between the searched grid and ``data``.
        data: target (end-state) grid; assigned into a (1, N, N) boolean tensor below.
        device: torch device string on which all tensors are created.
        pre_masks: crossover masks forwarded to ``random_combine``.

    Returns:
        ``(best_score, best)`` as returned by ``select_best`` for the last
        generation that ran: the lowest loss and the corresponding grid.
    """
    N = 25
    N_GENERATION = 30  # Number of generations
    P = 4_150  # population
    N_BEST = P // 30  # best to keep as new parents
    N_ELITES = 8  # parents unchanged for next generation
    
    # Initial values; both are overwritten by the first call to select_best.
    best_score = torch.tensor([N*N], device=device)
    best = torch.zeros((N,N), device=device).to(torch.bool)
    parents = random_parents(data, P, device)

    # Before the first selection the only "elite" is the target grid itself.
    elites = torch.empty((1, N, N), dtype=torch.bool, device=device)
    elites[0, ...] = data  # set target as potential dad ;)

    for i in range(N_GENERATION):
        parents = random_combine(parents, P, device, pre_masks)
        parents = mutate(parents, device)
        # Elites are written in after crossover and mutation, so they enter
        # selection unmodified.
        parents[:N_ELITES, ...] = elites
        # Keep the N_BEST candidates with the lowest loss (sorted ascending).
        parents, best_score, best = select_best(parents, delta, data, N_BEST)
        elites = parents[:N_ELITES, ...]
        if best_score == 0:  # early stopping
            break

    return best_score, best

In [None]:
@torch.jit.script
def optimize_all_puzzles(deltas, df, device: str, pre_masks):
    """Run ``optimize_one_puzzle`` on every grid in ``df``.

    Args:
        deltas: per-puzzle step counts, indexed in parallel with ``df``.
        df: tensor of target grids, indexed by puzzle along the first dimension.
        device: torch device string forwarded to the optimiser.
        pre_masks: crossover masks forwarded to the optimiser.

    Returns:
        A clone of ``df`` in which each grid has been replaced by the best
        candidate found for that puzzle.
    """
    # Start from a copy of the targets; every row is overwritten in the loop.
    sub = df.clone()
    
    for n in torch.arange(df.shape[0]):
        delta = deltas[n]
        data = df[n, ...]
        # The score is discarded; only the best grid is stored.
        _, sub[n, ...] = optimize_one_puzzle(delta, data, device, pre_masks)

    return sub

In [None]:
def leaderboard_score(deltas, df, sub, device: str):
    """Print the fraction of mismatching cells over all puzzles.

    Puzzles are grouped by ``delta`` (1 to 5). Each group of proposed start
    grids in ``sub`` is advanced ``delta`` steps and compared with the matching
    targets in ``df``; the total mismatch count is divided by the number of
    cells.
    """
    errors = torch.empty(sub.shape[0], device=device, dtype=torch.long)
    for delta in range(1, 6):
        group = deltas == delta
        evolved = forward(sub[group], delta)
        errors[group] = loss(evolved, df[group])
    mean_error = torch.sum(errors).item() / (errors.shape[0]*N*N)
    print('Leaderboard score (TORCH):', mean_error)

In [None]:
def genetic():
    """Solve every test puzzle with the GPU genetic algorithm.

    Reads TEST_CSV, optimises all puzzles, writes the proposed start grids to
    GA_OUTPUT_CSV and prints the elapsed time and the resulting score.
    """
    puzzles = pd.read_csv(TEST_CSV, index_col='id')
    # puzzles = puzzles.iloc[:1000, :]  # smaller subset for quick experiments
    submission = puzzles.drop(columns=['delta'])

    deltas = torch.from_numpy(puzzles.delta.values).to(device)
    # Column 0 is delta; the remaining columns are the flattened end-state cells.
    cells = puzzles.values[:, 1:].reshape((-1, N, N))
    targets = torch.BoolTensor(cells).to(device)

    start_time = time.time()
    pre_masks = precomputes_masks()
    sub = optimize_all_puzzles(deltas, targets, device, pre_masks)
    print(f'Processed {sub.shape[0]:,} puzzles in {time.time() - start_time:.2f} seconds 🔥🔥🔥')

    flat = sub.reshape((-1, N*N)).cpu().numpy().astype(int)
    submission.iloc[:sub.shape[0], :] = flat
    submission.to_csv(GA_OUTPUT_CSV)
    leaderboard_score(deltas, targets, sub, device)

### OR-Tools (CP-SAT constraint solver)

In [None]:
def life_step(X):
    """Return the next Game of Life generation of grid ``X`` (wrap-around edges).

    The 3x3 box sum minus the cell itself gives each cell's live-neighbour
    count. A cell is alive next step when it has exactly three neighbours, or
    is currently alive and has exactly two.
    """
    kernel = np.ones((3, 3))
    neighbours = convolve2d(X, kernel, mode='same', boundary='wrap') - X
    born = neighbours == 3
    survives = X & (neighbours == 2)
    return born | survives

In [None]:
def individual_scores(df, pred):
    """Score every predicted start grid against its puzzle.

    Args:
        df: puzzle frame indexed by id; the first column is ``delta`` and the
            remaining N*N columns are the flattened end state.
        pred: frame indexed by id whose rows are flattened N*N start grids.

    Returns:
        List with one value per row of ``pred`` (in ``pred.index`` order): the
        mean absolute error between the start grid advanced ``delta`` steps and
        the puzzle's end state (0 means an exact solution).
    """
    scores = []
    for i in pred.index:
        row = df.loc[i]
        # Explicit positional access: `row[0]` on a string-labelled Series
        # relies on the deprecated integer-key positional fallback.
        delta = int(row.iloc[0])
        end = np.asarray(row.iloc[1:]).reshape(N, N)
        x = np.asarray(pred.loc[i]).reshape(N, N)
        for _ in range(delta):
            x = life_step(x)
        scores.append(mean_absolute_error(x, end))
    return scores

In [None]:
def move_forward(start, delta):
    """Apply ``life_step`` to ``start`` ``delta`` times and return the result."""
    board = start
    for _step in range(delta):
        board = life_step(board)
    return board

In [None]:
def pd_leaderboard_score(df, pred):
    """Print the average of ``individual_scores(df, pred)``."""
    per_puzzle = individual_scores(df, pred)
    average = sum(per_puzzle) / len(per_puzzle)
    print('Leaderboard score (PD):', average)

In [None]:
class Grid:
    """Square grid of boolean model variables with wrap-around indexing."""

    def __init__(self, model, dim):
        """Create one ``model.NewBoolVar`` per cell of a ``dim`` x ``dim`` grid."""
        self._dim = dim
        self._cells = {
            (x, y): model.NewBoolVar(f'cell {x}, {y}')
            for x in range(dim)
            for y in range(dim)
        }

    def __getitem__(self, pos):
        """Return the variable at ``pos``; coordinates wrap modulo the grid size."""
        row, col = pos
        size = self._dim
        return self._cells[row % size, col % size]

    def solution(self, solver):
        """Read every cell's value from ``solver`` into a uint8 array."""
        size = self._dim
        values = np.zeros((size, size), dtype=np.uint8)
        # The dict preserves creation order, so cells are read row by row.
        for (row, col), var in self._cells.items():
            values[row, col] = solver.Value(var)
        return values

In [None]:
def solve_one_step(end, use_zero_point):
    """Find a grid that becomes ``end`` after one Game of Life step.

    A CP-SAT model with one boolean variable per cell is built, with one set
    of constraints per cell encoding the Game of Life rule against ``end``.

    Args:
        end: N x N grid indexed as ``end[x, y]``; a value of 1 means alive.
        use_zero_point: when true, cells that are dead in ``end`` and whose
            eight wrapped neighbours are all dead in ``end`` are additionally
            forced to be dead in the predecessor.

    Returns:
        The N x N uint8 array from ``Grid.solution`` when the solver reports
        FEASIBLE or OPTIMAL; ``None`` when the global time budget is exhausted
        or the solver returns any other status.
    """
    # Remaining global budget, measured from the module-level start_time.
    budget = TIME_BUDGET - (time.time() - start_time)
    if budget <= 0:
        return None

    # Creates the model.
    model = cp_model.CpModel()

    grid = Grid(model, N)

    # Creates the constraints with final state
    for x in range(N):
        for y in range(N):
            # Sum of the eight neighbouring variables (Grid wraps out-of-range indices).
            crown = (grid[x-1,y-1] + grid[x-1,y] + grid[x-1,y+1] + 
                     grid[x  ,y-1] +               grid[x  ,y+1] + 
                     grid[x+1,y-1] + grid[x+1,y] + grid[x+1,y+1])
            if end[x,y] == 1:
                # (crown == 3) or (crown == 2 and grid[x,y] == 1)
                # b selects which of the two alternatives is enforced.
                b = model.NewBoolVar("")
                model.Add(crown == 3).OnlyEnforceIf(b)
                model.Add(crown == 2).OnlyEnforceIf(b.Not())
                model.Add(grid[x,y] == 1).OnlyEnforceIf(b.Not())
            # zero point idea stolen from
            # https://www.kaggle.com/jamesmcguigan/game-of-life-z3-constraint-satisfaction
            elif(use_zero_point and
                 (end[(x-1)%N,(y-1)%N] + end[(x-1)%N,y  ] + end[(x-1)%N,(y+1)%N] + 
                  end[x      ,(y-1)%N] +                    end[x      ,(y+1)%N] + 
                  end[(x+1)%N,(y-1)%N] + end[(x+1)%N,y  ] + end[(x+1)%N,(y+1)%N]) == 0):
#                  and delta != 1):
                # Extra restriction beyond the rule itself: force the cell dead.
                model.Add(grid[x,y] == 0)
                model.Add(crown != 3)
            else:
                # (crown != 3) and (crown != 2 or grid[x,y] == 0)
                b = model.NewBoolVar("")
                model.Add(crown != 3)
                model.Add(grid[x,y] == 0).OnlyEnforceIf(b)
                model.Add(crown != 2).OnlyEnforceIf(b.Not())
            
    # Creates a solver and solves the model.
    solver = cp_model.CpSolver()

    # Sets a time limit.
    # Never longer than OR_MAX_BUDGET seconds, nor than the remaining global budget.
    solver.parameters.max_time_in_seconds = min(budget, OR_MAX_BUDGET)

    status = solver.Solve(model)

    if status in (cp_model.FEASIBLE, cp_model.OPTIMAL):
        return grid.solution(solver)
    return None

In [None]:
def solve(idx, delta, end, max_err):
    """Walk ``end`` back ``delta`` generations, one ``solve_one_step`` at a time.

    ``idx`` and ``max_err`` are part of the signature but are not read.
    Returns the grid found after ``delta`` backward steps, or ``None`` as soon
    as any single step yields no solution.
    """
    board = end
    for _ in range(delta):
        board = solve_one_step(board, True)
        if board is None:
            return None
    return board

In [None]:
def load_solve(df, i, err):
    """Load puzzle ``i`` from ``df`` and try to solve it with the CP solver.

    Args:
        df: puzzle frame indexed by id; the first column is ``delta`` and the
            remaining N*N columns are the flattened end state.
        i: index label of the puzzle to solve.
        err: forwarded to ``solve`` as its ``max_err`` argument.

    Returns:
        The grid returned by ``solve``, or ``None`` when the global time budget
        is already exhausted or no solution was found.
    """
    budget = TIME_BUDGET - (time.time() - start_time)
    if budget <= 0:
        return None

    started = time.time()
    row = df.loc[i]
    # Explicit positional access: `row[0]` on a string-labelled Series relies
    # on the deprecated integer-key positional fallback.
    delta = int(row.iloc[0])
    end = np.asarray(row.iloc[1:]).reshape(N, N)
    ret = solve(i, delta, end, err)
    if ret is None:
        print(f'Not solved {i}: delta={delta} in {time.time() - started}s')
    return ret

In [None]:
def ortools():
    """Solve the test puzzles with the CP-SAT solver and save the result.

    The baseline submission is the end state itself; every puzzle whose
    baseline score is non-zero is handed to ``load_solve`` through two joblib
    workers. Solved grids replace the baseline, the frame is written to
    OR_OUTPUT_CSV and the resulting score is printed.
    """
    # Lower this process's scheduling priority.
    os.nice(10)
    df = pd.read_csv(TEST_CSV, index_col='id')
    submission = df.drop(columns=['delta'])
    scores = individual_scores(df, submission)
    submission['score'] = scores
    print('Leaderboard score:', sum(scores) / len(scores))

    # Ordering key: fraction of live cells in the end state times delta.
    # `df.iloc[:, 1:]` selects the cell COLUMNS; `df[1:]` would slice rows,
    # folding `delta` into the mean and leaving the first puzzle without a key.
    submission['alive2'] = df.iloc[:, 1:].mean(axis=1) * df['delta']

    # Puzzles with the smallest key are attempted first.
    to_solve_df = submission.loc[submission.score > 0].sort_values('alive2')
    to_solve = [
        (idx, int(round(score * N * N)))
        for idx, score in zip(to_solve_df.index.values, to_solve_df.score.values)
    ]

    submission.drop(['score', 'alive2'], axis=1, inplace=True)

    # Local timer for the log line only; the solver's time budget is measured
    # from the module-level start_time.
    solve_started = time.time()
    print(f'Starts solving {len(to_solve)} puzzles')

    solutions = Parallel(n_jobs=2)(
        delayed(load_solve)(df, i, e) for i, e in to_solve
    )

    n_improved = sum(s is not None for s in solutions)
    print(f'Improved {n_improved} puzzles in {time.time() - solve_started:.2f} seconds 🔥🔥🔥')

    # Unsolved puzzles keep the baseline row.
    for (i, _), s in zip(to_solve, solutions):
        if s is not None:
            submission.loc[i] = s.reshape(N*N)

    submission.to_csv(OR_OUTPUT_CSV)
    pd_leaderboard_score(df, submission)

### Run GA and OR in parallel

In [None]:
%%time

# Run the CP solver in a separate daemon process while the genetic algorithm
# runs in this process.
d = multiprocessing.Process(name='ortools', target=ortools)
d.daemon = True
d.start()

genetic()

# Wait for the CP process to finish before the merge step reads its CSV.
d.join()

### Merge solutions

In [None]:
# Reload the puzzles and both candidate submissions, scoring each row.
df = pd.read_csv(TEST_CSV, index_col='id')

# df1: constraint-programming output (written by ortools()).
df1 = pd.read_csv(OR_OUTPUT_CSV, index_col='id')
df1['score'] = individual_scores(df, df1)

# df2: genetic-algorithm output (written by genetic()).
df2 = pd.read_csv(GA_OUTPUT_CSV, index_col='id')
df2['score'] = individual_scores(df, df2)

In [None]:
# Take the GA row wherever it scores strictly lower (better) than the CP row.
# One vectorised assignment replaces a Python loop of per-row `.loc` writes.
ga_is_better = df1.loc[df2.index, 'score'] > df2['score']
better_ids = ga_is_better.index[ga_is_better]
df1.loc[better_ids] = df2.loc[better_ids]

In [None]:
# Drop the helper column, cast all cells to integers, and report the merged score.
df1.drop('score', axis=1, inplace=True)
df1 = df1.astype('int')
pd_leaderboard_score(df, df1)

In [None]:
# Write the merged submission.
df1.to_csv(OUTPUT_CSV)

In [None]:
# Preview the first rows of the merged submission.
df1.head()