Source of sudoku puzzles:
https://github.com/dimitri/sudoku/blob/master/sudoku.txt

# Import

In [1]:
import itertools
import sys

import numpy as np
import pandas as pd

# Original

In [2]:
# Puzzle and its expected solution, each an 81-character digit string laid out
# row by row (9 rows of 9 cells). In `original`, "0" marks a cell still to fill.
original = "003020600900305001001806400008102900700000008006708200002609500800203009005010300"
solution = "483921657967345821251876493548132976729564138136798245372689514814253769695417382"

In [3]:
# Convert each digit sequence into a flat integer array, one entry per cell.
original = np.array(list(map(int, original)))
solution = np.array(list(map(int, solution)))

# Functions

In [4]:
def np2df(s):
    """Return the flat array `s` as a 9x9 pandas DataFrame.

    `s` must provide a `reshape` method and hold exactly 81 entries.
    """
    grid = s.reshape((9, 9))
    return pd.DataFrame(grid)

In [5]:
def get_mutables(original):
    """Return the flat positions of the cells in `original` that equal 0.

    Parameters
    ----------
    original : array-like of int
        Puzzle cells; a 0 marks a cell that may be changed.

    Returns
    -------
    list
        Positions (NumPy integers) of the zero cells, in ascending order.
    """
    # Coerce first: comparing a plain Python list with 0 yields a single
    # bool instead of an element-wise mask.
    cells = np.asarray(original)
    return list(np.flatnonzero(cells == 0))

In [6]:
def _generate_init(original):
    init = original.copy()
    init[init==0] = np.random.randint(1,10, size=np.sum(init==0))
    return init

In [7]:
def generate_inits(original, how_many=1000):
    """Return a list of `how_many` candidates, each built by `_generate_init(original)`."""
    candidates = []
    for _ in range(how_many):
        candidates.append(_generate_init(original))
    return candidates

In [8]:
def _get_combos(s):
    """ rows, columns and the squares """
    rows = [s[9*i:9*i+9] for i in range(9)]
    cols = [s[i:81:9] for i in range(9)]
    squares = [np.array([s[3*i:3*i+3] for i in range(x, x+9, 3)]).flatten() for x in range(21) if x%9 in [0,1,2]]
    return rows + cols + squares

In [9]:
# Groups of cell indices produced by _get_combos for the positions 0..80,
# computed once; score_population takes this list as its default `all_combos`.
all_combos = _get_combos(list(range(81)))

In [10]:
def _score_set_of_9(set_of_9):
    return 9 - np.unique(set_of_9).shape[0]

In [11]:
def _score_partial_population(population, list_of_colnumbers):
    # https://stackoverflow.com/questions/48473056/number-of-unique-elements-per-row-in-a-numpy-array
    
    result = population[:, list_of_colnumbers]
    result = np.sort(result,axis=1)
    result = 8 - (result[:,1:] != result[:,:-1]).sum(axis=1)
    
    return result

In [12]:
def score_population(population, all_combos=all_combos):
    """Return the total score of every row of `population`.

    Each entry of `all_combos` is scored with `_score_partial_population`
    and the per-group scores are summed per row.
    """
    partial_scores = []
    for combo in all_combos:
        partial_scores.append(_score_partial_population(population, combo))
    return np.sum(np.array(partial_scores), axis=0)

In [13]:
def _make_kids(p1, p2):
    random_cut = np.random.randint(1, len(p1))
    c1 = np.concatenate([p1[:random_cut], p2[random_cut:]])
    c2 = np.concatenate([p2[:random_cut], p1[random_cut:]])
    return c1, c2

In [14]:
def _get_parent_combis(parents):
    parent_combis = []
    for i in range(len(parents)):
        parent_combis.extend([(parents[i], x) for x in parents[i+1:]])

    return parent_combis

In [15]:
def make_many_kids(parents):
    """Return a flat list with the children of every parent pair.

    Pairs come from `_get_parent_combis`; each pair contributes the children
    returned by `_make_kids`, in pair order.
    """
    return [
        kid
        for pair in _get_parent_combis(parents)
        for kid in _make_kids(*pair)
    ]

In [16]:
def get_best(population, scores, top=45):
    """Return the `top` members of `population` with the lowest `scores`.

    The result is a NumPy array ordered from lowest to highest score.
    """
    ranking = np.argsort(np.array(scores))
    keep = ranking[:top]
    return np.array(population)[keep]

In [17]:
def mutation(s, position):
    """Return a copy of `s` whose entry at `position` is a fresh random digit 1-9.

    Parameters
    ----------
    s : sequence with a `copy` method (e.g. numpy.ndarray)
        Candidate to mutate; it is not modified.
    position : int
        Index of the entry to overwrite.

    Returns
    -------
    Same type as `s`
        The mutated copy. The new digit may equal the old one.
    """
    # Work on a copy so the caller's array is never changed behind its back.
    mutated = s.copy()
    mutated[position] = np.random.randint(1, 10)
    return mutated

In [18]:
def mutate(original, to_mutate, number_of_mutations=1):
    """Apply `number_of_mutations` random mutations to `to_mutate`.

    Parameters
    ----------
    original : array-like
        Puzzle passed to `get_mutables` to find the positions that may change.
    to_mutate : array-like
        Candidate to mutate.
    number_of_mutations : int, optional
        How many positions to draw. They are drawn with replacement, so the
        same position can be picked more than once.

    Returns
    -------
    The value returned by the last `mutation` call, or `to_mutate` itself
    when there is nothing to mutate.
    """
    mutables = get_mutables(original)
    # np.random.choice raises ValueError on an empty pool when samples are
    # requested; a puzzle without mutable cells simply has nothing to change.
    if len(mutables) == 0:
        return to_mutate

    positions = np.random.choice(mutables, size=number_of_mutations)
    for pos in positions:
        to_mutate = mutation(to_mutate, pos)

    return to_mutate

In [None]:
def mutate_population(original, population, fraction=1, number_of_mutations=1):
    """Return a list in which each member was mutated with probability `fraction`.

    For every member a uniform random number is drawn; if it is below
    `fraction` the member is replaced by `mutate(original, member,
    number_of_mutations)`, otherwise the member is passed through as is.
    """
    result = []
    for member in population:
        if np.random.random() < fraction:
            member = mutate(original, member, number_of_mutations)
        result.append(member)
    return result

# Run

In [None]:
%%time

# Initialise the search state
final_solution = ''
scores = [100]  # placeholder high score, so the first generation gets the heaviest mutation
population = generate_inits(original, 45)

# Breed generation after generation until a board with score 0 shows up
for i in range(10**10):
    kids = make_many_kids(population)

    # Mutation schedule, based on the best score of the previous generation:
    # the lower that score, the gentler the mutation
    if np.min(scores) <= 10:
        kids = mutate_population(original, kids, fraction=.5, number_of_mutations=1)
    elif np.min(scores) <= 20:
        kids = mutate_population(original, kids, fraction=1, number_of_mutations=1)
    else:
        kids = mutate_population(original, kids, fraction=1, number_of_mutations=2)

    # Merge parents and kids, drop duplicate boards, then score every board
    population = np.unique(np.concatenate((population, kids)), axis=0)
    scores = score_population(population)

    # A score of 0 ends the search
    if np.min(scores) == 0:
        final_solution = get_best(population, scores, 1)[0]
        sys.stdout.write(f'\r The solution has been found at iteration {i}. Ow yeah :D')
        break

    # Progress line, rewritten every 10th iteration
    if not i % 10:
        sys.stdout.write(f'\r Iteration: {i} \t Best score: {np.min(scores)} \t Mean score: {np.mean(scores).round(2)}')

    # The best-scoring boards become the next generation's parents
    population = get_best(population, scores)

 Iteration: 2080 	 Best score: 2 	 Mean score: 6.71

In [None]:
# Compare the result with the expected solution, then show it as a 9x9 table.
# NOTE(review): final_solution is still '' when the search loop was stopped
# before reaching score 0; np2df('') would then fail on `.reshape` — confirm
# a solution was found before relying on this cell.
print(final_solution==solution)
np2df(final_solution)