Sudoku puzzles can be downloaded from:
https://github.com/dimitri/sudoku/blob/master/sudoku.txt

# Import

In [1]:
import pandas as pd
import numpy as np
import sys

# Original

In [2]:
# Puzzle and (presumably) its solved grid, each written as an 81-character digit string.
# The row/column slicing in _get_combos treats the 81 cells as row-major, and a 0 in
# `original` marks an empty cell (get_mutables / _generate_init look for zeros).
original = "003020600900305001001806400008102900700000008006708200002609500800203009005010300"
solution = "483921657967345821251876493548132976729564138136798245372689514814253769695417382"

In [3]:
# Turn both digit strings into flat integer arrays of length 81.
# `solution` is parsed to ints as well (not left as one-character strings) so that an
# integer candidate can be compared with it element-wise.
original = np.array([int(ch) for ch in original])
solution = np.array([int(ch) for ch in solution])

# Functions

In [4]:
def np2df(s):
    """Return the flat 81-cell array ``s`` laid out as a 9x9 pandas DataFrame."""
    grid = s.reshape((9, 9))
    return pd.DataFrame(grid)

In [5]:
def get_mutables(original):
    """Return, as a list, the positions in ``original`` whose value is 0.

    These are the cells that were empty in the puzzle and may therefore be changed.
    """
    is_empty = original == 0
    empty_positions = np.where(is_empty)[0]
    return list(empty_positions)

In [6]:
def _generate_init(original):
    """Build one random candidate from the puzzle.

    A copy of ``original`` is made and every zero cell in it is replaced by an
    independent random digit in 1..9; the given clues are left untouched and
    ``original`` itself is not modified.
    """
    candidate = original.copy()
    blanks = candidate == 0
    # One random digit per blank cell (upper bound 10 is exclusive).
    candidate[blanks] = np.random.randint(1, 10, size=np.sum(blanks))
    return candidate

In [7]:
def generate_inits(original, how_many=1000):
    """Return a list of ``how_many`` random candidates built by ``_generate_init``."""
    candidates = []
    for _ in range(how_many):
        candidates.append(_generate_init(original))
    return candidates

In [8]:
def _get_combos(s):
    """Split a flat, row-major 81-cell board into its 27 groups of nine cells.

    Returns a list holding the 9 rows, then the 9 columns, then the 9 squares
    of 3x3 cells (ordered left-to-right, top-to-bottom). Rows and columns are
    slices of ``s``; each square is a flattened numpy array.
    """
    rows = [s[start:start + 9] for start in range(0, 81, 9)]
    cols = [s[offset:81:9] for offset in range(9)]

    squares = []
    for band in range(3):          # which horizontal band of three rows
        for stack in range(3):     # which vertical stack of three columns
            top_left = 27 * band + 3 * stack
            # Three consecutive cells from each of the square's three rows.
            pieces = [s[top_left + 9 * r:top_left + 9 * r + 3] for r in range(3)]
            squares.append(np.array(pieces).flatten())

    return rows + cols + squares

In [9]:
def _score_set_of_9(set_of_9):
    """Return 9 minus the number of distinct values in ``set_of_9``.

    A group holding nine different values therefore scores 0.
    """
    distinct = np.unique(set_of_9)
    return 9 - len(distinct)

In [10]:
def score_single(s):
    """Return the total penalty of board ``s``.

    The penalty is the sum of ``_score_set_of_9`` over every group returned by
    ``_get_combos`` (rows, columns and squares).
    """
    penalties = []
    for unit in _get_combos(s):
        penalties.append(_score_set_of_9(unit))
    return np.sum(penalties)

In [11]:
def score_population(population):
    """Return a list with the ``score_single`` penalty of each board in ``population``."""
    return list(map(score_single, population))

In [12]:
def _make_kids(p1, p2):
    """Cross two parents at a single random cut point and return both children.

    The cut index is drawn from 1..len(p1)-1. The first child is the head of
    ``p1`` followed by the tail of ``p2``; the second child is the mirror image.
    """
    cut = np.random.randint(1, len(p1))
    head_1, tail_1 = p1[:cut], p1[cut:]
    head_2, tail_2 = p2[:cut], p2[cut:]
    first_kid = np.concatenate((head_1, tail_2))
    second_kid = np.concatenate((head_2, tail_1))
    return first_kid, second_kid

In [13]:
def _get_parent_combis(parents):
    """Return every unordered pair of parents as a list of 2-tuples.

    Pairs keep the input order: ``(parents[i], parents[j])`` for all ``i < j``,
    grouped by ``i``. Fewer than two parents gives an empty list.
    """
    # Function-scope import: the notebook's import cell does not bring in itertools.
    from itertools import combinations

    # combinations() yields exactly the i < j pairs, in the same order as the
    # nested index/slice loop would, without copying a slice per parent.
    return list(combinations(parents, 2))

In [14]:
def make_many_kids(parents):
    """Cross every pair of parents and return all children in one flat list.

    Each pair from ``_get_parent_combis`` is handed to ``_make_kids``, which
    contributes two children per pair.
    """
    return [
        kid
        for mother, father in _get_parent_combis(parents)
        for kid in _make_kids(mother, father)
    ]

In [15]:
def get_best(population, scores, top=45):
    """Return the ``top`` members of ``population`` with the lowest scores.

    The result is a numpy array ordered from lowest score upwards.
    """
    ranking = np.array(scores).argsort()
    keep = ranking[:top]
    pool = np.array(population)
    return pool[keep]

In [16]:
def mutation(s, position):
    """Overwrite ``s[position]`` with a random digit in 1..9 and return ``s``.

    ``s`` is modified in place; the returned object is the same one passed in.
    """
    new_digit = np.random.randint(1, 10)
    s[position] = new_digit
    return s

In [17]:
def mutate(original, to_mutate, number_of_mutations=1):
    """Apply ``number_of_mutations`` random mutations to ``to_mutate``.

    Positions are drawn (with ``np.random.choice``) only from the cells that
    are 0 in ``original``, so the puzzle's clues are never touched. Each chosen
    position is passed to ``mutation``, which writes into ``to_mutate`` itself.
    """
    changeable = get_mutables(original)
    for pos in np.random.choice(changeable, size=number_of_mutations):
        to_mutate = mutation(to_mutate, pos)
    return to_mutate

In [18]:
def mutate_population(original, population, fraction=1, number_of_mutations=1):
    """Return a list in which each individual was mutated with probability ``fraction``.

    For every individual one uniform random number in [0, 1) is drawn; if it is
    below ``fraction`` the individual is passed to ``mutate``, otherwise it is
    kept as is. With the default ``fraction=1`` every individual is mutated.
    """
    result = []
    for individual in population:
        if np.random.random() < fraction:
            individual = mutate(original, individual, number_of_mutations)
        result.append(individual)
    return result

# Run

In [49]:
# Start from 45 random fillings of the puzzle.
population = generate_inits(original, 45)
# Cross every pair of them: 990 pairs, two children each.
kids = make_many_kids(population)
# Pool parents and children and drop duplicate boards (np.unique also sorts the rows).
population = np.unique(np.concatenate([population, kids]), axis=0)

In [50]:
# Number of distinct boards left after pooling parents and children.
len(population)

2021

In [79]:
# One row per board, one column per cell.
df = pd.DataFrame(population)
# NOTE: all_combos is created in cell In [78] further down, so that cell has to be run first.
for i, combo in enumerate(all_combos):
    # Number of distinct digits the board has in this row/column/square (9 means no repeats).
    df[f'combo_{i}'] = df.iloc[:,combo].nunique(axis=1)

In [86]:
# Per board, add up the distinct-digit counts of all combo_* columns. Those are the only
# columns with string labels; the cell columns have integer labels. Higher is better here.
df[[col for col in df.columns if isinstance(col, str)]].sum(axis=1)

0       173
1       168
2       160
3       163
4       163
5       166
6       178
7       168
8       169
9       175
10      172
11      167
12      166
13      174
14      167
15      173
16      168
17      169
18      167
19      163
20      164
21      163
22      165
23      165
24      166
25      166
26      167
27      169
28      171
29      165
       ... 
1991    181
1992    178
1993    180
1994    178
1995    176
1996    177
1997    180
1998    179
1999    178
2000    179
2001    176
2002    174
2003    176
2004    174
2005    179
2006    173
2007    176
2008    178
2009    180
2010    168
2011    175
2012    173
2013    177
2014    164
2015    175
2016    177
2017    161
2018    163
2019    167
2020    170
Length: 2021, dtype: int64

In [20]:
# Penalty of every board in the population, as computed by score_single
# (0 means no row, column or square contains a repeated value).
score0 = score_population(population)

In [62]:
# Shape of the set of distinct value patterns found in the `first_row` columns of the population.
# NOTE(review): `first_row` is not defined in any visible cell — presumably the cell indices
# of the first grid row (0..8); confirm before re-running.
np.unique(population[:, first_row], axis=0).shape

(239, 9)

In [78]:
# Running _get_combos on the cell indices 0..80 gives, for each of the 27 rows/columns/squares,
# the positions of its nine cells. Cell In [79] above uses this to select DataFrame columns.
all_combos = _get_combos(list(range(81)))