In [22]:
#Project COSC6354.001 Sudoku AI Notebook 
  
   # COSC6354.001
   # Dr. Minhua Huang

In [23]:
# The below implementation loads a Sudoku puzzle from a CSV file, 
# creates a population of random solutions, and applies the genetic algorithm to evolve the solutions 
# over multiple generations. At each generation, the fitness of each solution is evaluated, the population 
# is sorted by fitness, and a certain percentage of thepopulation is selected for survival and reproduction based on
# their fitness score.

In [24]:
#Importing the neccessary libraries
import pandas as pd
import numpy as np
import random

In [25]:

# Load the Sudoku puzzle from a CSV file
sudoku_file = "C:\\Users\\n\\Downloads\\AI mvs project\\Soduku2.csv" # can be replaced with any other sudoku file
sudoku_df = pd.read_csv(sudoku_file, header=None)

In [26]:
print(sudoku_df)

     0    1    2    3    4    5    6    7    8
0  1.0  NaN  NaN  4.0  8.0  9.0  NaN  NaN  6.0
1  7.0  3.0  NaN  NaN  NaN  NaN  NaN  4.0  NaN
2  NaN  NaN  NaN  NaN  NaN  1.0  2.0  9.0  5.0
3  NaN  NaN  7.0  1.0  2.0  NaN  6.0  NaN  NaN
4  5.0  NaN  NaN  7.0  NaN  3.0  NaN  NaN  8.0
5  NaN  NaN  6.0  NaN  9.0  5.0  7.0  NaN  NaN
6  9.0  1.0  4.0  6.0  NaN  NaN  NaN  NaN  NaN
7  NaN  2.0  NaN  NaN  NaN  NaN  NaN  3.0  7.0
8  8.0  NaN  NaN  5.0  1.0  2.0  NaN  NaN  4.0


In [27]:
# Define the fitness function to count the number of duplicate symbols in rows and columns
def fitness_function(solution):
    # Count the number of duplicates in rows and columns
    row_duplicates = sum([len(row) - len(set(row)) for row in solution])
    col_duplicates = sum([len(col) - len(set(col)) for col in solution.transpose()])
    # Return the total number of duplicates
    return row_duplicates + col_duplicates

In [28]:
# Define the function to generate a random population of solutions
def generate_population(sudoku_df, size):
    population = []
    for i in range(size):
        # Replace NaN values in the puzzle with random digits
        new_solution = sudoku_df.copy()
        for row in range(9):
            for col in range(9):
                if pd.isna(new_solution[row][col]):
                    new_solution[row][col] = random.randint(1, 9)
        # Add the new solution to the population
        population.append(new_solution)
    return population

In [29]:
# Define the parameters for the genetic algorithm
population_size = 100
num_parents = 20
num_generations = 1000
mutation_probability = 0.1
# Generate an initial population of random solutions
population = generate_population(sudoku_df, population_size)
print(population)

[     0    1    2    3    4    5    6    7    8
0  1.0  1.0  6.0  4.0  8.0  9.0  8.0  8.0  6.0
1  7.0  3.0  4.0  5.0  7.0  3.0  1.0  4.0  3.0
2  1.0  4.0  8.0  5.0  3.0  1.0  2.0  9.0  5.0
3  5.0  8.0  7.0  1.0  2.0  4.0  6.0  9.0  7.0
4  5.0  4.0  5.0  7.0  3.0  3.0  5.0  6.0  8.0
5  4.0  5.0  6.0  1.0  9.0  5.0  7.0  3.0  4.0
6  9.0  1.0  4.0  6.0  8.0  1.0  6.0  6.0  2.0
7  5.0  2.0  9.0  8.0  2.0  2.0  3.0  3.0  7.0
8  8.0  4.0  6.0  5.0  1.0  2.0  3.0  5.0  4.0,      0    1    2    3    4    5    6    7    8
0  1.0  3.0  6.0  4.0  8.0  9.0  5.0  1.0  6.0
1  7.0  3.0  8.0  5.0  9.0  8.0  5.0  4.0  7.0
2  2.0  9.0  6.0  3.0  5.0  1.0  2.0  9.0  5.0
3  6.0  9.0  7.0  1.0  2.0  8.0  6.0  1.0  3.0
4  5.0  9.0  8.0  7.0  6.0  3.0  1.0  2.0  8.0
5  7.0  6.0  6.0  9.0  9.0  5.0  7.0  9.0  2.0
6  9.0  1.0  4.0  6.0  8.0  8.0  1.0  8.0  4.0
7  3.0  2.0  7.0  7.0  2.0  4.0  5.0  3.0  7.0
8  8.0  1.0  8.0  5.0  1.0  2.0  5.0  8.0  4.0,      0    1    2    3    4    5    6    7    8
0  1.0  2.

In [30]:
#Evaluate the fittness
def evaluate_fitness(population):
    """
    Evaluate the fitness of each solution in the population.

    Parameters:
    population (list): List of Sudoku puzzles in the population.

    Returns:
    fitness_scores (numpy array): Array of fitness scores for each solution.
    """
    fitness_scores = np.zeros(len(population))
    for i, puzzle in enumerate(population):
        # count the number of duplicate symbols in rows and columns
        duplicates = 0
        for j in range(9):
            duplicates += (len(set(puzzle[j, :])) - np.count_nonzero(np.isnan(puzzle[j, :])))
            duplicates += (len(set(puzzle[:, j])) - np.count_nonzero(np.isnan(puzzle[:, j])))
        # count the number of duplicate symbols in 3x3 subgrids
        for j in range(0, 9, 3):
            for k in range(0, 9, 3):
                duplicates += (len(set(puzzle[j:j+3, k:k+3].flatten())) -
                               np.count_nonzero(np.isnan(puzzle[j:j+3, k:k+3])))
        fitness_scores[i] = duplicates
    return fitness_scores

In [31]:
# Define the fitness function as the sum of the number of duplicate values in rows and columns
def fitness_function(solution):
    duplicates = 0
    for i in range(9):
        # Check for duplicates in rows
        duplicates += 9 - len(set(solution[i,:]))
        # Check for duplicates in columns
        duplicates += 9 - len(set(solution[:,i]))
    return duplicates

In [32]:
# Create a random population of solutions
population_size = 50
population = []
for i in range(population_size):
    solution = sudoku_df.copy().values
    # Fill in the empty cells with random values
    for row in range(9):
        for col in range(9):
            if pd.isnull(solution[row,col]):
                solution[row,col] = np.random.randint(1,10)
                population.append(solution)

In [33]:

print(population.append(solution))

None


In [34]:
# Run the genetic algorithm
max_iterations_without_improvement = 50
iterations_without_improvement = 0
best_solution = None
best_fitness = np.inf
generation = 1
while iterations_without_improvement < max_iterations_without_improvement:
    # Evaluate the fitness of each solution in the population
    fitness = [fitness_function(solution) for solution in population]
    # Sort the population by fitness
    sorted_population = sorted(zip(fitness, population), key=lambda x: x[0])
    # Keep track of the best solution so far
    if fitness[0] < best_fitness:
        best_solution = sorted_population[0][1]
        best_fitness = fitness[0]
        iterations_without_improvement = 0
        print("Generation:", generation, "Best fitness:", best_fitness)
        print(best_solution)
    else:
        iterations_without_improvement += 1
    # Select a certain percentage of the population for survival and reproduction
    num_survivors = int(population_size * 0.2)
    survivors = [x[1] for x in sorted_population[:num_survivors]]
   
          #CAN BE UNCOMMENTED TO SELECT THE PARENTS PROBABILISTIC BASED ON THE FITNESS
 
 # # Select parents probabilistically based on fitness
#     fitness_values = [fitness_function(solution) for solution in survivors]
#     probabilities = np.array([1/(i+1) for i in range(num_survivors)])
#     probabilities /= sum(probabilities)
#     parents = np.random.choice(num_survivors, size=population_size - num_survivors, p=probabilities)
#     parents = [survivors[i] for i in parents]

#     # Perform crossover and mutation to create new solutions
#     new_population = []
#     for i in range(num_survivors):
#         parent1 = survivors[i]
#         parent2 = np.random.choice(parents)
#         crossover_point = np.random.randint(1,8)
#         child = np.concatenate((parent1[:,:crossover_point], parent2[:,crossover_point:]), axis=1)
#         mutation_prob = 0.1
#         for row in range(9):
#             for col in range(9):
#                 if pd.isnull(sudoku_df.iloc[row,col]) and np.random.rand() < mutation_prob:
#                     # Mutate the value
#                     child[row,col] = np.random.randint(1,10)
#         new_population.append(child)
#     population = survivors + new_population
#     generation += 1


Generation: 1 Best fitness: 42
[[1. 3. 9. 4. 8. 9. 3. 7. 6.]
 [7. 3. 6. 1. 7. 3. 4. 4. 7.]
 [6. 8. 6. 4. 3. 1. 2. 9. 5.]
 [3. 5. 7. 1. 2. 7. 6. 6. 9.]
 [5. 7. 2. 7. 7. 3. 9. 5. 8.]
 [1. 4. 6. 1. 9. 5. 7. 3. 2.]
 [9. 1. 4. 6. 8. 6. 7. 3. 5.]
 [1. 2. 6. 9. 5. 4. 4. 3. 7.]
 [8. 7. 4. 5. 1. 2. 9. 4. 4.]]


In [21]:
#THE END