In [47]:
import numpy as np
import random
import math
import copy

from rubik.cube import Cube

from collections import deque
import tensorflow as tf

**Cube Environment**

In [101]:
def move(state: Cube, face):
    if face == 0:
        state.sequence('U')
        return state, 0
            
    elif face == 1:
        state.sequence('D')
        return state, 1
        
    elif face == 2:
        state.sequence('F')
        return state, 2
        
    elif face == 3:
        state.sequence('B')
        return state, 3
        
    elif face == 4:
        state.sequence('L')
        return state, 4
        
    elif face == 5:
        state.sequence('R')
        return state, 5
        
def scramble(c, num):
    for n in range(num):
        c, _ = move(c, random.randint(0, 5))
    return c

def one_hot(c):
    colors = ['w', 'o', 'g', 'r', 'b', 'y']  # Assuming 'w' for white, 'o' for orange, 'g' for green, 'r' for red, 'b' for blue, 'y' for yellow
    cube_size = 9  # Assuming a 3x3 Rubik's Cube

    # Initialize an empty one-hot encoded cube state
    one_hot_cube = []

    # Iterate over the cube representation
    for cubelet in c:
        # Initialize a one-hot vector for the current cubelet
        one_hot_cubelet = [0] * (len(colors) * cube_size)

        # Calculate the position index for the current cubelet
        position_index = colors.index(cubelet) * cube_size

        # Set the corresponding element in the one-hot vector to 1
        one_hot_cubelet[position_index] = 1

        # Append the one-hot vector for the current cubelet to the one-hot encoded cube state
        one_hot_cube.extend(one_hot_cubelet)

    return one_hot_cube

def is_solved(c):
    return c.is_solved()

**Neural Network**

In [103]:
class NeuralNetwork:
        
        def __init__(self):
            self.create_model()
        
        def create_model(self):
            self.model = tf.keras.models.Sequential()
            self.model.add(tf.keras.layers.Dense(24, input_dim=54, activation='relu'))
            self.model.add(tf.keras.layers.Dense(24, activation='relu'))
            self.model.add(tf.keras.layers.Dense(12, activation='softmax'))
            self.model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(0.01))
            
        def __call__(self):
            return self.model
        
        def preprocess(self, game, discount):
            processed_out = []

            for l in range(0, len(game)):
                processed_out.append(np.asarray(game[l])*(discount**(len(game) - l - 1))) # reward dep

            return processed_out
        
        def train(self, games, e_greedy, epochs, discount):
            instance = Cube('WWWWWWWWWOOOGGGRRRBBBOOOGGGRRRBBBOOOGGGRRRBBBYYYYYYYYY')

            long_states = []
            long_actions = []
            
            for i in range(games):
                states = []
                actions = []
                counter = 0
                
                # generate a random scramble
                instance = scramble(instance, 100)
                
                # iterate while the cube is not solved
                while not is_solved(instance):
                    states.append(one_hot(instance))

                    if random.uniform(0,1) <= e_greedy:
                        sampled = scramble(instance, 1)
                        act = np.zeros(12)
                        act[sampled] = 1
                    else:
                        pre = self.model.predict(np.asarray([one_hot(instance)]))[0]
                        best = np.argmax(pre)
                        act = np.zeros(12)
                        instance, _ = move(instance, math.floor(best / 2))
                        act[best] = 1
                    
                    actions.append(act)
                    
                    counter += 1
                
                long_states.extend(copy.deepcopy(states))
                long_actions.extend(copy.deepcopy(self.preprocess(actions, discount)))
            
            self.model.fit(np.asarray(long_states), np.asarray(long_actions), batch_size=len(long_states), epochs=10)

**Reinforcement Learning**

In [104]:
network = NeuralNetwork()
while True:
	network.train(1, 1, 10, .97)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


TypeError: rubik.cube.Cube.get_piece() argument after * must be an iterable, not int