#**Installing relevant libraries**

In [0]:
pip install pycuber



In [0]:
pip install tqdm



In [0]:
pip install keras



In [0]:
pip install numpy



In [0]:
from collections import Counter
from random import choice

import numpy as np
import pycuber as pc

# Move name -> integer label. Labels run face by face (F, B, U, D, L, R) inside
# each suffix group: plain turns 0-5, primed turns 6-11, double turns 12-17 and
# primed double turns 18-23.
action_map = {
    face + suffix: 6 * group + position
    for group, suffix in enumerate(["", "'", "2", "2'"])
    for position, face in enumerate("FBUDLR")
}
# Only the 12 plain and primed turns; their labels (0-11) match action_map.
action_map_small = {move: label for move, label in action_map.items() if label < 12}
# Integer label -> move name (inverse of action_map).
inv_action_map = dict(zip(action_map.values(), action_map.keys()))
# Sticker colour -> class index.
color_map = {
    colour: index
    for index, colour in enumerate(['green', 'blue', 'yellow', 'red', 'orange', 'white'])
}

# Sticker colour -> 6-element one-hot list, hot at the colour's color_map index.
color_list_map = {
    colour: [int(slot == index) for slot in range(6)]
    for colour, index in color_map.items()
}


def flatten(cube):
    """Return the sticker colours of ``cube`` as one flat list.

    Faces are read in the order F, B, U, D, L, R. Within each face the 3x3
    grid is read with the first index (``i``) outermost and the second index
    (``j``) innermost, so the result has 6 * 9 = 54 entries.
    """
    faces = (cube.F, cube.B, cube.U, cube.D, cube.L, cube.R)
    return [
        face[row][col].colour
        for face in faces
        for row in range(3)
        for col in range(3)
    ]


def flatten_1d_b(cube):
    """Return ``cube`` as one flat list of one-hot encoded sticker colours.

    The six faces are visited in the order F, B, U, D, L, R and each face is a
    3x3 grid (3 rows, each row has 3 columns). Every sticker's colour is
    replaced by its ``color_list_map`` entry and the entries are concatenated,
    giving 54 * 6 = 324 values with the current colour table.
    """
    faces = (cube.F, cube.B, cube.U, cube.D, cube.L, cube.R)
    return [
        bit
        for face in faces
        for row in range(3)
        for col in range(3)
        # encode the colour of this cell according to color_list_map
        for bit in color_list_map[face[row][col].colour]
    ]


def order(data):
    """Return the fraction of ``data`` occupied by its most frequent value.

    Sequences with zero or one element return 0 instead of a fraction.
    """
    total = len(data)
    if total < 2:
        return 0

    # The largest count divided by the length is the largest probability.
    _, top_count = Counter(data).most_common(1)[0]
    return float(top_count) / total


#calculate the percentage of the cube that is solved
def perc_solved_cube(cube):
    """Return the mean, over the six faces, of each face's dominant-colour share.

    The cube is flattened to its 54 colour names (e.g. ['yellow', 'white',
    'orange', ...]) and cut into six consecutive runs of 9 stickers, one per
    face. ``order`` scores each run, e.g.
    [0.444, 0.556, 0.556, 0.444, 0.333, 0.667], and the scores are averaged.
    """
    stickers = flatten(cube)

    face_scores = []
    #step through the 54 cells one face (9 cells) at a time
    for start in range(0, 9 * 6, 9):
        face_scores.append(order(stickers[start:start + 9]))

    return np.mean(face_scores)


def gen_sample(n_steps=6):
    """Scramble a fresh cube and record one training sample per reversed step.

    ``n_steps`` moves are drawn at random from all of ``action_map``, printed
    as a formula and applied to a new cube. The formula is then reversed
    (presumably yielding the sequence that undoes the scramble -- confirm
    against pycuber's ``Formula.reverse``). For each step of the reversed
    formula the cube state before the step is recorded, then the step is
    applied.

    Returns a tuple ``(sample_X, sample_Y, cubes)``: the one-hot flattened
    states, the ``action_map`` label of the step taken from each state, and
    copies of the cube objects.
    """
    cube = pc.Cube()

    move_names = list(action_map.keys())
    scramble = pc.Formula([choice(move_names) for _ in range(n_steps)])
    print(scramble)
    cube(scramble)
    scramble.reverse()

    sample_X, sample_Y, cubes = [], [], []

    for step in scramble:
        move = step.name
        # Record the state first, then apply the move.
        sample_X.append(flatten_1d_b(cube))
        sample_Y.append(action_map[move])
        cubes.append(cube.copy())
        cube(move)

    return sample_X, sample_Y, cubes


def gen_sample_small(n_steps=6):
    """Like ``gen_sample`` but scrambles with the moves of ``action_map_small``.

    The randomly drawn move names are printed as a list, applied to a fresh
    cube and the formula is reversed. For each step of the reversed formula
    the cube state before the step is recorded, then the step is applied.

    Returns a tuple ``(sample_X, sample_Y, cubes)``: the one-hot flattened
    states, the label of the step taken from each state (looked up in
    ``action_map``), and copies of the cube objects.
    """
    cube = pc.Cube()
    move_names = list(action_map_small.keys())
    drawn = [choice(move_names) for _ in range(n_steps)]
    print(drawn)
    scramble = pc.Formula(drawn)

    cube(scramble)

    scramble.reverse()

    sample_X, sample_Y, cubes = [], [], []

    for step in scramble:
        move = step.name
        # Record the state first, then apply the move.
        sample_X.append(flatten_1d_b(cube))
        sample_Y.append(action_map[move])
        cubes.append(cube.copy())
        cube(move)

    return sample_X, sample_Y, cubes


#Generate cubes that are at most n_steps moves away from the goal state
#(the training loop calls this with n_steps=25)
#the scramble is a series of rotations (actions) drawn from the 12 possible actions
def gen_sequence(n_steps=6):
    """Scramble a fresh cube and return the states seen while replaying the reversed scramble.

    Returns a tuple ``(cubes, distance_to_solved)``. ``cubes[k]`` is a copy of
    the cube before the k-th step of the reversed formula is applied and
    ``distance_to_solved[k]`` is ``n_steps - k``, so the first entry carries
    ``n_steps`` and the values count down by one per step.
    """
    cube = pc.Cube()
    move_names = list(action_map_small.keys())

    #build the scramble formula and apply it to the cube
    scramble = pc.Formula([choice(move_names) for _ in range(n_steps)])
    cube(scramble)
    scramble.reverse()

    cubes = []
    distance_to_solved = []

    remaining = n_steps
    for step in scramble:
        cubes.append(cube.copy())
        cube(step.name)
        distance_to_solved.append(remaining)
        remaining -= 1

    return cubes, distance_to_solved


#every cube produced by the generator above is passed to this function
def get_all_possible_actions_cube_small(cube):
    """Try each move of ``action_map_small`` on a copy of ``cube``.

    Returns a tuple ``(flat_cubes, rewards)`` with one entry per move, in the
    iteration order of ``action_map_small``: the one-hot flattened successor
    state (a flat list of 0/1 values) and a reward of +1 when
    ``perc_solved_cube`` of the successor exceeds 0.99, otherwise -1.
    The input cube is not modified; every move is applied to its own copy.
    """
    flat_cubes = []
    rewards = []

    #for every action out of the 12 actions
    for move in action_map_small:
        successor = cube.copy()(move)

        flat_cubes.append(flatten_1d_b(successor))

        #reward only if the cube is more than 99% solved
        solved = perc_solved_cube(successor) > 0.99
        rewards.append(1 if solved else -1)

    return flat_cubes, rewards

def chunker(seq, size):
    """Lazily yield consecutive slices of ``seq`` that are ``size`` items long.

    The last slice is shorter when ``len(seq)`` is not a multiple of ``size``.
    """
    starts = range(0, len(seq), size)
    return (seq[start:start + size] for start in starts)

In [0]:
import keras.backend as K
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Input, LeakyReLU
from keras.models import Model
from keras.optimizers import Adam
from tqdm import tqdm

# from utils import action_map_small, gen_sequence, get_all_possible_actions_cube_small, chunker, \
#     flatten_1d_b


def acc(y_true, y_pred):
    """Per-sample accuracy metric: 1.0 where the predicted class equals the label, else 0.0.

    The label is taken as the maximum of ``y_true`` along the last axis and
    compared with the argmax of ``y_pred`` along the last axis.
    """
    labels = K.max(y_true, axis=-1)
    # argmax yields integer indices; cast to the backend float type to compare.
    predicted = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
    hits = K.equal(labels, predicted)
    return K.cast(hits, K.floatx())


def get_model(lr=0.0001):
    """Build, compile and summarise the two-headed value/policy network.

    ``lr`` is passed to the Adam optimizer. The returned model maps a
    324-element input to ``[value, policy]`` outputs.
    """
    #9*6*6 = 324 -> 9 cells per face, 6 faces per cube, each cell's colour
    #one-hot encoded over 6 colours (e.g. 0 1 0 0 0 0)
    input1 = Input((324,))

    #shared trunk: Dense layers of 1024, 1024, 1024 and 50 units,
    #each followed by a LeakyReLU activation
    hidden = input1
    for units in (1024, 1024, 1024, 50):
        dense = Dense(units)
        hidden = dense(hidden)
        hidden = LeakyReLU()(hidden)

    #value head: linear activation, output length 1
    #-> estimates how far the current state is from the goal state
    out_value = Dense(1, activation="linear", name="value")(hidden)

    #policy head: softmax activation, output length 12
    #-> probability of each action that can be taken from the current state
    n_actions = len(action_map_small)
    out_policy = Dense(n_actions, activation="softmax", name="policy")(hidden)

    model = Model(input1, [out_value, out_policy])

    #each head has its own loss:
    #value  -> mean absolute error
    #policy -> sparse categorical crossentropy (integer labels, not one-hot)
    losses = {"value": "mae", "policy": "sparse_categorical_crossentropy"}
    model.compile(loss=losses, optimizer=Adam(lr), metrics={"policy": acc})
    model.summary()

    return model


if __name__ == "__main__":
    # Training script. Every outer epoch generates N_SAMPLES scrambles of 25
    # moves, then runs 20 rounds in which the value/policy targets are
    # recomputed from the model's own predictions for the successor states and
    # the model is fitted on them for one Keras epoch. The weights are saved to
    # file_path after every outer epoch.

    N_SAMPLES = 100
    N_EPOCH = 10000

    file_path = "auto.h5"

    # NOTE(review): these callbacks are built but never passed to model.fit
    # below, and they monitor "val_loss" although fit is given no validation
    # data. They are kept so any later code that refers to them still works.
    checkpoint = ModelCheckpoint(file_path, monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    early = EarlyStopping(monitor="val_loss", mode="min", patience=1000)

    reduce_on_plateau = ReduceLROnPlateau(monitor="val_loss", mode="min", factor=0.1, patience=50, min_lr=1e-8)

    callbacks_list = [checkpoint, early, reduce_on_plateau]

    model = get_model(lr=0.0001)
    #model.load_weights(file_path)

    for i in range(N_EPOCH):
        cubes = []
        distance_to_solved = []
        for _ in tqdm(range(N_SAMPLES)):
            _cubes, _distance_to_solved = gen_sequence(25)
            cubes.extend(_cubes)
            distance_to_solved.extend(_distance_to_solved)

        # For every state: the rewards and flattened states of its 12
        # successors, plus the flattened state itself.
        cube_next_reward = []
        flat_next_states = []
        cube_flat = []
        for c in cubes:
            flat_cubes, rewards = get_all_possible_actions_cube_small(c)
            cube_next_reward.append(rewards)
            flat_next_states.extend(flat_cubes)
            cube_flat.append(flatten_1d_b(c))

        # These do not change during the 20 rounds below, so build them once
        # instead of converting the Python lists on every round.
        next_states_input = np.array(flat_next_states)
        cube_input = np.array(cube_flat)

        # States closer to solved get a larger weight; the weights are
        # rescaled so that their mean is 1.
        sample_weights = 1. / np.array(distance_to_solved)
        sample_weights = sample_weights * sample_weights.size / np.sum(sample_weights)

        for _ in range(20):

            cube_target_value = []
            cube_target_policy = []

            # Predicted value of every successor state, regrouped into one
            # list of 12 values per state, e.g. [0.175, 0.221, 0.198, ...].
            next_state_value, _ = model.predict(next_states_input, batch_size=1024)
            next_state_value = next_state_value.ravel().tolist()
            next_state_value = list(chunker(next_state_value, size=len(action_map_small)))
            print(next_state_value)
            for rewards, values in tqdm(zip(cube_next_reward, next_state_value), total=len(cubes)):
                r_plus_v = 0.4*np.array(rewards) + np.array(values)

                #vᵢ = maxₐ(v(sᵢ,a)+R(A(sᵢ,a)))
                target_v = np.max(r_plus_v)
                #pᵢ = argmaxₐ (v(sᵢ,a)+R(A(sᵢ,a)))
                target_p = np.argmax(r_plus_v)
                cube_target_value.append(target_v)
                cube_target_policy.append(target_p)

            # Standardise the value targets; the 0.01 keeps the division
            # finite when all targets are equal.
            cube_target_value = np.array(cube_target_value)
            cube_target_value = (cube_target_value-np.mean(cube_target_value))/(np.std(cube_target_value)+0.01)

            print(cube_target_policy[-30:])
            print(cube_target_value[-30:])

            # `epochs` is the Keras 2 name of the former `nb_epoch` argument.
            model.fit(cube_input, [cube_target_value, np.array(cube_target_policy)[..., np.newaxis]],
                      epochs=1, batch_size=128, sample_weight=[sample_weights, sample_weights])

        model.save_weights(file_path)

  2%|▏         | 2/100 [00:00<00:07, 12.87it/s]

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            (None, 324)          0                                            
__________________________________________________________________________________________________
dense_9 (Dense)                 (None, 1024)         332800      input_3[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 1024)         0           dense_9[0][0]                    
__________________________________________________________________________________________________
dense_10 (Dense)                (None, 1024)         1049600     leaky_re_lu_9[0][0]              
____________________________________________________________________________________________

100%|██████████| 100/100 [00:09<00:00, 12.51it/s]
2500it [00:00, 60248.10it/s]

[[0.05946238711476326, -0.13086038827896118, -0.055439677089452744, -0.22810040414333344, -0.04557612165808678, 0.08474838733673096, -0.16454412043094635, 0.022268399596214294, -0.07644208520650864, -0.2505735158920288, 0.01848885975778103, -0.17849187552928925], [0.12549391388893127, 0.0010471709538251162, 0.005186083260923624, -0.15264753997325897, 0.2231816202402115, 0.10007141530513763, -0.06759940087795258, 0.1454637348651886, -0.012162109836935997, -0.04588766768574715, 0.17693637311458588, -0.12241906672716141], [0.022794604301452637, 0.07362914085388184, -0.004574075806885958, -0.024199655279517174, 0.1438467800617218, -0.058135710656642914, -0.030019672587513924, 0.08474838733673096, -0.04612954333424568, -0.09787442535161972, 0.10931956768035889, -0.16084979474544525], [0.033263806253671646, 0.06496778875589371, -0.030489996075630188, -0.0739588588476181, 0.08692751079797745, 0.14653369784355164, -0.01643509417772293, 0.14502903819084167, 0.0682554766535759, 0.080626837909221




Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 1/1


2500it [00:00, 60456.17it/s]