<a href="https://colab.research.google.com/github/arunaabh95/Grid-Search/blob/main/Imitation_Game_1st_agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade tables

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
from ast import literal_eval
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from matplotlib import pyplot as plt
from sklearn.utils import shuffle

In [None]:
# Training data: stored under the 'df' key of the HDF5 file.
filename = '/content/train-prob-1.h5'
df1 = pd.read_hdf(filename, key='df', mode='r')
print(df1.shape)

# Test data (no key given, so the file's default object is read).
test_file = '/content/test-1.h5'
df2 = pd.read_hdf(test_file, mode='r')
print(df2.shape)

# Validation data.
validation_file = '/content/validate-1.h5'
df3 = pd.read_hdf(validation_file, mode='r')
print(df3.shape)

# Shuffle each frame independently, in the order train -> test -> validation.
df1, df2, df3 = (shuffle(frame) for frame in (df1, df2, df3))

In [None]:
# Preprocessing data

def make_grid(probability, size):
  """Build a ``size`` x ``size`` float grid of 0/1 cells.

  Args:
    probability: chance that a visited cell is set to 1. A value of 0
      short-circuits and returns an all-zero grid.
    size: number of rows and columns.

  Returns:
    A NumPy array of shape ``(size, size)``. The cells (0, 0) and
    (size - 1, size - 1) are always 0 when ``probability`` is non-zero.
  """
  # Function-scope import: the notebook's import cell never imports `random`,
  # so the original raised NameError for any non-zero probability.
  import random

  grid = np.zeros((size, size))
  if probability == 0:
    return grid

  # NOTE(review): the ranges stop at size - 2, so the last row and column are
  # never set to 1. Kept as in the original; confirm whether that is intended.
  for i in range(size - 1):
    for j in range(size - 1):
      if probability > random.uniform(0, 1):
        grid[i][j] = 1

  # The two corners are always cleared.
  grid[0][0] = 0
  grid[size - 1][size - 1] = 0
  return grid

def preprocess_single(pos, size=50):
  """One-hot encode a single position as a square grid.

  Args:
    pos: indexable pair; ``pos[0]`` is used as the row index and ``pos[1]``
      as the column index.
    size: side length of the grid. Defaults to 50, the value that was
      previously hard-coded.

  Returns:
    A ``(size, size)`` float array of zeros with a single 1 at ``pos``.
  """
  # Allocate the zero grid directly instead of going through make_grid(0, 50);
  # the result is the same and the helper no longer depends on which
  # make_grid definition is currently live in the kernel.
  g = np.zeros((size, size))
  g[pos[0]][pos[1]] = 1
  return g

def preprocess(input, size=50):
  """One-hot encode a sequence of positions into a stack of grids.

  Args:
    input: sized iterable of indexable pairs (row, column). The parameter
      name shadows the builtin but is kept for caller compatibility.
    size: side length of each grid. Defaults to 50, the value that was
      previously hard-coded.

  Returns:
    A float array of shape ``(len(input), size, size)`` where slice ``i`` is
    all zeros except for a 1 at the i-th position.
  """
  # Write straight into the output instead of building a temporary grid per
  # position and copying it in.
  toRet = np.zeros((len(input), size, size))
  for i, pos in enumerate(input):
    toRet[i, pos[0], pos[1]] = 1
  return toRet

# Only the first 30000 rows of the test and validation frames are used.
test_rows = df2.iloc[0:30000]
validation_rows = df3.iloc[0:30000]

# Column 0: positions, turned into one-hot 50x50 grids.
start_test = preprocess(test_rows.iloc[:, 0])
start_validation = preprocess(validation_rows.iloc[:, 0])

# test: column 1 is stacked and reshaped into 50x50 grids, column 2 is
# converted to strings (the move labels).
test_grid_cells = test_rows.iloc[:, 1].apply(np.array).to_numpy()
grid_test = np.vstack(test_grid_cells).reshape(30000, 50, 50)
move_test = test_rows.iloc[:, 2].apply(str).to_numpy()

# validation: same layout as the test split.
validation_grid_cells = validation_rows.iloc[:, 1].apply(np.array).to_numpy()
grid_validation = np.vstack(validation_grid_cells).reshape(30000, 50, 50)
move_validation = validation_rows.iloc[:, 2].apply(str).to_numpy()

label_encoder = LabelEncoder()
# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword, so `OneHotEncoder(sparse=False)` raises TypeError on current
# installs. Try the new name first and fall back for older versions.
try:
  onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
  onehot_encoder = OneHotEncoder(sparse=False)

# One hot encoding
# NOTE(review): both encoders are re-fit on every split (here and again in the
# training cells). The integer/column assignment is therefore only consistent
# across splits if each split contains the same set of move labels — confirm.

# encode test data
test_values = np.array(move_test)
test_integer_encoded = label_encoder.fit_transform(test_values)
# binary encode: OneHotEncoder expects a 2-D column, hence the reshape
test_integer_encoded = test_integer_encoded.reshape(len(test_integer_encoded), 1)
onehot_encoded_test = onehot_encoder.fit_transform(test_integer_encoded)
print(onehot_encoded_test.shape)

# encode validation data
validation_values = np.array(move_validation)
validation_integer_encoded = label_encoder.fit_transform(validation_values)
# binary encode
validation_integer_encoded = validation_integer_encoded.reshape(len(validation_integer_encoded), 1)
onehot_encoded_validation = onehot_encoder.fit_transform(validation_integer_encoded)

print(onehot_encoded_validation.shape)

In [None]:
# Number of epochs used by every model.fit call in this notebook.
epochs=100
from tensorflow.keras.optimizers import Adam

# Two 50x50 inputs: the one-hot agent position ("start") and the grid.
input_start = tf.keras.Input(shape=(50,50), name="start")
input_grid = tf.keras.Input(shape=(50,50), name="grid")

# The original passed input_dim=2 to these Dense layers. In the functional API
# the input shape comes from the tensor the layer is called on (last axis is
# 50 here), so the argument was both wrong and unused; it is dropped.
x = tf.keras.layers.Dense(512, activation='relu')(input_start)
x = tf.keras.layers.Flatten()(x)
y = tf.keras.layers.Dense(512, activation='relu')(input_grid)
y = tf.keras.layers.Flatten()(y)

# Merge both branches, then a dropout-regularised dense head.
z = tf.keras.layers.Concatenate()([x,y])
z = tf.keras.layers.Dropout(0.8)(z)
z = tf.keras.layers.Dense(512, activation='relu')(z)
z = tf.keras.layers.Dropout(0.8)(z)
z = tf.keras.layers.Dense(128, kernel_regularizer=tf.keras.regularizers.l1(1e-5),activation='relu')(z)
# Four softmax outputs, one per one-hot encoded move class.
z = tf.keras.layers.Dense(4, activation = 'softmax')(z)
model = tf.keras.models.Model(inputs=[input_start, input_grid], outputs=z)
model.compile(loss='categorical_crossentropy',optimizer=Adam(learning_rate=1e-3),metrics=['accuracy'])

# Keep the weights with the best *training* accuracy seen so far.
# NOTE(review): newer Keras releases require the checkpoint path to end in
# `.keras` or `.h5` — confirm against the installed version.
checkpoint = tf.keras.callbacks.ModelCheckpoint('agent-1', monitor='accuracy', verbose=1, save_best_only=True, mode='max')
callbacks_list = [checkpoint]

In [None]:
# Fit the model
def train(n_chunks=5, chunk_size=10000):
  """Fit the global ``model`` on consecutive row chunks of ``df1``.

  For each chunk, column 0 is one-hot encoded with ``preprocess``, column 1 is
  stacked into 50x50 grids, and column 2 (the move) is label- and one-hot
  encoded before calling ``model.fit`` with the checkpoint callbacks.

  Args:
    n_chunks: number of chunks to train on (default 5, as before).
    chunk_size: rows per chunk (default 10000, as before).
  """
  for i in range(n_chunks):
    # iloc is 0-based and half-open, so the chunk is [i*size, (i+1)*size).
    # The original started at i*size + 1, silently dropping one row per chunk
    # and forcing the hard-coded reshape(9999, ...).
    first = i * chunk_size
    last = (i + 1) * chunk_size
    chunk = df1.iloc[first:last]
    if len(chunk) == 0:
      # df1 has fewer rows than requested; nothing left to train on.
      break
    print(i, "th iteration")
    start = preprocess(chunk.iloc[:, 0])
    # -1 lets NumPy infer the row count, so a short final chunk still works.
    grid = np.vstack(chunk.iloc[:, 1].apply(np.array).to_numpy()).reshape(-1, 50, 50)
    move = chunk.iloc[:, 2].apply(str).to_numpy()
    # encode train data
    # NOTE(review): the encoders are re-fit on every chunk; the class order is
    # only stable if each chunk contains every move label — confirm.
    values = np.array(move)
    integer_encoded = label_encoder.fit_transform(values)
    integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
    onehot_encoded = onehot_encoder.fit_transform(integer_encoded).astype(int)
    print(onehot_encoded.shape)
    model.fit((start, grid),
              onehot_encoded,
              epochs=epochs,
              callbacks=callbacks_list,
              batch_size=256,
              verbose=1)

train()


In [None]:
# Fit the model

# Train on rows [600000, 700000) of the shuffled training frame.
first = 600000
last = 700000
train_rows = df1.iloc[first:last]

start = preprocess(train_rows.iloc[:, 0])
grid_cells = train_rows.iloc[:, 1].apply(np.array).to_numpy()
grid = np.vstack(grid_cells).reshape(100000, 50, 50)
move = train_rows.iloc[:, 2].apply(str).to_numpy()

# Encode the move labels: strings -> integer ids -> one-hot integer rows.
values = np.array(move)
integer_encoded = label_encoder.fit_transform(values).reshape(-1, 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded).astype(int)

# Validation arrays come from the preprocessing cell.
validation_pair = ((start_validation, grid_validation), onehot_encoded_validation)
model_history = model.fit(
    (start, grid),
    onehot_encoded,
    epochs=epochs,
    batch_size=2048,
    verbose=1,
    validation_data=validation_pair,
)

# Persist the trained model under the same name the checkpoint callback uses.
model.save('agent-1')

In [None]:
# Evaluate on the held-out test arrays; score is [loss, accuracy].
score = model.evaluate((start_test, grid_test), onehot_encoded_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

# Plot the loss function.
# The original plotted np.sqrt(...) of each curve while labelling the axes
# "Loss" and "Accuracy"; the raw values are plotted so the labels are accurate.
fig, ax = plt.subplots(1, 1, figsize=(10,6))
ax.plot(model_history.history['loss'], 'r', label='train')
ax.plot(model_history.history['val_loss'], 'b' ,label='val')
ax.set_xlabel(r'Epoch', fontsize=20)
ax.set_ylabel(r'Loss', fontsize=20)
ax.legend()
ax.tick_params(labelsize=20)

# Plot the accuracy
fig, ax = plt.subplots(1, 1, figsize=(10,6))
ax.plot(model_history.history['accuracy'], 'r', label='train')
ax.plot(model_history.history['val_accuracy'], 'b' ,label='val')
ax.set_xlabel(r'Epoch', fontsize=20)
ax.set_ylabel(r'Accuracy', fontsize=20)
ax.legend()
ax.tick_params(labelsize=20)

# Generate confusion matrix

from sklearn.metrics import confusion_matrix
def decode_predictions(predictions):
  """Map each prediction row back to its original label.

  For every row the index of the largest value is taken, and the resulting
  integer ids are passed to the global ``label_encoder.inverse_transform``.

  Args:
    predictions: iterable of 1-D score rows (e.g. softmax outputs).

  Returns:
    Whatever ``label_encoder.inverse_transform`` returns for the argmax ids.
  """
  winners = np.fromiter(
      (np.argmax(row, axis=0) for row in predictions),
      dtype=float,
  )
  return label_encoder.inverse_transform(winners.astype(int))
predictions = model.predict((start_test, grid_test))
predictions = decode_predictions(predictions)
# scikit-learn's signature is confusion_matrix(y_true, y_pred, ...). The
# original passed the predictions first, which transposes the matrix; the true
# moves now come first so rows are true labels and columns are predictions.
confusion_matrix(np.array(move_test), np.array(predictions), labels=['left', 'right', 'up', 'down'])

In [None]:
# Load the evaluation grids and stack them into a (100, 50, 50) array.
filename = '/content/grids.h5'
ip = pd.read_hdf(filename, key='df', mode='r')
ip_cells = ip.apply(np.array).to_numpy()
ip_grids = np.vstack(ip_cells).reshape(100, 50, 50)
ip_grids.shape

In [None]:
# evaluate ml agent
# Module-level list shared by the functions below: run_agent() clears it and
# then appends every position the agent moves to; main_runner() reads its
# length after each run.
path = []

def make_grid(probability, size):
  """Build a ``size`` x ``size`` float grid of 0/1 cells.

  This re-definition shadows the earlier ``make_grid`` in the notebook.

  Args:
    probability: chance that a visited cell is set to 1. A value of 0
      short-circuits and returns an all-zero grid.
    size: number of rows and columns.

  Returns:
    A NumPy array of shape ``(size, size)``. The cells (0, 0) and
    (size - 1, size - 1) are always 0 when ``probability`` is non-zero.
  """
  # Function-scope import: `random` is not imported anywhere in the notebook,
  # so the original raised NameError for any non-zero probability.
  import random

  grid = np.zeros((size, size))
  if probability == 0:
    return grid

  # NOTE(review): the ranges stop at size - 2, so the last row and column are
  # never set to 1. Kept as in the original; confirm whether that is intended.
  for i in range(size - 1):
    for j in range(size - 1):
      if probability > random.uniform(0, 1):
        grid[i][j] = 1

  # These lines were indented inside the outer loop in the original, so the
  # function returned after filling only the first row. They now run once,
  # after the whole grid has been populated.
  grid[0][0] = 0
  grid[size - 1][size - 1] = 0
  return grid


def convert_state_to_grid(state):
  """Return a 50x50 grid of zeros with a single 1 at ``state``.

  ``state[0]`` is used as the row index and ``state[1]`` as the column index.
  """
  one_hot = make_grid(0, 50)
  row, col = state[0], state[1]
  one_hot[row][col] = 1
  return one_hot

def is_valid_state(state, grid, check_grid = False):
  """Tell whether ``state`` is a usable cell of the 50x50 grid.

  Args:
    state: indexable pair (row, column).
    grid: 2-D grid indexed as ``grid[row][col]``; only read when
      ``check_grid`` is true.
    check_grid: when true, a cell whose grid value is 1 is also rejected.
      The original accepted this flag but ignored it, so callers passing
      ``True`` still got ``True`` for blocked cells.

  Returns:
    ``False`` if either coordinate is outside 0..49, or if ``check_grid`` is
    set and ``grid[row][col] == 1``; ``True`` otherwise.
  """
  row, col = state[0], state[1]
  if not (0 <= row <= 49 and 0 <= col <= 49):
    return False
  # Bounds are verified above, so indexing the grid here is safe.
  if check_grid and grid[row][col] == 1:
    return False
  return True


def get_pos_from_direction(pos, direction, grid):
  """Apply a named move to ``pos`` and return the resulting position.

  'stay' returns ``pos`` unchanged. 'left'/'right' change the column by -1/+1
  and 'up'/'down' change the row by +1/-1. The moved position is returned only
  if ``is_valid_state(new_pos, grid, True)`` accepts it; otherwise ``pos`` is
  returned. Any other direction yields ``None``.
  """
  if direction == 'stay':
    return pos

  # Moves are compared in this fixed order; each entry builds the candidate.
  moves = (
      ('left', lambda p: [p[0], p[1] - 1]),
      ('right', lambda p: [p[0], p[1] + 1]),
      ('up', lambda p: [p[0] + 1, p[1]]),
      ('down', lambda p: [p[0] - 1, p[1]]),
  )
  for name, step in moves:
    if direction == name:
      candidate = step(pos)
      if is_valid_state(candidate, grid, True):
        return candidate
      return pos

  # Unrecognised direction: no position is produced.
  return None


def update_three(exp_grid, grid, pos):
  """Copy blocked neighbours of ``pos`` from ``grid`` into ``exp_grid``.

  The four orthogonal neighbours of ``pos`` are inspected; each one that
  passes ``is_valid_state`` and has value 1 in ``grid`` is set to 1 in
  ``exp_grid`` (mutated in place).
  """
  # The original called neighbors.append(row, col) with two arguments, which
  # raises TypeError; each neighbour is now a single [row, col] list.
  neighbors = [
      [pos[0] + 1, pos[1]],
      [pos[0] - 1, pos[1]],
      [pos[0], pos[1] + 1],
      [pos[0], pos[1] - 1],
  ]
  for n in neighbors:
    if is_valid_state(n, grid) and grid[n[0]][n[1]] == 1:
      exp_grid[n[0]][n[1]] = 1


def update_one(exp_grid, grid, pos):
  """Mark in ``exp_grid`` every orthogonal neighbour of ``pos`` that is 1 in ``grid``.

  Neighbours rejected by ``is_valid_state`` are skipped. ``exp_grid`` is
  modified in place; nothing is returned.
  """
  row, col = pos[0], pos[1]
  around = ([row + 1, col], [row - 1, col], [row, col + 1], [row, col - 1])
  for cell in around:
    if not is_valid_state(cell, grid):
      continue
    if grid[cell[0]][cell[1]] == 1:
      exp_grid[cell[0]][cell[1]] = 1


def update_grid(exp_grid, grid, agent, pos):
  """Dispatch to the explored-grid updater for the given agent id.

  Agent 1 uses ``update_one`` and agent 3 uses ``update_three``; any other
  value leaves ``exp_grid`` untouched.
  """
  if agent == 1:
    update_one(exp_grid, grid, pos)
  elif agent == 3:
    update_three(exp_grid, grid, pos)


def run_agent(pos, grid, agent):
  """Let the trained model walk from ``pos`` towards cell (49, 49).

  On every step the explored grid is updated around the current position, the
  model predicts a move from the one-hot position and the explored grid, and
  the move is applied with ``get_pos_from_direction``. Each resulting position
  is appended to the global ``path`` (cleared at the start). The loop stops at
  (49, 49) or after 500 steps.

  Args:
    pos: starting position as an indexable pair (row, column).
    grid: the full grid the agent moves on.
    agent: agent id forwarded to ``update_grid``.
  """
  explored_grid = make_grid(0,50)
  path.clear()
  steps = 0
  while (pos[0] != 49 or pos[1] != 49) and steps < 500:
    update_grid(explored_grid, grid, agent, pos)
    # Separate names for the position, its one-hot grid and the prediction;
    # the original reused `new_pos` for all three.
    pos_grid = convert_state_to_grid(pos)
    result = model.predict((pos_grid.reshape((1, 50,50)), explored_grid.reshape((1, 50,50))))
    # decode_predictions returns one label per input row; take the single
    # label instead of passing a 1-element array as the direction.
    direction = decode_predictions(result)[0]
    next_pos = get_pos_from_direction(pos, direction, grid)
    if next_pos is None:
      # Unrecognised move label: treat it as staying in place rather than
      # crashing on the next loop condition.
      next_pos = pos
    pos = next_pos
    path.append(pos)
    steps += 1

def main_runner():
  """Run agent 1 from [0, 0] on every grid in ``ip_grids`` and report paths.

  For each grid the grid itself and the resulting global ``path`` are printed;
  the list of path lengths is printed once at the end.
  """
  path_len = list()
  for main_grid in ip_grids:
    run_agent([0,0], main_grid, 1)
    print("Output")
    # The original always printed ip_grids[10] here, regardless of which grid
    # had just been run; print the grid that was actually used.
    print(main_grid)
    print(path)
    path_len.append(len(path))
  print(path_len)

main_runner()