# Test supervised pyrat 

1. Generate data in terminal PyRat: 

python3 pyrat.py -p 40 -md 0 -d 0 --nonsymmetric --rat AIs/manh.py --python AIs/manh.py --tests 1000 --nodrawing --synchronous --save

In [1]:
# The tqdm package is useful to visualize progress with long computations. 
# Install it using pip 
import tqdm

import numpy as np
import ast
import os

2. Generate Dataset 


In [1]:
import scipy
import scipy.sparse

# Header lines found in a saved game file, mapped to the key under which the
# value that follows them is stored by the parser.
PHRASES = {
    "# Random seed\n": "seed",
    "# MazeMap\n": "maze",
    "# Pieces of cheese\n": "pieces",
    "# Rat initial location\n": "rat",
    "# Python initial location\n": "python",
    "rat_location then python_location then pieces_of_cheese then rat_decision then python_decision\n": "play",
}

# One-letter codes used for the four possible movements.
MOVE_DOWN, MOVE_LEFT, MOVE_RIGHT, MOVE_UP = "D", "L", "R", "U"

# Encoding of the four possible movements as class indices:
# left -> 0, right -> 1, up -> 2, down -> 3.
translate_action = {
    move: index
    for index, move in enumerate((MOVE_LEFT, MOVE_RIGHT, MOVE_UP, MOVE_DOWN))
}

def process_file_2(filename):
    """Parse one saved PyRat game file into a dictionary.

    The file is read line by line. A line equal to one of the keys of
    ``PHRASES`` announces the content of the line(s) that follow it:

    * for the ``"play"`` header, the next five lines are read as the rat
      location, the python location, the pieces of cheese (all three parsed
      with ``ast.literal_eval``), then the rat decision and the python
      decision (kept as strings, newline removed);
    * for any other header, the next line is parsed with
      ``ast.literal_eval`` and stored under the name given by ``PHRASES``.

    A line containing ``"turn "`` is trimmed so that it starts at
    ``'rat_location'`` before being matched. The first line starting with
    ``"{"`` is parsed and stored under ``"end"``, and parsing stops there.
    Parsing also stops (after printing a message) at the first line that is
    not recognized, and silently at end of file.

    Parameters
    ----------
    filename : str
        Path of the saved game file.

    Returns
    -------
    dict
        ``"play"`` maps to a list of per-turn dictionaries with the keys
        ``rat``, ``python``, ``piecesOfCheese``, ``rat_decision`` and
        ``python_decision``; the other keys are the ``PHRASES`` names found
        in the file, plus ``"end"`` when a line starting with ``"{"`` exists.
    """
    params = dict(play=list())
    # The context manager closes the file on every exit path, including when
    # ast.literal_eval raises on a malformed line.
    with open(filename, "r") as f:
        info = f.readline()
        # readline() returns "" (never None) at end of file, so the loop is
        # driven by truthiness; a blank line is "\n" and is still processed.
        while info:
            if info.startswith("{"):
                params["end"] = ast.literal_eval(info)
                break
            if "turn " in info:
                # Drop the "turn N" prefix so the line matches the play header.
                info = info[info.find('rat_location'):]
            if info in PHRASES:
                param = PHRASES[info]
                if param == "play":
                    rat = ast.literal_eval(f.readline())
                    python = ast.literal_eval(f.readline())
                    pieces = ast.literal_eval(f.readline())
                    rat_decision = f.readline().replace("\n", "")
                    python_decision = f.readline().replace("\n", "")
                    play_dict = dict(
                        rat=rat, python=python, piecesOfCheese=pieces,
                        rat_decision=rat_decision,
                        python_decision=python_decision)
                    params[param].append(play_dict)
                else:
                    params[param] = ast.literal_eval(f.readline())
            else:
                print("did not understand:", info)
                break
            info = f.readline()
    return params

def dict_to_x_y(end,rat, python, maze, piecesOfCheese,rat_decision,python_decision,
                mazeWidth=21, mazeHeight=15):
    """Turn one recorded turn into a training sample ``(x, y)``.

    Only the moves of the winner of the game are used: the python when
    ``end["win_python"] == 1``, otherwise the rat when
    ``end["win_rat"] == 1``.

    Parameters
    ----------
    end : dict
        End-of-game summary; the keys ``"win_python"`` and ``"win_rat"``
        are read.
    rat, python : tuple
        Locations of the two players for this turn.
    maze : object
        Forwarded unchanged to ``convert_input_2``.
    piecesOfCheese : iterable
        Locations of the remaining pieces of cheese.
    rat_decision, python_decision : str
        Move chosen by each player; must be a key of ``translate_action``
        unless it is ``"None"`` or empty.
    mazeWidth, mazeHeight : int
        Maze dimensions forwarded to ``convert_input_2``.

    Returns
    -------
    bool or tuple
        ``False`` when nobody won or when the winner made no move;
        otherwise ``(x_1, y)`` where ``x_1`` is the canvas built by
        ``convert_input_2`` and ``y`` is a ``(1, 4)`` ``int8`` one-hot
        encoding of the winner's move.
    """
    # Pick the point of view of whichever player won the game.
    if end["win_python"] == 1:
        winner, loser, chosen_move = python, rat, python_decision
    elif end["win_rat"] == 1:
        winner, loser, chosen_move = rat, python, rat_decision
    else:
        return False

    # The winner did not play on this turn: nothing to learn from it.
    if chosen_move in ("None", ""):
        return False

    features = convert_input_2(winner, maze, loser, mazeHeight, mazeWidth, piecesOfCheese)
    target = np.zeros((1, 4), dtype=np.int8)
    target[0][translate_action[chosen_move]] = 1
    return features, target

In [2]:
# center in player

In [3]:
### The goal of this function is to create a canvas, which will be the input vector used to train the classifier.
### As we want to predict the next move, we create a canvas that is centered on the player, so that we can easily exploit translation invariance.


def convert_input_2(player, maze, opponent, mazeHeight, mazeWidth, piecesOfCheese):
    """Build the player-centred canvas used as classifier input.

    The canvas has ``2*mazeHeight-1`` rows, ``2*mazeWidth-1`` columns and a
    single layer, so that the player always sits on the central cell
    whatever its position in the maze. A cell is set to 1 when a piece of
    cheese lies at that offset from the player, and is 0 otherwise.
    Additional layers could carry more information about the play (for
    instance the location of the opponent).

    Parameters
    ----------
    player : tuple
        ``(x, y)`` location of the player the canvas is centred on.
    maze : object
        Not used by this function.
    opponent : tuple
        ``(x, y)`` location of the opponent; unpacked but not drawn.
    mazeHeight, mazeWidth : int
        Dimensions of the maze.
    piecesOfCheese : iterable of tuple
        ``(x, y)`` locations of the pieces of cheese.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(2*mazeHeight-1, 2*mazeWidth-1, 1)``.
    """
    n_rows = 2*mazeHeight - 1
    n_cols = 2*mazeWidth - 1

    # Start from an empty (all-zero) single-layer canvas.
    canvas = np.zeros((n_rows, n_cols, 1))

    player_x, player_y = player
    # Kept from the original code: the opponent location is unpacked (so a
    # malformed value still raises here) even though it is not drawn.
    opponent_x, opponent_y = opponent

    # The player is mapped to the central cell (row mazeHeight-1, column
    # mazeWidth-1); every cheese is drawn relative to that cell.
    middle_col = mazeWidth - 1
    middle_row = mazeHeight - 1
    for cheese_x, cheese_y in piecesOfCheese:
        row = cheese_y + middle_row - player_y
        col = cheese_x + middle_col - player_x
        canvas[row, col, 0] = 1

    return canvas


In [6]:
# Parse every saved game found under `directory` (hidden files are skipped).
games = list()
directory = "/home/brain/IA/PyRat/saves/"
for root, dirs, files in os.walk(directory):
    for filename in tqdm.tqdm(files):
        if filename.startswith("."):
            continue
        # os.walk yields file names relative to `root`, not to `directory`:
        # joining with `root` keeps files in sub-directories reachable.
        game_params = process_file_2(os.path.join(root, filename))
        games.append(game_params)

# Turn every turn played by the winner of a game into one (x, y) sample.
# Samples are kept as sparse rows because the canvases are mostly zeros.
x_1_train = list()
y_train = list()
wins_python = 0
wins_rat = 0
for game in tqdm.tqdm(games):
    if game["end"]["win_python"] == 1:
        wins_python += 1
    elif game["end"]["win_rat"] == 1:
        wins_rat += 1
    else:
        # Draw: no winner to imitate.
        continue
    plays = game["play"]
    for play in plays:
        # Use the maze of the game being processed (not the maze of the last
        # file parsed by the loading loop above).
        x_y = dict_to_x_y(**play, maze=game["maze"], end=game["end"])
        if x_y:
            x1, y = x_y
            y_train.append(scipy.sparse.csr_matrix(y.reshape(1, -1)))
            x_1_train.append(scipy.sparse.csr_matrix(x1.reshape(1, -1)))
# Draws are derived from the number of games actually loaded rather than
# from a hard-coded total.
print("Greedy/Draw/Greedy, {}/{}/{}".format(
    wins_rat, len(games) - wins_python - wins_rat, wins_python))

100%|██████████| 1000/1000 [00:28<00:00, 34.50it/s]
100%|██████████| 1000/1000 [00:17<00:00, 57.62it/s]

Greedy/Draw/Greedy, 435/127/438





In [7]:
# Store the move dataset (samples and one-hot labels) in a compressed archive,
# then release the in-memory lists.
np.savez_compressed(
    "dataset_challenge_moves_supervised.npz",
    x=x_1_train,
    y=y_train,
)
del x_1_train, y_train

# LOAD DATASET

In [2]:
from sklearn.model_selection import train_test_split

### This cell reloads the PyRat dataset that the dataset-generation cell stored as a compressed .npz archive.

mazeWidth = 21
mazeHeight = 15

import pickle
import scipy
import scipy.sparse  # scipy.sparse.vstack is used below; import the submodule explicitly

# The archive holds object arrays (one scipy sparse row per sample), which NumPy
# stores pickled. NumPy >= 1.16.3 refuses to load them unless allow_pickle=True.
# Unpickling can execute arbitrary code: only load archives you generated yourself.
# The context manager opens the archive once and closes it afterwards.
with np.load("dataset_challenge_moves_supervised.npz", allow_pickle=True) as dataset:
    x = dataset['x']
    y = dataset['y']

x = scipy.sparse.vstack(x)

## The dataset was stored using scipy sparse arrays, because the matrices contain mostly zeros. In case you wish to use
## supervised learning techniques that don't accept sparse matrices, you have to convert x into a dense array and reshape it accordingly
#x = x.todense()
#x = np.array(x).reshape(-1,(2*mazeHeight-1)*(2*mazeWidth-1))

# Labels were saved one-hot encoded; convert them back to class indices.
y = scipy.sparse.vstack(y).todense()
y = np.argmax(np.array(y),1)

In [3]:
# Sanity check: show the shape of the sample matrix and of the label vector.
print(x.shape, y.shape)

(59461, 1189) (59461,)


## -  Neural Network model

In [4]:
from sklearn.neural_network import MLPClassifier

### Train a classifier with supervised learning and evaluate its performance.
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x[:, :],
    y[:],
    test_size=0.20,
    random_state=1,
)

# fit() returns the estimator itself, so creation and training can be chained.
clf = MLPClassifier(verbose=1).fit(x_train, y_train)

# Score on the training split, then on the held-out split.
train_score = clf.score(x_train, y_train)
test_score = clf.score(x_test, y_test)
print(train_score, test_score)

Iteration 1, loss = 0.70050588
Iteration 2, loss = 0.33514963
Iteration 3, loss = 0.28882734
Iteration 4, loss = 0.25451072
Iteration 5, loss = 0.22029610
Iteration 6, loss = 0.18725086
Iteration 7, loss = 0.15867774
Iteration 8, loss = 0.13337113
Iteration 9, loss = 0.11273966
Iteration 10, loss = 0.09548549
Iteration 11, loss = 0.08175348
Iteration 12, loss = 0.06883169
Iteration 13, loss = 0.05904687
Iteration 14, loss = 0.05101111
Iteration 15, loss = 0.04392624
Iteration 16, loss = 0.03791595
Iteration 17, loss = 0.03277547
Iteration 18, loss = 0.02876997
Iteration 19, loss = 0.02495931
Iteration 20, loss = 0.02162421
Iteration 21, loss = 0.01893362
Iteration 22, loss = 0.01692840
Iteration 23, loss = 0.01503397
Iteration 24, loss = 0.01309802
Iteration 25, loss = 0.01156308
Iteration 26, loss = 0.01029894
Iteration 27, loss = 0.00903473
Iteration 28, loss = 0.00813626
Iteration 29, loss = 0.00744220
Iteration 30, loss = 0.00705562
Iteration 31, loss = 0.00589657
Iteration 32, los

### report model 

In [5]:
from sklearn.metrics import classification_report,confusion_matrix

# Per-class report on the samples the classifier was trained on.
y_pred_train = clf.predict(x_train)
report = classification_report(y_train, y_pred_train)
print("Train Set:", report, sep="\n")

# Same report on the held-out samples.
y_pred_test = clf.predict(x_test)
report = classification_report(y_test, y_pred_test)
print("Test Set:", report, sep="\n")


Train Set:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00     12765
          1       1.00      1.00      1.00     12414
          2       1.00      1.00      1.00     11142
          3       1.00      1.00      1.00     11247

avg / total       1.00      1.00      1.00     47568

Test Set:
             precision    recall  f1-score   support

          0       0.92      0.91      0.91      3175
          1       0.91      0.90      0.90      3102
          2       0.94      0.94      0.94      2840
          3       0.93      0.94      0.93      2776

avg / total       0.92      0.92      0.92     11893



### save model 

In [6]:
### Assuming your classifier is named clf, you can save the trained object with joblib.dump, as follows:

import pickle

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. joblib is a standalone package (installed as a scikit-learn
# dependency); fall back to the bundled copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(clf, 'mlp_classifier_moves.pkl')

# Test in pyrat
## Now you can use the supervised.py file as an AI directly in Pyrat. 


['mlp_classifier_moves.pkl']

All you have to do is copy mlp_classifier_moves.pkl to the **pyrat folder** and supervised.py to the **pyrat AIs subfolder**, then run a game to see how the trained classifier performs while playing!

In [None]:
# `plt` was used without ever being imported in this notebook.
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Project the samples to 2-D with t-SNE (fixed seed for repeatable output).
unsup = TSNE(random_state = 0)
# toarray() yields a plain ndarray; todense() yields np.matrix, which recent
# scikit-learn versions reject as estimator input.
examples = unsup.fit_transform(x.toarray())
# One point per sample, coloured by the class index of its move.
plt.scatter(examples[:,0],examples[:,1],c=y)
plt.colorbar()