In [2]:
import csv
import math
import random
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier

**Creating Type Table**

In [3]:
# Type effectiveness chart, keyed by ATTACKING type.
# For each attacking type, the lists name the DEFENDING types against which
# the damage multiplier is 2 ("x2"), 0.5 ("x0.5") or 0 ("x0"); any defending
# type that appears in none of the three lists is hit for the neutral x1.
typeTable = {
    "Steel" : {
        "x2" : ["Fairy","Ice","Rock"],
        "x0.5" : ["Steel","Water","Electric","Fire"],
        "x0" : []
    },
    "Fighting": {
        "x2" : ["Steel","Ice","Normal","Rock","Dark"],
        "x0.5" : ["Fairy","Bug","Poison","Psychic","Flying"],
        "x0" : ["Ghost"]
    },
    "Dragon": {
        "x2" : ["Dragon"],
        "x0.5" : ["Steel"],
        "x0" : ["Fairy"]
    },
    "Water": {
        "x2" : ["Fire","Rock","Ground"],
        "x0.5" : ["Dragon","Water","Grass"],
        "x0" : []
    },
    "Electric": {
        "x2" : ["Water","Flying"],
        "x0.5" : ["Dragon","Electric","Grass"],
        "x0" : ["Ground"]
    },
    "Fairy": {
        "x2" : ["Fighting","Dragon","Dark"],
        "x0.5" : ["Steel","Fire","Poison"],
        "x0" : []
    },
    "Fire": {
        "x2" : ["Steel","Ice","Bug","Grass"],
        "x0.5" : ["Dragon","Water","Fire","Rock"],
        "x0" : []
    },
    "Ice": {
        "x2" : ["Dragon","Grass","Ground","Flying"],
        "x0.5" : ["Steel","Water","Fire","Ice"],
        "x0" : []
    },
    "Bug": {
        "x2" : ["Grass","Psychic","Dark"],
        "x0.5" : ["Steel","Fighting","Fairy","Fire","Poison","Ghost","Flying"],
        "x0" : []
    },
    "Normal": {
        "x2" : [],
        "x0.5" : ["Steel","Rock"],
        "x0" : ["Ghost"]
    },
    "Grass": {
        "x2" : ["Water","Rock","Ground"],
        # Bug resists Grass; it was missing from this list.
        "x0.5" : ["Steel","Dragon","Fire","Grass","Poison","Flying","Bug"],
        "x0" : []
    },
    "Poison": {
        "x2" : ["Fairy","Grass"],
        "x0.5" : ["Poison","Rock","Ground","Ghost"],
        "x0" : ["Steel"]
    },
    # This entry used to be a verbatim copy of the "Water" entry.
    "Psychic": {
        "x2" : ["Fighting","Poison"],
        "x0.5" : ["Steel","Psychic"],
        "x0" : ["Dark"]
    },
    "Rock": {
        "x2" : ["Fire","Ice","Bug","Flying"],
        "x0.5" : ["Steel","Fighting","Ground"],
        "x0" : []
    },
    "Ground": {
        "x2" : ["Steel","Electric","Fire","Rock","Poison"],
        "x0.5" : ["Bug","Grass"],
        "x0" : ["Flying"]
    },
    "Ghost": {
        "x2" : ["Psychic","Ghost"],
        "x0.5" : ["Dark"],
        "x0" : ["Normal"]
    },
    "Dark": {
        "x2" : ["Psychic","Ghost"],
        "x0.5" : ["Fighting","Fairy","Dark"],
        "x0" : []
    },
    "Flying": {
        "x2" : ["Fighting","Bug","Grass"],
        "x0.5" : ["Steel","Electric","Rock"],
        "x0" : []
    },
}

In [4]:
def split_lines(input, seed, output1, output2):
    """Randomly split the data lines of a text file into two output files.

    The first line of ``input`` is treated as a header and is copied to
    neither output. Every following line is written to ``output1`` when
    ``random.random() < 0.8`` and to ``output2`` otherwise. Both output
    files are overwritten if they already exist.

    Args:
        input: path of the file to split (the name shadows the built-in
            ``input``; it is kept so keyword callers keep working).
        seed: value passed to ``random.seed`` so the split is repeatable.
        output1: path of the file receiving the ``< 0.8`` draws.
        output2: path of the file receiving the remaining lines.
    """
    random.seed(seed)

    # 'w' truncates on open, and the context managers guarantee that all
    # three handles are flushed and closed before the function returns.
    with open(input, 'r') as source, \
            open(output1, 'w') as first_out, \
            open(output2, 'w') as second_out:
        next(source, None)  # skip the header line without drawing a number
        for line in source:
            target = first_out if random.random() < 0.8 else second_out
            target.write(line)
            
def read_data(filename):
    """Load battle pairs and their outcome labels from a header-less CSV file.

    For every row, the first two fields are converted to ``int`` and stored
    as a two-element list; the label is ``True`` when the row's third field
    equals its first field (compared as strings).

    Returns:
        A tuple ``(X, Y)`` where ``X`` is the list of integer pairs and ``Y``
        the list of booleans, in file order.
    """
    pairs, labels = [], []

    with open(filename) as handle:
        for row in csv.reader(handle, delimiter=','):
            pairs.append([int(field) for field in row[:2]])
            labels.append(row[2] == row[0])

    return pairs, labels

In [5]:
# Split the battles of combats.csv (header skipped, seed 0) into the files
# 'train' and 'test'; each line lands in 'train' when the random draw is < 0.8.
split_lines('combats.csv',0,'train','test')
# Load each partition as (list of [first, second] id pairs, list of booleans
# telling whether the third column equals the first one).
train_raw_x,train_raw_y = read_data('train')
test_raw_x,test_raw_y = read_data('test')

In [46]:
def winrate(pkm,allBattle,allBattleVictory):
    """Return the fraction of battles involving ``pkm`` that it won.

    Args:
        pkm: identifier of the Pokémon; a ``str`` is converted to ``int``.
        allBattle: sequence of ``[first, second]`` identifier pairs.
        allBattleVictory: sequence aligned with ``allBattle``; a truthy entry
            means the first Pokémon of the pair won, a falsy one the second.

    Returns:
        ``wins / battles`` as a float, or ``0.0`` when ``pkm`` appears in no
        battle.
    """
    if type(pkm) == str:
        pkm = int(pkm)

    fights = 0
    victories = 0
    for idx, battle in enumerate(allBattle):
        if pkm == battle[0]:
            # Listed first: a truthy outcome is a win for pkm.
            if allBattleVictory[idx]:
                victories += 1
        elif pkm == battle[1]:
            # Listed second: a falsy outcome is a win for pkm.
            if not allBattleVictory[idx]:
                victories += 1
        else:
            continue
        fights += 1

    if fights == 0:
        return 0.0
    return victories / fights

def read_data_pokemon(filename):
    """Load the Pokémon table from a CSV file and attach a win rate to each row.

    The header line is skipped. For every remaining row the first column is
    dropped, the second-to-last column is removed, and the win rate computed
    by ``winrate`` from the module-level ``train_raw_x`` / ``train_raw_y``
    (using the row's first column as the identifier) is appended.

    Returns:
        A list whose element 0 is an empty placeholder list, followed by one
        list per CSV row in file order.
    """
    # Slot 0 is a placeholder; presumably this lines list indices up with
    # 1-based ids in the CSV -- confirm against pokemon.csv.
    roster = [[]]

    with open(filename) as handle:
        rows = csv.reader(handle, delimiter=',')
        next(rows, None)  # skip the header line
        for fields in rows:
            entry = fields[1:]
            entry.pop(-2)  # discard the second-to-last column
            entry.append(winrate(fields[0], train_raw_x, train_raw_y))
            roster.append(entry)

    return roster

def getPokemonName(pkm):
    """Return the first field (the name) of entry ``pkm`` in the global ``pokemon`` list."""
    entry = pokemon[pkm]
    return entry[0]

def sumInTable(tab):
    """Add up every element of ``tab``, starting from 0, and return the total."""
    total = 0
    for value in tab:
        total += value
    return total

def getBaseStats(pkm):
    """Return the fields at positions 3 to 8 of a Pokémon row, converted to ``int``."""
    return [int(raw) for raw in pkm[3:9]]

def checkMissingValue(table):
    """List the rows of ``table`` that contain an empty-string field.

    The 'Type 2' column is ignored. A row contributes one ``(row_index, row)``
    tuple per empty field, so a row with several empty fields appears several
    times.

    Returns:
        A list of ``(row_index, row)`` tuples in row order.
    """
    header = {'Name':0,'Type 1': 1, 'Type 2': 2,'HP':3, 'Atk':4,'Def':5, 'Spatk':6,'SpDef':7,'Spd':8,'Legendary':9}
    optional_column = header['Type 2']

    return [
        (row_idx, row)
        for row_idx, row in enumerate(table)
        for col_idx, cell in enumerate(row)
        if cell == '' and col_idx != optional_column
    ]

**Display Functions**

In [25]:
def displayPokemon(pokemon):
    """Show Pokémon rows as a DataFrame, leaving out the first row of the input."""
    column_names = ['Pokémon','Type1','Type2','HP','Atk','Def',
                    'SpAtk','SpDef','Spd','Legendaire','Winrate']
    frame = pd.DataFrame(list(pokemon), columns=column_names)
    # Drop row 0 before rendering.
    display(frame.iloc[1:, :])

def displayDecisionTable(table):
    """Show the per-battle feature rows of ``table`` as a labelled DataFrame."""
    column_names = [
        'DamageMultiplier',
        'Hp1','Hp2','Hp1>Hp2',
        'Atk1','Atk2','Atk1>Atk2',
        'Def1','Def2','Def1>Def2',
        'SpAtk1','SpAtk2','SpAtk1>SpAtk2',
        'SpDef1','SpDef2','SpDef1>SpDef2',
        'Spd1','Spd2','Spd1>Spd2',
        'Sum1','Sum2','Sum1>Sum2',
        'Winrate1','Winrate2',
    ]
    frame = pd.DataFrame(list(table), columns=column_names)
    display(frame)

In [54]:
# Load the Pokémon table; element 0 is the empty placeholder row inserted by
# read_data_pokemon, and each row ends with its win rate.
pokemon = read_data_pokemon('pokemon.csv')

**Recherche des valeurs manquantes et correction**

In [72]:
# Take the first row reported as containing an empty field.
(index,pkm) = checkMissingValue(pokemon)[0]
# displayPokemon leaves out the first row it receives, so the row is passed
# twice in order to be shown once.
displayPokemon([pkm,pkm])
index

Unnamed: 0,Pokémon,Type1,Type2,HP,Atk,Def,SpAtk,SpDef,Spd,Legendaire,Winrate
1,,Fighting,,65,105,60,60,70,95,False,0.789474


63

In [73]:
# Fill in the empty name field of entry 63, then re-run the check.
pokemon[63][0] = 'Primeape'
checkMissingValue(pokemon)

[]

In [74]:
# Render the whole Pokémon table (the placeholder row 0 is not shown).
displayPokemon(pokemon)

Unnamed: 0,Pokémon,Type1,Type2,HP,Atk,Def,SpAtk,SpDef,Spd,Legendaire,Winrate
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,False,0.257426
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,False,0.354839
3,Venusaur,Grass,Poison,80,82,83,100,100,80,False,0.692308
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,False,0.549020
5,Charmander,Fire,,39,52,43,60,50,65,False,0.476190
...,...,...,...,...,...,...,...,...,...,...,...
796,Diancie,Rock,Fairy,50,100,150,100,150,50,True,0.402439
797,Mega Diancie,Rock,Fairy,50,160,110,160,110,110,True,0.875000
798,Hoopa Confined,Psychic,Ghost,80,110,60,150,130,70,True,0.542553
799,Hoopa Unbound,Psychic,Dark,80,160,60,170,130,80,True,0.570093


**Functions to obtain type advantage ratio**

In [82]:
def isTypeEffective (type1,type2):
    """Return the damage multiplier of attacking type ``type1`` against ``type2``.

    Looks ``type1`` up in the global ``typeTable`` and returns 2.0, 0.5 or 0.0
    when ``type2`` is listed under "x2", "x0.5" or "x0" respectively (checked
    in that order), and 1.0 when it is listed nowhere.
    """
    matchup = typeTable[type1]
    for key, multiplier in (("x2", 2.0), ("x0.5", 0.5), ("x0", 0.0)):
        if type2 in matchup[key]:
            return multiplier
    return 1.0
    
def singleTypeAdvantage(typeAttack,typeDefense):
    """Return the multiplier of one attacking type against a two-slot defender.

    ``typeDefense`` is indexed at 0 and 1; an empty string in slot 1 means the
    defender has no second type, in which case only slot 0 counts. Otherwise
    the multipliers against both slots are multiplied together.
    """
    multiplier = 1.0 * isTypeEffective(typeAttack, typeDefense[0])
    second_type = typeDefense[1]
    if second_type != '':
        multiplier = multiplier * isTypeEffective(typeAttack, second_type)
    else:
        multiplier = multiplier * 1.0
    return multiplier

def doubleTypeAdvantage(type1,type2):
    """Return the best multiplier any of the attacker's types gets on the defender.

    Args:
        type1: the attacker's types; element 0 is always used, element 1 only
            when it exists and is not the empty string.
        type2: the defender's two type slots, passed through to
            ``singleTypeAdvantage``.

    Returns:
        The larger of the multipliers obtained with each attacking type; a
        missing second attacking type counts as 0.
    """
    advantage1 = singleTypeAdvantage(type1[0], type2)
    # `and` short-circuits, so type1[1] is never read for a one-element list
    # (the previous bitwise `&` evaluated both sides and raised IndexError).
    if len(type1) > 1 and type1[1] != '':
        advantage2 = singleTypeAdvantage(type1[1], type2)
    else:
        advantage2 = 0
    return max(advantage1, advantage2)

def typeBattle(pkm1,pkm2):
    """Return the type multiplier of Pokémon ``pkm1`` attacking Pokémon ``pkm2``.

    Both arguments index the global ``pokemon`` list; fields 1 and 2 of each
    row are used as that Pokémon's two type slots.
    """
    return doubleTypeAdvantage(pokemon[pkm1][1:3], pokemon[pkm2][1:3])

In [10]:
def tableDecision(train_x,train_y):
    """Turn battle pairs into feature rows for the classifier.

    For each ``(pkm1, pkm2)`` pair of ``train_x`` the row contains, in order:
    the type multiplier from ``typeBattle``; for every base stat the value of
    the first Pokémon, the value of the second, and whether the first is
    strictly greater; the same triple for the sum of the base stats; and the
    last field of each Pokémon's row (its win rate).

    Returns:
        ``(rows, train_y)`` -- the feature rows and the labels, unchanged.
    """
    rows = []

    for first_id, second_id in train_x:
        features = [typeBattle(first_id, second_id)]

        first = pokemon[first_id]
        second = pokemon[second_id]

        first_stats = getBaseStats(first)
        second_stats = getBaseStats(second)

        # One (value1, value2, value1 > value2) triple per base stat.
        for pos, own_stat in enumerate(first_stats):
            other_stat = second_stats[pos]
            features.extend((own_stat, other_stat, own_stat > other_stat))

        first_total = sumInTable(first_stats)
        second_total = sumInTable(second_stats)
        features.extend((first_total, second_total, first_total > second_total))

        # The win rate is stored as the last field of each Pokémon row.
        features.extend((first[-1], second[-1]))
        rows.append(features)

    return rows, train_y

In [11]:
# Build the feature rows for the training and the test battles; the labels
# are returned unchanged by tableDecision.
train_x,train_y = tableDecision(train_raw_x,train_raw_y)
test_x,test_y = tableDecision(test_raw_x,test_raw_y)

In [12]:
# Preview the training feature table.
displayDecisionTable(train_x)

Unnamed: 0,DamageMultiplier,Hp1,Hp2,Hp1>Hp2,Atk1,Atk2,Atk1>Atk2,Def1,Def2,Def1>Def2,...,SpDef2,SpDef1>SpDef2,Spd1,Spd2,Spd1>Spd2,Sum1,Sum2,Sum1>Sum2,Winrate1,Winrate2
0,2.0,91,91,False,90,129,False,72,90,False,...,90,True,108,108,False,580,580,False,0.890000,0.860215
1,1.0,55,75,False,40,75,False,85,75,True,...,95,True,40,40,False,405,485,False,0.196262,0.166667
2,0.5,40,77,False,40,120,False,40,90,False,...,90,False,20,48,False,250,485,False,0.038835,0.264151
3,2.0,70,20,True,60,10,True,125,230,False,...,230,False,55,5,True,495,505,False,0.359649,0.000000
4,1.0,50,60,False,47,50,False,50,150,False,...,150,False,65,60,True,319,520,False,0.469027,0.536842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40026,1.0,100,30,True,120,40,True,100,70,True,...,25,True,90,60,True,680,295,True,0.770000,0.459184
40027,2.0,60,35,True,85,55,True,40,40,False,...,40,True,68,60,True,328,275,True,0.623529,0.355769
40028,1.0,60,73,False,50,115,False,100,60,True,...,60,True,65,90,False,430,458,False,0.475248,0.666667
40029,0.5,40,25,True,30,35,False,50,70,False,...,55,False,100,45,True,330,325,True,0.574468,0.203540


In [None]:
def eval_Random_Forest(train_x,train_y,X,y,d,split,score):
    """Fit a 100-tree random forest and return its score on ``(X, y)``.

    Args:
        train_x, train_y: data the forest is fitted on.
        X, y: data the fitted forest is scored on.
        d: forwarded as ``max_depth``.
        split: forwarded as ``min_samples_split``.
        score: forwarded as ``oob_score``.

    Returns:
        The value of the classifier's ``score`` method on ``(X, y)``.
    """
    forest = RandomForestClassifier(
        n_estimators=100,
        max_depth=d,
        min_samples_split=split,
        oob_score=score,
    )
    forest.fit(train_x, train_y)
    accuracy = forest.score(X, y)
    return accuracy

def eval_pokemon_battle_prediction(test_x,test_y,classifier):
    """Return the score of ``classifier`` on the test data as a percentage.

    Args:
        test_x, test_y: passed unchanged to ``classifier``.
        classifier: callable ``classifier(test_x, test_y)`` returning a
            score as a fraction.

    Returns:
        The score multiplied by 100 and rounded to two decimals.
    """
    # The body previously called the misspelled name `classfier`, which raised
    # NameError on every call.
    return round(classifier(test_x, test_y) * 100, 2)

def test_cross_validation_pokemon_battle(d,split,score):
    """Return the mean 10-fold cross-validation score, in percent.

    The global ``train_x`` / ``train_y`` are split with ``KFold(n_splits=10)``;
    for each fold a forest is trained and scored through
    ``eval_Random_Forest`` with ``d``, ``split`` and ``score`` forwarded
    unchanged. Each fold's score is converted to a percentage rounded to two
    decimals before being averaged.
    """
    features = np.array(train_x)
    labels = np.array(train_y)
    folds = KFold(n_splits=10)

    fold_count = 0      # number of train/evaluate rounds performed
    score_total = 0
    for fit_idx, held_out_idx in folds.split(features):
        fold_score = eval_Random_Forest(
            features[fit_idx], labels[fit_idx],
            features[held_out_idx], labels[held_out_idx],
            d, split, score,
        )
        score_total += round(fold_score * 100, 2)
        fold_count += 1

    return score_total / fold_count

def test_find_best_d(split,score):
    """Return the candidate depth with the highest cross-validation score.

    Each candidate in ``[16, 17, 18, 19, 20]`` is evaluated with
    ``test_cross_validation_pokemon_battle(depth, split, score)``. The list
    of candidates and every score are printed as they are produced.

    Args:
        split: forwarded unchanged to the cross-validation helper.
        score: forwarded unchanged to the cross-validation helper.

    Returns:
        The depth whose score is the largest (the first one on ties).
    """
    d = [16,17,18,19,20]  # candidate depths to compare
    print(d)

    scores = []  # cross-validation score of each candidate, in order

    # The `for` line was missing its colon (SyntaxError), and the loop
    # variable was overwritten by the score it produced.
    for depth in d:
        mean_score = test_cross_validation_pokemon_battle(depth, split, score)
        print(mean_score)
        scores.append(mean_score)

    # Higher score is better, hence argmax.
    return d[np.argmax(np.array(scores))]

In [90]:
# Compare the candidate depths with min_samples_split=2 and oob_score=False.
test_find_best_d(2,False)

[16, 17, 18, 19, 20]
95.608
95.614
95.68500000000002
95.69200000000001
95.696


20