In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import tensorflow as tf
import tensorflow.compat.v1 as tf1

In [3]:
fight_df = pd.read_parquet('fight_ml.parquet')

In [5]:
fight_df

Unnamed: 0,name1,hp1,attack1,defense1,specialAttack1,specialDefense1,speed1,name2,hp2,attack2,defense2,specialAttack2,specialDefense2,speed2,Gewonnen
0,Zirpeise,77,85,51,55,51,65,Pummeluff,115,45,20,45,25,20,True
1,Zirpeise,77,85,51,55,51,65,Palimpalim,75,50,80,95,90,65,False
2,Zirpeise,77,85,51,55,51,65,Schlurplek,110,85,95,80,95,50,False
3,Zirpeise,77,85,51,55,51,65,Duokles,59,110,150,45,49,35,False
4,Zirpeise,77,85,51,55,51,65,Yanmega,86,76,86,116,56,95,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
640795,Wommel,40,45,40,55,40,84,Volcanion,80,110,120,130,90,70,False
640796,Wommel,40,45,40,55,40,84,Cresselia,120,70,120,75,130,85,False
640797,Wommel,40,45,40,55,40,84,Irokex,65,90,115,45,115,58,False
640798,Wommel,40,45,40,55,40,84,Tannza,50,65,90,35,35,15,False


In [6]:
fight_df = fight_df.sample(10000)

In [7]:
X = fight_df.drop(['Gewonnen', 'name1', 'name2'], axis = 1)
y = fight_df['Gewonnen']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [5]:
model = LogisticRegression()
model.fit(X_train,y_train)
predictions = model.predict(X_test)
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

0.920809215753036
[[96445  8375]
 [ 8371 98273]]
              precision    recall  f1-score   support

       False       0.92      0.92      0.92    104820
        True       0.92      0.92      0.92    106644

    accuracy                           0.92    211464
   macro avg       0.92      0.92      0.92    211464
weighted avg       0.92      0.92      0.92    211464



In [6]:
model = RandomForestClassifier()
model.fit(X_train,y_train)
predictions = model.predict(X_test)
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

0.9691153104074453
[[102522   2298]
 [  4233 102411]]
              precision    recall  f1-score   support

       False       0.96      0.98      0.97    104820
        True       0.98      0.96      0.97    106644

    accuracy                           0.97    211464
   macro avg       0.97      0.97      0.97    211464
weighted avg       0.97      0.97      0.97    211464



In [8]:
model = SVC(kernel = 'linear')
model.fit(X_train,y_train)
predictions = model.predict(X_test)
print(accuracy_score(y_test, predictions))
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions))

0.923030303030303
[[1524  138]
 [ 116 1522]]
              precision    recall  f1-score   support

       False       0.93      0.92      0.92      1662
        True       0.92      0.93      0.92      1638

    accuracy                           0.92      3300
   macro avg       0.92      0.92      0.92      3300
weighted avg       0.92      0.92      0.92      3300



In [7]:

tf1.disable_v2_behavior()

# split data into training and test
X_train = fight_df.sample(int(len(fight_df) * 0.6))

X_test = fight_df[~fight_df.isin(X_train)].dropna()

#  optimisation variables
num_hidden_nodes = 300
learning_rate = 0.8
epochs = 10
batch_size = int(len(fight_df) * 0.6 /epochs)

# declare the training data placeholders
# input x : 12 features
x = tf.placeholder(tf.float32, [None, 12])
# output data placeholder - 2 true/false
y = tf.placeholder(tf.float32, [None, 2])

# weights connecting the input to the 1st hidden layer
W1 = tf.Variable(tf.random_normal([12, num_hidden_nodes], stddev=0.03), name='W1')
b1 = tf.Variable(tf.random_normal([num_hidden_nodes]), name='b1')

# calculate the output of the hidden layer
hidden_out1 = tf.add(tf.matmul(x, W1), b1)
hidden_out1 = tf.nn.relu(hidden_out1)

# weights connecting the 1st hidden layer to the 2nd hidden layer
W2 = tf.Variable(tf.random_normal([num_hidden_nodes, num_hidden_nodes], stddev=0.03), name='W2')
b2 = tf.Variable(tf.random_normal([num_hidden_nodes]), name='b2')

# calculate the output of the hidden layer
hidden_out2 = tf.add(tf.matmul(hidden_out1, W2), b2)
hidden_out2 = tf.nn.relu(hidden_out2)

# weights connecting the hidden layer to the output layer
W3 = tf.Variable(tf.random_normal([num_hidden_nodes, 2], stddev=0.03), name='W3')
b3 = tf.Variable(tf.random_normal([2]), name='b3')

# calculate the output layer output - softmax activation function
# output layer
y_ = tf.nn.sigmoid(tf.add(tf.matmul(hidden_out2, W3), b3))

y_clipped = tf.clip_by_value(y_, 1e-10, 0.9999999)
cross_entropy = -tf.reduce_mean(tf.reduce_sum(y * tf.log(y_clipped)
                                                + (1 - y) * tf.log(1 - y_clipped), axis=1))
# add an optimiser
optimiser = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cross_entropy)

# setup initialisation operator
init_op = tf.global_variables_initializer()

# define an accuracy assessment operation
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# start the session
with tf.Session() as sess:
    # initialise the variables
    sess.run(init_op)
    total_batch = int(len(X_train) / batch_size)
    for epoch in range(epochs):
        avg_cost = 0
        for i in range(total_batch):
            x_sample = X_train.sample(batch_size)
            y_sample = pd.get_dummies(x_sample['Gewonnen'])
            _, c = sess.run([optimiser, cross_entropy],
                            feed_dict={x: x_sample.loc[:, 'hp1':'speed2'], y:y_sample})
        avg_cost += c / total_batch
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost))
    print(sess.run(accuracy, feed_dict={x: X_test.loc[:, 'hp1':'speed2'], y: pd.get_dummies(X_test['Gewonnen'])}))
    print("a")

AttributeError: module 'tensorflow' has no attribute 'placeholder'