In [1]:
import pandas as pd
from json_functions import create_df_big_output, create_example

players_list, df = create_df_big_output('very-big.json')

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
features, outputs = df["rosters vector"], df['rating vector']

features = pd.DataFrame(features.values.tolist(), index= df.index)
outputs = pd.DataFrame(outputs.values.tolist(), index= df.index)

print(np.shape(features))
print(np.shape(outputs))

(17488, 1068)
(17488, 1068)


In [4]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

In [5]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [160]:
GLOBAL_BATCH_SIZE = 1093

In [161]:
# create loss function that only takes into account values from the output dont have a true value of 0
class Filtered_MAE(tf.keras.losses.Loss):

    def call(self, y_true, y_pred):
        # we can hardcode denominator as 10 since we know that there will always be 10 relevent values
        # in this case I just multiply by 0.1 for simplicity
        loss = my_loss(y_true, y_pred)
        return tf.divide(loss, GLOBAL_BATCH_SIZE)

In [162]:
def my_loss(y_true, y_pred):
    
    # This sigmoid is very close to a step function, if x = 0, then it will return nearly 0, and if x is a reasonable rating value,
    # even something as low as 0.4, it will return (pretty much) 1. I will plug in y_true to this and then multiply the result by
    # the error so that error on predictions where where y_true = 0 becomes tiny but where y_true is an actual rating stays 
    # pretty much the same
    def my_step(x):
        return tf.divide(1.0, tf.add(1.0, tf.exp(tf.multiply(-1000.0, tf.subtract(x, 0.02)))))
    
    # we can hardcode denominator as 10 since we know that there will always be 10 relevent values
    # in this case I just multiply by 0.1 for simplicity  
    return tf.multiply(0.1, tf.reduce_sum(tf.multiply(my_step(y_true), tf.abs(tf.subtract(y_pred, y_true)))))

In [168]:
# get the model
def get_model(n_inputs, n_outputs):
	model = Sequential()
	model.add(Dense(2000, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
	# model.add(Dense(50, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
	model.add(Dense(n_outputs))
	my_loss = Filtered_MAE()
	model.compile(loss=my_loss, optimizer='adam')
	return model

In [169]:
n_inputs, n_outputs = features.shape[1], outputs.shape[1]

In [170]:
model = get_model(n_inputs, n_outputs)

In [171]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features, outputs, test_size=0.2, random_state=42)

In [172]:
model.fit(X_train, y_train, verbose=1, epochs=100, batch_size=GLOBAL_BATCH_SIZE)
print("Evaluation on test set:")
mae = model.evaluate(X_test, y_test, verbose=1, batch_size=GLOBAL_BATCH_SIZE)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Evaluation on test set:


In [14]:
hypothetical = create_example(['EliGE', 'Stewie2K', 'Grim', 'FalleN', 'NAF'], ['device', 'Xyp9x', 'Magisk', 'dupreeh', 'gla1ve'], players_list)
model.predict(np.asarray([hypothetical]))

array([[0.8909991 , 0.6987118 , 0.4269492 , ..., 1.0185257 , 0.7517932 ,
        0.65959996]], dtype=float32)

In [15]:
def print_prediction(example, players_list, model):
    player_indices = [i for i, x in enumerate(example) if x == 1]
    example_formatted = np.asarray([example])
    pred = model.predict(example_formatted)

    relevent_preds = np.zeros(10)

    print("Team 1:")
    for i, player  in enumerate(player_indices):
        if player >= len(players_list): player =player - len(players_list)
        print(players_list[player]+': ', round(pred[0][player], 2))
        relevent_preds[i] = round(pred[0][player], 2) 
        if i == 4:
            print("Agregate Rating:", round(np.sum(relevent_preds[0:5]), 2))
            print("Team 2:")
        if i == 9:
            print("Agregate Rating:", round(np.sum(relevent_preds[5:10]), 2))


In [16]:
print_prediction(hypothetical, players_list, model)

Team 1:
EliGE:  0.71
NAF:  0.9
Stewie2K:  0.51
FalleN:  0.94
Grim:  0.77
Agregate Rating: 3.83
Team 2:
device:  1.03
dupreeh:  0.86
Xyp9x:  0.87
gla1ve:  1.07
Magisk:  1.06
Agregate Rating: 4.89


In [91]:
def print_example_vs_pred(row, example_type='test'):
    if example_type is 'test':
        # Get an example from the test split
        example = np.array(X_test.iloc[row])
        truth = np.array(y_test.iloc[row])
    elif example_type is 'train':
        # Get an example from the trian split
        example = np.array(X_train.iloc[row])
        truth = np.array(y_train.iloc[row])
    elif example_type is 'all':
        # Get an example from the original dataframe
        example = np.array(features.iloc[row])
        truth = np.array(outputs.iloc[row]) 
    
    players = np.where(example == 1.0)[0]

    team1 = []
    team2 = []
    for i, player in enumerate(players):
        if i < 5:
            team1.append(players_list[player])
        else:
            team2.append(players_list[player-len(players_list)])
    
    example_for_pred = create_example(team1, team2, players_list)
    pred = model.predict([example_for_pred])

    data = []
    for i, player  in enumerate(players):
        
        player_list_index = (player if player < len(players_list) 
                                   else player - len(players_list)
                             )
        data.append([players_list[player_list_index], round(pred[0][player], 2), truth[player]])

    col_width = max(len(str(word)) for row in data for word in row) + 2  # padding
    print("".join(str(word).ljust(col_width) for word in ['Team 1', 'Pred', 'Truth']))
    print('---------------------------')
    for i, row in enumerate(data):
        print("".join(str(word).ljust(col_width) for word in row))
        if i == 4:
            print('---------------------------')               
            print("".join(str(word).ljust(col_width) for word in ['Team 2', 'Pred', 'Truth']))
            print('---------------------------')   
    print('---------------------------')
    print("Loss: ", my_loss(np.array(truth, dtype='double'), np.array(pred, dtype='double')).numpy())


In [92]:
print_example_vs_pred(0, 'test')

Team 1         Pred           Truth          
---------------------------
HEN1           1.07           1.61           
yuurih         1.03           1.08           
VINI           1.11           0.99           
KSCERATO       1.06           1.22           
arT            1.08           0.91           
---------------------------
Team 2         Pred           Truth          
---------------------------
AZR            0.98           0.76           
jks            1.01           0.98           
jkaem          0.99           0.86           
Gratisfaction  1.03           1.05           
Liazz          1.04           1.06           
---------------------------
Loss:  0.1470878435798775


In [93]:
print_example_vs_pred(1, 'train')

Team 1   Pred     Truth    
---------------------------
FugLy    1.45     1.39     
Ethan    1.43     1.36     
CeRq     1.38     1.36     
daps     1.02     1.0      
Brehze   1.17     0.94     
---------------------------
Team 2   Pred     Truth    
---------------------------
ANGE1    0.78     0.84     
ISSAA    1.01     1.12     
woxic    0.83     0.88     
DeadFox  0.82     0.96     
bondik   0.83     0.91     
---------------------------
Loss:  0.08417594111474119


In [94]:
print_example_vs_pred(123, 'all')

Team 1       Pred         Truth        
---------------------------
KRIMZ        1.32         2.07         
flusha       1.29         1.94         
Lekr0        1.49         1.19         
Golden       1.7          1.25         
JW           1.18         1.09         
---------------------------
Team 2       Pred         Truth        
---------------------------
Snax         0.59         0.67         
NEO          0.84         0.57         
TaZ          0.69         0.77         
pashaBiceps  0.78         0.33         
byali        0.35         0.88         
---------------------------
Loss:  0.36605233137244947


In [96]:
print_example_vs_pred(9000, 'all')

Team 1    Pred      Truth     
---------------------------
allu      1.19      1.16      
Aleksib   1.11      0.72      
xseveN    1.08      1.17      
sergej    1.14      1.05      
Aerial    0.92      1.18      
---------------------------
Team 2    Pred      Truth     
---------------------------
nitr0     0.93      1.0       
Twistzz   1.11      1.17      
EliGE     1.17      1.44      
NAF       0.93      1.2       
Stewie2K  1.07      0.78      
---------------------------
Loss:  0.18133195589320358


In [97]:
model.save('model_big_01.h5')