In [1]:
import pandas as pd
from json_functions import create_df_simple, create_example

# Disabled alternative data source. NOTE: `create_df` is not among the names
# imported above, so it has to be imported before this line is re-enabled.
# players_list, df = create_df('test.json')

# Load `players_list` and `df`; the 'rosters vector' and 'rating vector'
# columns of `df` are what the later cells turn into model inputs and targets.
players_list, df = create_df_simple('very-big.json')

In [2]:
import numpy as np
import tensorflow as tf

In [3]:
# Expand the two vector-valued columns into wide frames (one column per vector
# element), reusing df's index so input rows and target rows stay aligned.
# NOTE(review): assumes every cell in these columns holds an equal-length
# sequence of numbers — confirm against create_df_simple.
features = pd.DataFrame(df["rosters vector"].values.tolist(), index=df.index)
outputs = pd.DataFrame(df["rating vector"].values.tolist(), index=df.index)

In [4]:
def load_array(data_arrays, batch_size, is_train=True):  #@save
    """Wrap in-memory arrays in a batched ``tf.data.Dataset``.

    Args:
        data_arrays: Value passed unchanged to
            ``tf.data.Dataset.from_tensor_slices`` (here a tuple of
            features and targets).
        batch_size: Number of consecutive elements combined into one batch.
        is_train: When truthy, elements are shuffled through a 1000-element
            buffer before batching; otherwise order is preserved.

    Returns:
        The batched (and optionally shuffled) dataset.
    """
    pipeline = tf.data.Dataset.from_tensor_slices(data_arrays)
    if not is_train:
        return pipeline.batch(batch_size)
    return pipeline.shuffle(buffer_size=1000).batch(batch_size)

# Ten examples per batch for the tf.data pipeline.
# NOTE(review): data_iter is not referenced by any later cell shown here;
# model.fit is fed the train/test DataFrames directly.
batch_size = 10
data_iter = load_array(data_arrays=(features, outputs), batch_size=batch_size)

In [5]:
import tensorflow as tf

In [6]:
# Set up use GPU
# physical_devices = tf.config.experimental.list_physical_devices('GPU')
# print("Num GPUs available: ", len(physical_devices))
# tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [7]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import categorical_crossentropy

In [8]:
# get the model
def get_model(n_inputs, n_outputs):
    """Build and compile a fully connected regression network.

    Architecture: Dense(100, relu) -> Dense(50, relu) -> Dense(n_outputs),
    with He-uniform initialisation on the two hidden layers and a linear
    (default) activation on the output layer. The model is compiled with the
    mean-absolute-error loss and the Adam optimizer.

    Args:
        n_inputs: Width of one input feature vector.
        n_outputs: Number of values predicted per example.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # Only the first layer declares the input width. Later layers infer their
    # input size from the previous layer's output, so passing
    # input_dim=n_inputs to the second layer (as was done before) was both
    # wrong for that layer (its input is 100 wide) and ignored by Keras.
    model.add(Dense(100, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(50, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs))
    model.compile(loss='mae', optimizer='adam')
    return model

In [9]:
# Layer widths come straight from the data: one input unit per feature column
# and one output unit per target column.
n_inputs = features.shape[1]
n_outputs = outputs.shape[1]

In [10]:
# Instantiate the network sized to the feature/target frames.
model = get_model(n_inputs=n_inputs, n_outputs=n_outputs)

In [11]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    outputs,
    test_size=0.33,
    random_state=42,
)

In [12]:
# Two passes over the training rows, then score the held-out rows.
# The model is compiled with loss='mae' and no additional metrics, so the
# value returned by evaluate() is that loss on the test split.
model.fit(x=X_train, y=y_train, epochs=2, verbose=1)
mae = model.evaluate(x=X_test, y=y_test, verbose=1)

Epoch 1/2
Epoch 2/2


In [13]:
# Show which GPUs TensorFlow can see (an empty list means none are visible).
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [14]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               106900    
_________________________________________________________________
dense_1 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_2 (Dense)              (None, 10)                510       
Total params: 112,460
Trainable params: 112,460
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Build the model input for one match-up (two five-player rosters) and show
# the raw prediction for it.
example = create_example(
    ['EliGE', 'Stewie2K', 'Grim', 'FalleN', 'NAF'],
    ['device', 'Xyp9x', 'Magisk', 'dupreeh', 'gla1ve'],
    players_list,
)
model.predict(np.asarray([example]))

array([[1.1868082, 1.075088 , 0.9876964, 0.8094249, 0.623414 , 1.5024917,
        1.3105333, 1.2264646, 1.0622538, 0.9846176]], dtype=float32)

In [16]:
def print_prediction(example, players_list, model):
    """Predict the rating vector for one match-up and print a per-team report.

    The positions of ``example`` equal to 1 are collected in ascending order.
    The first five are reported as team 1 and looked up in ``players_list``
    directly; the next five are reported as team 2 and looked up after
    subtracting ``len(players_list)``. Prediction slots 0-4 are printed under
    team 1 and slots 5-9 under team 2, each rounded to two decimals.

    NOTE(review): this presumably relies on ``example`` being two
    concatenated one-hot blocks (one per team), each ``len(players_list)``
    wide — confirm against ``create_example``.

    Args:
        example: Sequence of flags describing both rosters.
        players_list: Player names, indexable by position.
        model: Object whose ``predict`` accepts a batch of one example and
            returns a batch of rating vectors.
    """
    active = [pos for pos, flag in enumerate(example) if flag == 1]
    ratings = model.predict(np.asarray([example]))[0]
    print("Predicted rating vector:", ratings)

    def _report(heading, names, slots):
        # One team section: heading, comma-separated names, then its ratings.
        print(heading)
        for name in names:
            print(name + ', ', end='')
        print("\nRating spread:")
        for slot in slots:
            print(round(ratings[slot], 2))

    _report(
        "Team 1:",
        (players_list[pos] for pos in active[0:5]),
        range(0, 5),
    )
    _report(
        "Team 2:",
        (players_list[pos - len(players_list)] for pos in active[5:10]),
        range(5, 10),
    )



In [17]:
# The model predicts each team's spread of ratings, not a rating per player:
# print_prediction lists the names in players_list index order, so a name does
# not correspond to the rating value printed at the same position beneath it.
print_prediction(example, players_list, model)

Predicted rating vector: [1.1868082 1.075088  0.9876964 0.8094249 0.623414  1.5024917 1.3105333
 1.2264646 1.0622538 0.9846176]
Team 1:
EliGE, NAF, Stewie2K, FalleN, Grim, 
Rating spread:
1.19
1.08
0.99
0.81
0.62
Team 2:
device, dupreeh, Xyp9x, gla1ve, Magisk, 
Rating spread:
1.5
1.31
1.23
1.06
0.98


Compare these results with a couple of real matches played between these two teams on March 26th, which the model has not seen:

https://www.hltv.org/stats/matches/mapstatsid/117239/liquid-vs-astralis?rankingFilter=Top10

https://www.hltv.org/stats/matches/mapstatsid/117231/astralis-vs-liquid?rankingFilter=Top10

Here's a hypothetical game that could never happen: 2016 Astralis vs. current Astralis. Current Astralis is the most dominant CSGO roster of all time, so we would expect to see higher ratings across the board for them, which we do. (They are team 2 here.)

In [20]:
# Same report for a second match-up: two rosters that share three players.
example2 = create_example(
    ['device', 'Xyp9x', 'Kjaerbye', 'dupreeh', 'karrigan'],
    ['device', 'Xyp9x', 'Magisk', 'dupreeh', 'gla1ve'],
    players_list,
)
print_prediction(example2, players_list, model)

Predicted rating vector: [1.2620677  1.1140167  1.0066153  0.90441847 0.69624555 1.3945405
 1.2482151  1.138304   1.0769234  0.87902534]
Team 1:
karrigan, device, dupreeh, Xyp9x, Kjaerbye, 
Rating spread:
1.26
1.11
1.01
0.9
0.7
Team 2:
device, dupreeh, Xyp9x, gla1ve, Magisk, 
Rating spread:
1.39
1.25
1.14
1.08
0.88
