In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split


def read_dataset(template, start_idx, end_idx):
    frames = [ pd.read_json(f) for f in [template.format(i) for i in range(start_idx, end_idx+1)] ]
    return pd.concat(frames, ignore_index = True)

dataframe = read_dataset("datasets/dataset_initial/dataset_{:02}.json", 0, 17)

In [2]:
train, test = train_test_split(dataframe, test_size=0.1)
train, val = train_test_split(train, test_size=0.2)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

62734 train examples
15684 validation examples
8714 test examples


In [6]:
# convert columns to discrete numerical values
CATEGORICAL_COLUMNS = ['round_status', 'map']
NUMERIC_COLUMNS = ['round_status_time_left']
INTEGER_COLUMNS = ['alive_players_t', 'alive_players_ct', "health_t", "health_ct", "money_ct", "money_t"]

def get_attr(ds, team, attr=None):
    team_players = map(lambda players: filter(lambda p: p["team"] == team, players), ds['alive_players'])
    if attr:
        team_players = map(lambda players: map(lambda p: p[attr], players), team_players)
    
    return list(map(lambda p: list(p), team_players))

for ds in [train, test, val]:
    ds['alive_players_t']  = list(map(len ,get_attr(ds, "Terrorist")))
    ds['alive_players_ct'] = list(map(len, get_attr(ds, "CT")))
    ds['health_ct']        = list(map(sum, get_attr(ds, "CT", "health")))
    ds['health_t']         = list(map(sum, get_attr(ds, "Terrorist", "health")))
    ds['money_ct']         = list(map(sum, get_attr(ds, "CT", "money")))
    ds['money_t']          = list(map(sum, get_attr(ds, "Terrorist", "money")))
    
feature_columns = []
feature_names = CATEGORICAL_COLUMNS + NUMERIC_COLUMNS + INTEGER_COLUMNS
for feature_name in CATEGORICAL_COLUMNS:
    vocabulary = train[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float64))

for feature_name in INTEGER_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.int32))

train.info()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats

<class 'pandas.core.frame.DataFrame'>
Int64Index: 62734 entries, 82521 to 70334
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   map                     62734 non-null  object 
 1   patch_version           62734 non-null  int64  
 2   map_crc                 62734 non-null  int64  
 3   current_score           62734 non-null  object 
 4   round_status            62734 non-null  object 
 5   round_status_time_left  62734 non-null  float64
 6   alive_players           62734 non-null  object 
 7   active_smokes           62734 non-null  object 
 8   active_molotovs         62734 non-null  object 
 9   previous_kills          62734 non-null  object 
 10  round_winner            62734 non-null  object 
 11  planted_bomb            7075 non-null   object 
 12  alive_players_t         62734 non-null  int64  
 13  alive_players_ct        62734 non-null  int64  
 14  health_ct               62734 non-

In [7]:
for ds in [train, test, val]:
    ds.drop(['patch_version', 'map_crc', 'alive_players', 'active_smokes', 'active_molotovs', 'previous_kills', 'planted_bomb'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [8]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32):
    dataframe = dataframe.copy()
    labels = dataframe.pop('round_winner').map(lambda s: 0 if s == "CT" else 1)
    ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(dataframe))
    ds = ds.batch(batch_size)
    return ds
batch_size = 5 # A small batch sized is used for demonstration purposes
train_ds = df_to_dataset(train, batch_size=batch_size)
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)

In [14]:
for feature_batch, label_batch in train_ds.take(2):
    print('Every feature:', list(feature_batch.keys()))
    print('A batch of maps:', feature_batch['alive_players_t'])
    print('A batch of targets:', label_batch )

Every feature: ['map', 'current_score', 'round_status', 'round_status_time_left', 'alive_players_t', 'alive_players_ct', 'health_ct', 'health_t', 'money_ct', 'money_t']
A batch of maps: tf.Tensor([1 4 1 5 5], shape=(5,), dtype=int32)
A batch of targets: tf.Tensor([0 1 1 1 1], shape=(5,), dtype=int32)
Every feature: ['map', 'current_score', 'round_status', 'round_status_time_left', 'alive_players_t', 'alive_players_ct', 'health_ct', 'health_t', 'money_ct', 'money_t']
A batch of maps: tf.Tensor([5 5 2 5 5], shape=(5,), dtype=int32)
A batch of targets: tf.Tensor([0 1 0 1 0], shape=(5,), dtype=int32)


In [18]:
def get_compiled_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(10, activation='relu'),
        tf.keras.layers.Dense(10, activation='relu'),
        tf.keras.layers.Dense(1)
    ])

    model.compile(optimizer='adam',
                loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                metrics=['accuracy'])
    return model

model = get_compiled_model()
model.fit(train_ds, epochs=15)

ValueError: Passing a dictionary input to a Sequential Model which doesn't have FeatureLayer as the first layer is an error.