In [None]:
import json

import tensorflow as tf
from tensorflow import keras

In [None]:
# Glob pattern for the raw match files stored under the bucket's matches/ prefix.
input_pattern = 'gs://dota-draft/data/matches/05032020*'
# Expand the pattern into a concrete list of matching paths.
filenames = tf.io.gfile.glob(input_pattern)
# Bare expression so the notebook displays the matched paths.
filenames

In [None]:
def serialize_example(data):
    """Encode one match record as a serialized ``tf.train.Example``.

    Args:
        data: Mapping with the keys ``'radiant_picks'`` and ``'dire_picks'``
            (iterables of integers) and ``'radiant_win'`` (a single value,
            stored as a one-element int64 list).

    Returns:
        The serialized protobuf produced by ``Example.SerializeToString()``,
        with int64-list features named ``'radiant'``, ``'dire'`` and ``'label'``.
    """
    def _int64_feature(values):
        # Wrap an iterable of integers into an int64-list Feature.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

    feature_map = {
        'radiant': _int64_feature(data['radiant_picks']),
        'dire': _int64_feature(data['dire_picks']),
        # The outcome is a scalar, so it is wrapped in a list first.
        'label': _int64_feature([data['radiant_win']]),
    }
    proto = tf.train.Example(features=tf.train.Features(feature=feature_map))
    return proto.SerializeToString()

In [None]:
def csv_to_tfrecords(in_filename, out_filename, compression_type=None):
    """Convert a text file of JSON records (one per line) into a TFRecord file.

    Despite the name, each input line is parsed with ``json.loads`` rather than
    as CSV; the decoded object is passed to ``serialize_example``.

    Args:
        in_filename: Path of the line-delimited JSON input file. It is read
            with ``tf.data.TextLineDataset`` without a compression setting.
        out_filename: Path of the TFRecord file to write.
        compression_type: Compression setting forwarded to
            ``tf.io.TFRecordOptions`` for the output file only.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    dataset = tf.data.TextLineDataset(in_filename)
    options = tf.io.TFRecordOptions(compression_type=compression_type)
    with tf.io.TFRecordWriter(out_filename, options) as writer:
        for instance in dataset:
            line = instance.numpy()
            # Skip blank lines instead of letting json.loads fail on them.
            if not line.strip():
                continue
            writer.write(serialize_example(json.loads(line)))

In [None]:
# Convert every matched input file, writing the result to the path obtained by
# replacing 'matches' with 'records' and appending a '.tfrecords' suffix.
for filename in filenames:
    tf_record_filename = '{}.tfrecords'.format(
        filename.replace('matches', 'records'))
    csv_to_tfrecords(filename, tf_record_filename)

In [None]:
# Parsing spec for the serialized examples: two fixed-length int64 vectors of
# five values each, plus a single-element int64 label.
expected_features = dict(
    radiant=tf.io.FixedLenFeature([5], dtype=tf.int64),
    dire=tf.io.FixedLenFeature([5], dtype=tf.int64),
    label=tf.io.FixedLenFeature([1], dtype=tf.int64),
)

def parse_tfrecord(serialized_example):
    """Decode one serialized example into an ``(inputs, label)`` pair.

    Args:
        serialized_example: A serialized ``tf.train.Example`` matching
            ``expected_features``.

    Returns:
        A tuple ``(picks, label)`` where ``picks`` is the ``'radiant'`` feature
        followed by the ``'dire'`` feature, concatenated along axis 0, and
        ``label`` is the ``'label'`` feature.
    """
    parsed = tf.io.parse_single_example(serialized_example, expected_features)
    picks = tf.concat([parsed['radiant'], parsed['dire']], axis=0)
    return picks, parsed['label']

In [None]:
def tfrecords_reader_dataset(filenames, batch_size=32,
                             shuffle_buffer_size=10000, n_readers=5):
    """Build a repeating, shuffled, batched dataset from TFRecord files.

    Args:
        filenames: File paths (or patterns) passed to
            ``tf.data.Dataset.list_files``.
        batch_size: Number of parsed examples per batch.
        shuffle_buffer_size: Size of the buffer used to shuffle records.
        n_readers: Number of files interleaved at the same time
            (``cycle_length`` of the interleave).

    Returns:
        A ``tf.data.Dataset`` of ``parse_tfrecord`` outputs in batches. The file
        list is repeated without limit, so the dataset never ends; callers must
        bound iteration themselves (for example with ``steps_per_epoch``).
    """
    dataset = tf.data.Dataset.list_files(filenames)
    dataset = dataset.repeat()
    dataset = dataset.interleave(
        lambda filename: tf.data.TFRecordDataset(filename),
        cycle_length=n_readers)
    # Dataset transformations return a new dataset rather than modifying in
    # place, so the result must be reassigned or the shuffle is dropped.
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(parse_tfrecord,
                          num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    return dataset.prefetch(1)

In [None]:
# Glob pattern for the converted TFRecord files under the records/ prefix.
input_pattern = 'gs://dota-draft/data/records/05032020*.tfrecords'
# The [:1] slice keeps only the first matched path (an empty list if none match).
filenames = tf.io.gfile.glob(input_pattern)[:1]
# Bare expression so the notebook displays the selected path.
filenames

In [None]:
# Build the training input pipeline, overriding the default shuffle buffer
# (10000) with a buffer of 10; batch size and reader count keep their defaults.
dataset = tfrecords_reader_dataset(filenames, shuffle_buffer_size=10)

In [None]:
# Small feed-forward classifier over the 10 concatenated pick values.
model = keras.models.Sequential([
    keras.layers.Dense(30, activation='relu', input_shape=[10]),
    # Sigmoid output so the prediction is a probability in [0, 1], which is
    # what the 'binary_crossentropy' loss expects by default (from_logits=False).
    keras.layers.Dense(1, activation='sigmoid'),
])

In [None]:
# Print the model's layer-by-layer summary.
model.summary()

In [None]:
# Render the model graph, including tensor shapes, to a PNG file.
keras.utils.plot_model(model, to_file="my_model.png", show_shapes=True)

In [None]:
# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
model.compile(loss='binary_crossentropy',
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))

In [None]:
# The input dataset repeats forever, so an epoch must be bounded explicitly;
# without steps_per_epoch the first epoch would never finish.
# NOTE(review): 1000 is a placeholder - set it to roughly
# (number of training examples) // (batch size) for the files in use.
STEPS_PER_EPOCH = 1000
model.fit(dataset, epochs=10, steps_per_epoch=STEPS_PER_EPOCH)