In [1]:
import json
import numpy as np
import datetime
np.random.seed(81)

import h5py
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Flatten, Conv2D, MaxPooling2D # new!

import matplotlib.pyplot as plt

%matplotlib inline

n_classes = 2

Using TensorFlow backend.


In [2]:
model = Sequential()
# conv 1
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(240, 320, 1)))
#model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name="doop"))
model.add(Dropout(0.10))

# conv 2
model.add(Conv2D(128, kernel_size=(8, 8), activation='relu', padding='same'))
model.add(MaxPooling2D(pool_size=(5, 5), strides=(5, 5), padding='same'))
model.add(Dropout(0.10))

# Fully Connected
model.add(Flatten())

model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(n_classes, activation='softmax'))
json_string = model.to_json()

model = None

---

Pretend I just pulled this string off a queue

In [3]:
from keras.models import model_from_json
model = model_from_json(json_string)

In [4]:
from prepare_football_data_for_keras import DataPreperator

In [5]:
dp = DataPreperator()
train_x, train_y, test_x, test_y = dp.load_data()
dp.print_counts()

Train Inputs 8000
Train labels [ 4000.  4000.]
Test Inputs 2000
Test labels [ 1000.  1000.]
Validation Inputs 0
Validation labels 0


In [6]:
dp.create_validation_split()
dp.print_counts()

Train Inputs 8000
Train labels [ 4000.  4000.]
Test Inputs 1000
Test labels [ 496.  504.]
Validation Inputs 1000
Validation labels [ 504.  496.]


In [7]:
(train_x, train_y, test_x, test_y, val_x, val_y) = dp.get_data_sets()
train_x = train_x[:10]
train_y = train_y[:10]
val_x = val_x[:10]
val_y = val_y[:10]
test_x = test_x[:10]
test_y = test_y[:10]

In [8]:
# TODO: use some sort of grid search to optimize this
sgd = keras.optimizers.SGD(lr=1e-3, nesterov=True)

model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])


In [9]:
training_time_start = datetime.datetime.now().isoformat()
training_time_start

'2017-11-24T14:20:35.966547'

In [10]:
training_hx = model.fit(
    train_x,
    train_y,
          batch_size=128, #220
          epochs=3, #FIXME: turn this back into 10
          verbose=1,
          validation_data=(val_x, val_y))

Train on 10 samples, validate on 10 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


In [11]:
training_time_end = datetime.datetime.now().isoformat()
training_time_end

'2017-11-24T14:22:21.612292'

In [12]:
training_eval = model.evaluate(test_x, test_y, batch_size=128, verbose=1)



In [13]:
evaluation_metrics = dict(zip(model.metrics_names, training_eval))
evaluation_metrics

{'acc': 0.60000002384185791, 'loss': 0.67571365833282471}

In [14]:
# ** is a merge function
evaluation_metrics = {**evaluation_metrics,
                      **training_hx.params,
                      **{'history' : training_hx.history},
                      **{'training_time_start' : training_time_start, 'training_time_end' : training_time_end}
                     }

In [15]:
evaluation_metrics

{'acc': 0.60000002384185791,
 'batch_size': 128,
 'do_validation': True,
 'epochs': 3,
 'history': {'acc': [0.40000000596046448, 0.60000002384185791, 1.0],
  'loss': [0.70398461818695068, 0.69150745868682861, 0.67569488286972046],
  'val_acc': [0.40000000596046448, 0.40000000596046448, 0.40000000596046448],
  'val_loss': [0.69616776704788208, 0.69872903823852539, 0.70177972316741943]},
 'loss': 0.67571365833282471,
 'metrics': ['loss', 'acc', 'val_loss', 'val_acc'],
 'samples': 10,
 'steps': None,
 'training_time_end': '2017-11-24T14:22:21.612292',
 'training_time_start': '2017-11-24T14:20:35.966547',
 'verbose': 1}

In [16]:
training_hx.history

{'acc': [0.40000000596046448, 0.60000002384185791, 1.0],
 'loss': [0.70398461818695068, 0.69150745868682861, 0.67569488286972046],
 'val_acc': [0.40000000596046448, 0.40000000596046448, 0.40000000596046448],
 'val_loss': [0.69616776704788208, 0.69872903823852539, 0.70177972316741943]}

In [17]:
evaluation_metrics = {**evaluation_metrics,
**{
    "loss" : model.loss,
    "optimizer_config" : model.optimizer.get_config(),
    "optimizer_class" : str(model.optimizer.__class__.__name__),
}}
evaluation_metrics

{'acc': 0.60000002384185791,
 'batch_size': 128,
 'do_validation': True,
 'epochs': 3,
 'history': {'acc': [0.40000000596046448, 0.60000002384185791, 1.0],
  'loss': [0.70398461818695068, 0.69150745868682861, 0.67569488286972046],
  'val_acc': [0.40000000596046448, 0.40000000596046448, 0.40000000596046448],
  'val_loss': [0.69616776704788208, 0.69872903823852539, 0.70177972316741943]},
 'loss': 'categorical_crossentropy',
 'metrics': ['loss', 'acc', 'val_loss', 'val_acc'],
 'optimizer_class': 'SGD',
 'optimizer_config': {'decay': 0.0,
  'lr': 0.0010000000474974513,
  'momentum': 0.0,
  'nesterov': True},
 'samples': 10,
 'steps': None,
 'training_time_end': '2017-11-24T14:22:21.612292',
 'training_time_start': '2017-11-24T14:20:35.966547',
 'verbose': 1}

In [18]:
evaluation_metrics = {**evaluation_metrics, **json.loads(model.to_json())}
# TODO version the data somehow and include the version of the training/test datasets

In [19]:
to_publish = json.dumps(evaluation_metrics).encode('utf-8')
print(json.dumps(evaluation_metrics, indent=2))

{
  "loss": "categorical_crossentropy",
  "acc": 0.6000000238418579,
  "batch_size": 128,
  "epochs": 3,
  "steps": null,
  "samples": 10,
  "verbose": 1,
  "do_validation": true,
  "metrics": [
    "loss",
    "acc",
    "val_loss",
    "val_acc"
  ],
  "history": {
    "val_loss": [
      0.6961677670478821,
      0.6987290382385254,
      0.7017797231674194
    ],
    "val_acc": [
      0.4000000059604645,
      0.4000000059604645,
      0.4000000059604645
    ],
    "loss": [
      0.7039846181869507,
      0.6915074586868286,
      0.6756948828697205
    ],
    "acc": [
      0.4000000059604645,
      0.6000000238418579,
      1.0
    ]
  },
  "training_time_start": "2017-11-24T14:20:35.966547",
  "training_time_end": "2017-11-24T14:22:21.612292",
  "optimizer_config": {
    "lr": 0.0010000000474974513,
    "momentum": 0.0,
    "decay": 0.0,
    "nesterov": true
  },
  "optimizer_class": "SGD",
  "class_name": "Sequential",
  "config": [
    {
      "class_name": "Conv2D",
      "

In [20]:
import sys
import os
# NOTE:
# Setting this up with confluent's kafka library and cloud karafka was obnoxious. 
# This doesn't quite work:
# sudo apt-get install libsasl2-dev librdkafka-dev
# conda install -c conda-forge librdkafka
# conda install -c conda-forge python-confluent-kafka


from kafka import KafkaProducer

results_topic = "test"

conf = {
    'bootstrap.servers': ["spark4.thedevranch.net"],
    'session.timeout.ms': 6000,
    'default.topic.config': {'auto.offset.reset': 'smallest'},
    #'security.protocol': 'sasl_ssl',#'SASL_SSL',
    #'sasl.mechanisms': 'SCRAM-SHA-256',
    #'sasl.username': "p62g66c0", #os.environ['CLOUDKARAFKA_USERNAME'],
    #'sasl.password': "mAbqsGu3L8_C8L_H31lktxnNfb1yAce5", #os.environ['CLOUDKARAFKA_PASSWORD']
}

producer = KafkaProducer(bootstrap_servers=conf['bootstrap.servers'],
#                          security_protocol="SASL_SSL",
#                          sasl_mechanism="PLAIN",
#                          sasl_plain_username=conf['sasl.username'],
#                          sasl_plain_password=conf['sasl.password'],
                        )

def delivery_callback(err, msg):
    if err:
        sys.stderr.write('%% Message failed delivery: %s\n' % err)
    else:
        sys.stderr.write('%% Message delivered to %s [%d]\n' %
                         (msg.topic(), msg.partition()))

producer.send("test", to_publish)

<kafka.producer.future.FutureRecordMetadata at 0x7f5555b207b8>