In [1]:
import numpy as np
import pandas as pd
import sklearn
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from scipy import stats
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow_core.estimator import inputs
from tensorflow import feature_column
from tensorflow.keras import layers
from tensorflow.python.client import device_lib
tf.debugging.set_log_device_placement(True)


In [2]:
# A utility method to create a tf.data dataset from a Pandas Dataframe
def df_to_dataset(dataframe, shuffle=True, batch_size=32, label_col='Label'):
  """Build a batched tf.data.Dataset of (features, labels) from a DataFrame.

  Args:
    dataframe: pandas DataFrame holding the feature columns plus `label_col`.
      It is copied first, so the caller's frame is left untouched.
    shuffle: when True, shuffle with a buffer that covers every row.
    batch_size: number of rows per emitted batch.
    label_col: name of the column to split off as the label.

  Returns:
    A tf.data.Dataset whose elements are (dict of column -> values, labels)
    batches.

  Raises:
    KeyError: if `label_col` is not a column of `dataframe`.
  """
  dataframe = dataframe.copy()
  labels = dataframe.pop(label_col)
  ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
  # shuffle() needs a positive buffer size, so skip it for an empty frame.
  if shuffle and len(dataframe) > 0:
    ds = ds.shuffle(buffer_size=len(dataframe))
  ds = ds.batch(batch_size)
  return ds

In [3]:
# Load the full input table from a CSV located relative to the working directory.
train_vector_input = pd.read_csv("vector_input.csv")

In [36]:
# Stroke / set types; each one has a "<name>Reps" column in the input table.
styles = [
    'Back', 'Breast', 'Butterfly', 'Drill',
    'Free', 'Im', 'Kick', 'PullPaddle',
]

# Only rows with Role == 1 take part in the comparison.
with_role = train_vector_input.loc[train_vector_input["Role"] == 1]

for style in styles:
    reps_col = style + "Reps"
    # Keep rows that contain at least one rep of this style.
    with_style = with_role.loc[with_role[reps_col] > 0]
    # Mean rep count for each label value, then their difference.
    m0 = with_style.loc[with_style["Label"] == 0, reps_col].mean()
    m1 = with_style.loc[with_style["Label"] == 1, reps_col].mean()
    print(m0, m1, m0 - m1, style, sep="\t")

2.5399285525639064	2.565650878791291	-0.025722326227384684	Back
2.5513059615790974	2.5653436116122683	-0.014037650033170923	Breast
2.5353059289419377	2.5700508755082443	-0.034744946566306556	Butterfly
2.5367386759581882	2.569610663499831	-0.03287198754164278	Drill
2.5362393711341644	2.5713787300414563	-0.03513935890729192	Free
2.5518178479618068	2.569058757174613	-0.017240909212806077	Im
2.5407934496539624	2.5683349226368084	-0.027541472982846038	Kick
2.5381146511043475	2.566596346059315	-0.02848169495496755	PullPaddle


In [5]:
# Columns that are not tied to a particular stroke / set type.
role = feature_column.numeric_column("Role")
competition_week = feature_column.numeric_column("CompetitionWeek")

# Every stroke / set type contributes the same three numeric columns.
_STROKE_SUFFIXES = ("Reps", "Distance", "AverageSplit")


def _stroke_columns(prefix):
    """Return the (reps, distance, average-split) numeric columns for `prefix`."""
    return tuple(feature_column.numeric_column(prefix + suffix)
                 for suffix in _STROKE_SUFFIXES)


back_reps, back_distance, back_split = _stroke_columns("Back")
breast_reps, breast_distance, breast_split = _stroke_columns("Breast")
butterfly_reps, butterfly_distance, butterfly_split = _stroke_columns("Butterfly")
drill_reps, drill_distance, drill_split = _stroke_columns("Drill")
free_reps, free_distance, free_split = _stroke_columns("Free")
im_reps, im_distance, im_split = _stroke_columns("Im")
kick_reps, kick_distance, kick_split = _stroke_columns("Kick")
pull_reps, pull_distance, pull_split = _stroke_columns("PullPaddle")

total_distance = feature_column.numeric_column("TotalDistance")


In [6]:
# Hold out 20% of the rows for testing, then 20% of the remainder for validation.
# A fixed random_state keeps both splits identical across kernel restarts, so the
# Keras results can be reproduced.
train, test = train_test_split(train_vector_input, test_size=0.2, random_state=101)
train, val = train_test_split(train, test_size=0.2, random_state=101)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

2603248 train examples
650812 validation examples
813516 test examples


In [7]:
batch_size = 32

# Only the training split is shuffled; validation and test keep their row order.
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split, shuffle=False, batch_size=batch_size)
    for split in (val, test)
)

Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AnonymousRandomSeedGenerator in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ShuffleDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op TensorSliceDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op BatchDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0


In [8]:
# All numeric feature columns handed to the models, one row per group.
feature_columns = [
    role, competition_week,
    back_reps, back_distance, back_split,
    breast_reps, breast_distance, breast_split,
    butterfly_reps, butterfly_distance, butterfly_split,
    drill_reps, drill_distance, drill_split,
    free_reps, free_distance, free_split,
    im_reps, im_distance, im_split,
    kick_reps, kick_distance, kick_split,
    pull_reps, pull_distance, pull_split,
    total_distance,
]

In [9]:
# Keras layer built from the feature columns; it is used as the first layer of
# the Sequential model.
feature_layer = tf.keras.layers.DenseFeatures(feature_columns)

In [10]:
# Separate seeded 90/10 split of the full table; X_train / X_test feed the
# Estimator's input functions.
X_train, X_test = train_test_split(train_vector_input, test_size=0.1, random_state=101)

In [11]:
# Display the dtype of every column in the held-out frame.
X_test.dtypes

Role                        int64
CompetitionWeek             int64
BackReps                    int64
BackDistance                int64
BackAverageSplit          float64
BreastReps                  int64
BreastDistance              int64
BreastAverageSplit        float64
ButterflyReps               int64
ButterflyDistance           int64
ButterflyAverageSplit     float64
DrillReps                   int64
DrillDistance               int64
DrillAverageSplit         float64
FreeReps                    int64
FreeDistance                int64
FreeAverageSplit          float64
ImReps                      int64
ImDistance                  int64
ImAverageSplit            float64
KickReps                    int64
KickDistance                int64
KickAverageSplit          float64
PullPaddleReps              int64
PullPaddleDistance          int64
PullPaddleAverageSplit    float64
TotalDistance               int64
Label                       int64
dtype: object

In [29]:
# Column names read by get_input_fn: two general columns, then three columns
# (reps, distance, average split) per stroke / set type, then the total distance.
_STROKE_PREFIXES = ('Back', 'Breast', 'Butterfly', 'Drill',
                    'Free', 'Im', 'Kick', 'PullPaddle')
_PER_STROKE_FIELDS = ('Reps', 'Distance', 'AverageSplit')

FEATURES = (
    ['Role', 'CompetitionWeek']
    + [prefix + field
       for prefix in _STROKE_PREFIXES
       for field in _PER_STROKE_FIELDS]
    + ['TotalDistance']
)

# Name of the target column.
LABEL = 'Label'

def get_input_fn(data_set, num_epochs=None, n_batch = 128, shuffle=True):
    """Wrap a DataFrame in a pandas input function for a tf.estimator model.

    Args:
        data_set: DataFrame containing every column in FEATURES plus LABEL.
        num_epochs: forwarded to pandas_input_fn as `num_epochs`.
        n_batch: forwarded to pandas_input_fn as `batch_size`.
        shuffle: forwarded to pandas_input_fn as `shuffle`.

    Returns:
        Whatever tf.compat.v1.estimator.inputs.pandas_input_fn returns for
        these arguments.
    """
    # Rebuilding from the raw .values gives x and y a fresh default index
    # instead of the (possibly shuffled) index of `data_set`.
    feature_frame = pd.DataFrame(
        {name: data_set[name].values for name in FEATURES})
    label_series = pd.Series(data_set[LABEL].values)

    return tf.compat.v1.estimator.inputs.pandas_input_fn(
        x=feature_frame,
        y=label_series,
        batch_size=n_batch,
        num_epochs=num_epochs,
        shuffle=shuffle)

In [13]:
'''TensorFlow Estimator Linear Classifier Creation, training, and evaluation'''

# Binary linear classifier; checkpoints are written to / restored from model_dir.
model = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    model_dir="ongoing/train4",
    n_classes=2,
)

# Train for 1000 steps of 128 rows each, reading X_train in order.
train_input_fn = get_input_fn(X_train, num_epochs=None, n_batch=128, shuffle=False)
model.train(train_input_fn, steps=1000)

# Evaluate on at most 1000 batches from a single pass over X_test.
eval_input_fn = get_input_fn(X_test, num_epochs=1, n_batch=128, shuffle=False)
model.evaluate(eval_input_fn, steps=1000)

'TensorFlow Estimator Linear Classifier Creation, training, and evaluation'

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'ongoing/train4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002B165FD6188>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
If using Keras pass *_const

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x2b165fd0dc8>

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-11-16T17:18:39Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ongoing/train4\model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [100/1000]
INFO:tensorflow:Evaluation [200/1000]
INFO:tensorflow:Evaluation [300/1000]
INFO:tensorflow:Evaluation [400/1000]
INFO:tensorflow:Evaluation [500/1000]
INFO:tensorflow:Evaluation [600/1000]
INFO:tensorflow:Evaluation [700/1000]
INFO:tensorflow:Evaluation [800/1000]
INFO:tensorflow:Evaluation [900/1000]
INFO:tensorflow:Evaluation [100

{'accuracy': 0.8973594,
 'accuracy_baseline': 0.88653123,
 'auc': 0.759607,
 'auc_precision_recall': 0.9444892,
 'average_loss': 0.5498106,
 'label/mean': 0.88653123,
 'loss': 0.5498106,
 'precision': 0.917887,
 'prediction/mean': 0.9326404,
 'recall': 0.9710952,
 'global_step': 1000}

In [None]:
%time

'''Keras Sequential Model Creation and Training'''
with tf.device('/GPU:0'):
    model = tf.keras.Sequential([
      feature_layer,
      layers.Dense(128, activation='relu'),
      layers.Dense(128, activation='relu'),
      layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    model.fit(train_ds,
              validation_data=val_ds,
              epochs=5)
    
'''Check Keras Model Accuracy'''
loss, accuracy = model.evaluate(test_ds)
print("Accuracy", accuracy)


In [None]:
# Save the trained Keras model. A forward slash keeps the path portable and
# avoids the invalid "\m" escape sequence of the backslash spelling.
model.save('keras/my_model')

# To reload the saved model later:
# model = tf.keras.models.load_model('keras/my_model')
# model.summary()