In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide pandas display settings: show up to 30 columns, wrap output at
# 120 characters, and render floats with three decimal places.
pd.set_option("display.max_columns", 30)
pd.set_option("display.width", 120)
pd.set_option("display.float_format", "{:.3f}".format)

# Fixed seed for the row shuffle below, so the positional train/validation/unseen
# split made in train_model() is identical on every run. This seeds only the
# shuffle; TensorFlow's own randomness is not seeded here.
SHUFFLE_SEED = 42

df = pd.read_csv("nndata_03.2019.csv", sep=",", header=None)

# Keep a row only when columns 0 and 15 are both non-zero and column 15 is not
# more than 2 * column 5 below column 0; afterwards discard rows with any
# missing value. (Comparisons against NaN are False, so NaN rows survive the
# mask exactly as they survived the original drop() calls and are removed by
# dropna().)
keep = (df[0] != 0) & (df[15] != 0) & ~(df[15] < (df[0] - 2 * df[5]))
df = df[keep].dropna()

# Shuffle the rows reproducibly; the original index labels are preserved.
df = df.sample(frac=1, random_state=SHUFFLE_SEED)

# print(df.corr(method='pearson'))
# print(df.describe().transpose())
# print(df)
# sns.pairplot(df, diag_kind="kde")

def _bucketize(values, base, spread):
    """Map each value to an ordinal class 0-3 by its position relative to base and spread.

    Class 3: value >= base + spread
    Class 2: value >= base - 0.5 * spread
    Class 1: value >= base - 1.5 * spread
    Class 0: everything else
    The first matching rule (top to bottom) wins.
    """
    values = np.asarray(values)
    base = np.asarray(base)
    spread = np.asarray(spread)
    return np.where(
        values >= base + spread, 3,
        np.where(
            values >= base - spread * 0.5, 2,
            np.where(values >= base - spread * 1.5, 1, 0)))


def parse_labels_and_features(df):
    """Build the model inputs and one-hot targets from a raw data frame.

    Assumes `df` carries integer column labels 0..15 in order (as produced by
    reading the CSV with ``header=None``).

    Labels: column 15 is bucketed into four classes relative to column 0 and
    column 5 (see `_bucketize`) and one-hot encoded.

    Features: columns 2, 3, 6, 7, 8, 9, 10, 11, 12 plus two derived columns,
    20 and 21, holding the same bucketing applied to columns 1 and 4.
    Columns 2, 3, 9 and 10 are divided by their maximum within `df`.

    NOTE(review): the scaling maximum is taken from whatever frame is passed
    in, so separately parsed splits are scaled by different constants.

    Args:
        df: pandas DataFrame as described above. It is not modified.

    Returns:
        (features, labels): `features` is a 2-D NumPy array with one row per
        input row and 11 columns; `labels` is a float32 array of shape
        (n_rows, 4).
    """
    base, spread = df[0], df[5]

    class_ids = _bucketize(df[15], base, spread)
    # Index the rows of a 4x4 identity matrix to one-hot encode. The width is
    # fixed at 4 so a frame that happens to contain no rows of the highest
    # class still yields four label columns (and an empty frame yields (0, 4)).
    labels = np.eye(4, dtype=np.float32)[class_ids]

    # Work on an explicit copy so adding the derived columns neither touches
    # the caller's frame nor triggers SettingWithCopyWarning.
    inputs = df.loc[:, 0:14].copy()
    inputs[20] = _bucketize(inputs[1], base, spread)
    inputs[21] = _bucketize(inputs[4], base, spread)

    for column in (2, 3, 9, 10):
        inputs[column] = inputs[column] / inputs[column].max()

    # Columns 0, 1, 4 and 5 are only used to derive labels/buckets; 13 and 14
    # are not used as features.
    features = inputs.drop([0, 1, 4, 5, 13, 14], axis=1)

    return features.values, labels

def train_model(df):
    """Fit, plot and evaluate a dense softmax classifier on `df`.

    The rows of `df` are split by position into 80% training, 10% validation
    and 10% unseen data; each slice is converted with
    parse_labels_and_features(). A five-layer dense network with a 4-way
    softmax output is trained for 10 epochs with plain gradient descent, the
    accuracy and loss curves are plotted, the model is evaluated on the unseen
    slice, and the model summary is printed.

    NOTE(review): parse_labels_and_features() scales some columns by the
    maximum of the frame it receives, so the three slices are scaled
    independently of each other.

    Args:
        df: pandas DataFrame in the layout parse_labels_and_features() expects.
            Rows are assumed to be already shuffled.

    Returns:
        None. Results are reported through the plot and printed output.
    """
    # Split boundaries are row positions, so size them on the row count.
    n_rows = len(df)
    validation_start = int(n_rows * 0.8)
    unseen_start = int(n_rows * 0.9)

    training_examples, training_labels = parse_labels_and_features(df.iloc[:validation_start])
    validation_examples, validation_labels = parse_labels_and_features(df.iloc[validation_start:unseen_start])
    unseen_examples, unseen_labels = parse_labels_and_features(df.iloc[unseen_start:])

    # Take the input width from the data so the network keeps matching the
    # feature matrix if the feature set changes.
    n_features = training_examples.shape[1]

    hidden_layers = [
        keras.layers.Dense(144, activation=tf.nn.relu, kernel_regularizer=keras.regularizers.l1(0.01))
        for _ in range(3)
    ]
    model = keras.Sequential(
        [keras.layers.Dense(11, activation=tf.nn.relu, input_shape=[n_features])]
        + hidden_layers
        + [keras.layers.Dense(4, activation=tf.nn.softmax)]
    )

    model.compile(
        # Keras-native SGD is available in both TF 1.x and TF 2.x tf.keras,
        # whereas tf.train.GradientDescentOptimizer exists only in TF 1.x.
        # The learning rate (0.01) is passed positionally because its keyword
        # name differs between versions.
        optimizer=keras.optimizers.SGD(0.01),
        loss='categorical_crossentropy',
        metrics=['acc'])

    history = model.fit(
        training_examples,
        training_labels,
        epochs=10,
        batch_size=100,
        validation_data=(validation_examples, validation_labels))

    # One panel per tracked quantity: accuracy on the left, loss on the right.
    fig, axes = plt.subplots(1, 2, figsize=(20, 5))
    for ax, key, label in zip(axes, ('acc', 'loss'), ('accuracy', 'loss')):
        ax.plot(history.history[key])
        ax.plot(history.history['val_' + key])
        ax.set_title('model ' + label)
        ax.set_ylabel(label)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'validation'], loc='upper left')

    plt.show()

    print('Evaluation on unseen data:')
    scores = model.evaluate(unseen_examples, unseen_labels)
    print("%s: %.2f" % (model.metrics_names[0], scores[0]))
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100))

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()

# Run the whole pipeline on the filtered, shuffled frame: split it, fit the
# network, plot the training curves and report metrics on the held-out slice.
train_model(df)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Train on 118992 samples, validate on 14874 samples
Epoch 1/10

Epoch 2/10

Epoch 3/10

Epoch 4/10

Epoch 5/10

Epoch 6/10

Epoch 7/10

Epoch 8/10

Epoch 9/10

Epoch 10/10



<Figure size 2000x500 with 2 Axes>

Evaluation on unseen data:

loss: 1.22
acc: 42.46%
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 11)                132       
_________________________________________________________________
dense_2 (Dense)              (None, 144)               1728      
_________________________________________________________________
dense_3 (Dense)              (None, 144)               20880     
_________________________________________________________________
dense_4 (Dense)              (None, 144)               20880     
_________________________________________________________________
dense_5 (Dense)              (None, 4)                 580       
Total params: 44,200
Trainable params: 44,200
Non-trainable params: 0
_________________________________________________________________
None
