In [43]:
import time
import numpy as np
import pandas as pd

from sklearn.utils import shuffle

from tensorflow.keras.backend import backend
from tensorflow.keras.models import *
from tensorflow.keras.layers import * 

import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving the
# whole device up front. The setting is applied to every visible GPU (it has
# to be consistent across devices); on a CPU-only machine the list is empty
# and the loop simply does nothing instead of failing on gpus[0].
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Start-up banner identifying the experiment in the cell output.
for banner_line in (
    "============================================================",
    " GRU RNN for gender prediction                              ",
    "============================================================",
):
    print(banner_line)


def _build_model(n_features, seq_len, vocab_size):
    """Build and compile the two-branch binary classifier.

    One branch is a small dense network over the integer-coded feature
    columns; the other embeds the padded event-id sequence and runs it
    through a GRU. Both branch outputs are concatenated and reduced to a
    single sigmoid unit.

    Args:
        n_features: number of feature columns fed to the dense branch.
        seq_len: padded length of every event-id sequence.
        vocab_size: number of distinct ids the embedding must cover,
            including id 0 which is used for padding.

    Returns:
        A compiled Keras ``Model`` taking ``[features, sequences]``.
    """
    features_input = Input(shape=(n_features,))
    features_1 = Dense(128, activation="relu")(features_input)
    features_2 = Dropout(0.1)(features_1)
    features_out = Dense(128, activation="relu")(features_2)

    sequences_input = Input(shape=(seq_len,))
    sequences_1 = Embedding(input_dim=vocab_size, output_dim=100)(sequences_input)
    sequences_out = GRU(128)(sequences_1)  # GRU may be replaced by LSTM or SimpleRNN

    merged = concatenate([features_out, sequences_out])
    merged_1 = Dense(128, activation='relu')(merged)
    merged_2 = Dropout(0.05)(merged_1)
    merged_out = Dense(1, activation='sigmoid')(merged_2)

    model = Model(inputs=[features_input, sequences_input], outputs=merged_out)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model


acc_list = []

# Load the table once; rows containing any empty (NaN) cell are removed.
df = pd.read_excel("table.xlsx")
df.dropna(inplace=True)

size = df.shape[0]
if size == 0:
    raise ValueError("table.xlsx contains no complete rows to train on")
t = int(size * 0.8)         # train part size (80/20 split)

# ---------------------------------------------------------------------------
# Encoding. None of it depends on the row order, so it is done once here
# rather than being repeated after every shuffle.
# ---------------------------------------------------------------------------

# Columns 1..5: each distinct value becomes an integer code. pd.factorize
# numbers values in order of first appearance, so the codes are reproducible
# from run to run.
feature_codes = np.column_stack(
    [pd.factorize(df.iloc[:, d])[0] for d in range(1, 6)]
).astype('int32')

# Column 6: the target. The network ends in a single sigmoid unit trained
# with binary cross-entropy, which is only meaningful for labels 0 and 1.
yn, label_classes = pd.factorize(df.iloc[:, 6])
yn = yn.astype('int32')
if len(label_classes) > 2:
    raise ValueError(
        "expected at most 2 target classes in column 6, found %d"
        % len(label_classes))

# Column 0: a sequence whose items (characters, if the cell is a string) are
# treated as events. Ids start at 1 so that 0 can serve as padding.
sequences = df.iloc[:, 0].tolist()
events = sorted({event for seq in sequences for event in seq})
event_to_id = {event: i + 1 for i, event in enumerate(events)}

# Pad to at least 8 positions (the original fixed width) and grow when the
# data holds a longer sequence, instead of failing with an IndexError.
max_seq_len = max(8, max(len(seq) for seq in sequences))
seq_events_numbered = np.zeros((size, max_seq_len), dtype='int32')
for row, seq in enumerate(sequences):
    seq_events_numbered[row, :len(seq)] = [event_to_id[event] for event in seq]

# The embedding has to cover every id in use plus the padding id 0.
vocab_size = len(event_to_id) + 1

n = 10
# Started after loading/encoding so the mean time reported at the end of the
# script reflects training and evaluation only.
start_time = time.time()
for ii in range(1, n+1):

    time_02 = time.time()

    # Models are rebuilt on every round; release the global Keras state left
    # behind by the previous one so memory use does not keep growing.
    tf.keras.backend.clear_session()

    # Shuffle the three encoded arrays consistently, then split 80/20.
    XF, S, y_shuffled = shuffle(feature_codes, seq_events_numbered, yn)

    XF_train, XF_test = XF[:t], XF[t:]                  # features
    S_train, S_test = S[:t], S[t:]                      # sequences
    y_train, y_test = y_shuffled[:t], y_shuffled[t:]    # labels

    sequences_features = _build_model(XF_train.shape[1], max_seq_len, vocab_size)

    sequences_features.fit([XF_train, S_train], y_train, epochs=100, batch_size=100, verbose=0)

    score, acc = sequences_features.evaluate([XF_test, S_test], y_test, verbose=0)

    acc_list.append(acc)

    print("Training & evaluation", ii, ": accuracy = %0.3f" % acc, "time = %0.2f" % (time.time() - time_02))

# Final report: average wall-clock time per round since start_time, and the
# mean / variance / standard deviation of the per-round test accuracies.
time_03 = time.time()

mean_round_time = (time_03 - start_time) / n
acc_mean = np.mean(acc_list)
acc_var = np.var(acc_list)
acc_std = np.std(acc_list)

print("\nModel training & evaluation time (mean): %0.2f" % mean_round_time)
print("Accuracy (mean): %.3f" % acc_mean,
      ", Variance: %.6f" % acc_var,
      ", Standard deviation: %.6f" % acc_std)
print("==========================================================================")


 GRU RNN for gender prediction                             
Training & evaluation 1 : accuracy = 0.761 time = 14.14
Training & evaluation 2 : accuracy = 0.756 time = 14.26
Training & evaluation 3 : accuracy = 0.775 time = 13.90
Training & evaluation 4 : accuracy = 0.750 time = 13.90
Training & evaluation 5 : accuracy = 0.762 time = 14.33
Training & evaluation 6 : accuracy = 0.764 time = 14.09
Training & evaluation 7 : accuracy = 0.747 time = 13.89
Training & evaluation 8 : accuracy = 0.768 time = 13.79
Training & evaluation 9 : accuracy = 0.746 time = 13.73
Training & evaluation 10 : accuracy = 0.770 time = 14.40

Model training & evaluation time (mean): 14.11
Accuracy (mean): 0.760 , Variance: 0.000090 , Standard deviation: 0.009475
