# In [28]:
import time
import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from tensorflow.keras.backend import backend
from tensorflow.keras.models import *
from tensorflow.keras.layers import * 

import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all at
# start-up.  Every visible GPU gets the same setting (TensorFlow requires the
# memory-growth flag to be identical across GPUs), and a CPU-only machine --
# where the list is empty -- is skipped instead of failing on ``gpus[0]``.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# Banner announcing which experiment this cell runs.
banner_rule = "============================================================"
print(banner_rule)
print(" SimpleRNN for demographic sequences and features           ")
print(banner_rule)

# Wall-clock reference points; ``start_time`` is read again when the mean
# run time is reported after the training loop.
start_time = time.time()
time_01 = time.time()

# One test-set accuracy is appended per training run.
acc_list = []

# Load the data table and discard every row that contains a missing value.
df = pd.read_excel("table.xlsx").dropna()

def _to_codes(values):
    """Return one integer id per item, numbering distinct values from 0.

    Ids are handed out in order of first appearance, so the result is
    reproducible for a given input order (unlike iterating over a ``set``).
    """
    ids = {}
    return [ids.setdefault(value, len(ids)) for value in values]


def _encode_dataset(frame):
    """Convert the raw table into the integer arrays fed to the network.

    Column 0 of ``frame`` holds an event-sequence string: its last character
    is the class to predict and the preceding characters are the input
    sequence.  Columns 1-6 are treated as categorical features.

    Returns:
        A tuple ``(features, sequences, labels, vocab_size, n_classes)``:

        * ``features``   -- int32 array (rows, feature columns) of per-column
          category codes.
        * ``sequences``  -- int32 array (rows, longest sequence) of event
          ids, right-padded with 0.
        * ``labels``     -- int32 array (rows,) of class ids.
        * ``vocab_size`` -- number of distinct events + 1 (id 0 is padding).
        * ``n_classes``  -- number of distinct labels.
    """
    table = frame.values[:, 0:7]
    raw_sequences = table[:, 0]

    # Slice into fresh objects rather than rewriting the array in place: the
    # array handed out by ``.values`` may be a view of (or read-only over)
    # the frame's own data.
    targets = [seq[-1] for seq in raw_sequences]
    prefixes = [seq[:-1] for seq in raw_sequences]

    # Categorical feature columns -> integer codes, one column at a time.
    features = np.empty((table.shape[0], table.shape[1] - 1), dtype='int32')
    for col in range(1, table.shape[1]):
        features[:, col - 1] = _to_codes(table[:, col])

    # Class ids follow the sorted label order, so they are stable across runs.
    class_ids = {label: idx for idx, label in enumerate(sorted(set(targets)))}
    labels = np.array([class_ids[label] for label in targets], dtype='int32')

    # Event ids start at 1; 0 is reserved for the right-padding.
    alphabet = sorted({event for seq in prefixes for event in seq})
    event_ids = {event: idx + 1 for idx, event in enumerate(alphabet)}

    max_seq_len = max((len(seq) for seq in prefixes), default=0)
    sequences = np.zeros((len(prefixes), max_seq_len), dtype='int32')
    for row, seq in enumerate(prefixes):
        sequences[row, :len(seq)] = [event_ids[event] for event in seq]

    return features, sequences, labels, len(event_ids) + 1, len(class_ids)


def _build_model(n_features, seq_len, vocab_size, n_classes):
    """Build and compile the two-branch (features + sequence) classifier.

    All input/output widths come from the arguments instead of being
    hard-coded, so the network always matches the encoded data.
    """
    # Dense branch for the categorical features.
    features_input = Input(shape=(n_features,))
    features_1     = Dense(100, activation="relu")(features_input)
    features_2     = Dropout(0.1)(features_1)
    features_out   = Dense(100, activation="relu")(features_2)

    # Recurrent branch for the event sequences.
    sequences_input = Input(shape=(seq_len,))
    sequences_1     = Embedding(input_dim=vocab_size, output_dim=200)(sequences_input)
    sequences_out   = SimpleRNN(200)(sequences_1)   # SimpleRNN may be replaced by GRU or LSTM

    # Joint head on top of both branches.
    merged_0   = concatenate([features_out, sequences_out])
    merged_1   = Dense(300, activation='relu')(merged_0)
    merged_2   = Dropout(0.05)(merged_1)
    merged_out = Dense(n_classes, activation='softmax')(merged_2)

    model = Model(inputs=[features_input, sequences_input], outputs=merged_out)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                  metrics=['sparse_categorical_accuracy'])
    return model


# The encoding does not depend on row order, so it is done once up front;
# each run below only reshuffles the already-encoded rows.
XF_all, S_all, y_all, vocab_size, n_classes = _encode_dataset(df)

size = XF_all.shape[0]
t = int(size * 0.8)         # train part size

n = 10
for ii in range(1, n + 1):

    time_02 = time.time()

    # New random 80/20 split for this run; the three arrays are permuted
    # together so rows stay aligned.
    XF, S, yn = shuffle(XF_all, S_all, y_all)

    XF_train, XF_test = XF[:t], XF[t:]      # features
    S_train,  S_test  = S[:t],  S[t:]       # sequences
    y_train,  y_test  = yn[:t], yn[t:]      # class ids

    # Drop the previous run's Keras state so that repeatedly building models
    # in this loop does not keep accumulating memory.
    tf.keras.backend.clear_session()

    sequences_features = _build_model(XF.shape[1], S.shape[1], vocab_size, n_classes)

    sequences_features.fit([XF_train, S_train], y_train, epochs=70, batch_size=100, verbose=0)

    _, acc = sequences_features.evaluate([XF_test, S_test], y_test, verbose=0)
    acc_list.append(acc)

    print("Training & evaluation", ii, ": accuracy = %0.3f" % acc, "time = %0.2f" % (time.time() - time_02))

# Report the aggregate statistics over all training runs.
time_03 = time.time()

# Elapsed time since ``start_time`` divided by the number of runs.
mean_run_time = (time_03 - start_time) / n

accuracy_mean = np.mean(acc_list)
accuracy_var = np.var(acc_list)
accuracy_std = np.std(acc_list)

print("\nModel training & evaluation time (mean): %0.2f" % mean_run_time)
print("Accuracy (mean): %.3f" % accuracy_mean,
      ", Variance: %.6f" % accuracy_var,
      ", Standard deviation: %.6f" % accuracy_std)
print("==========================================================================")


#  SimpleRNN for demographic sequences and features
# Training & evaluation 1 : accuracy = 0.928 time = 13.32
# Training & evaluation 2 : accuracy = 0.930 time = 13.39
# Training & evaluation 3 : accuracy = 0.929 time = 13.33
# Training & evaluation 4 : accuracy = 0.921 time = 13.41
# Training & evaluation 5 : accuracy = 0.932 time = 13.36
# Training & evaluation 6 : accuracy = 0.934 time = 13.93
# Training & evaluation 7 : accuracy = 0.937 time = 13.45
# Training & evaluation 8 : accuracy = 0.916 time = 13.55
# Training & evaluation 9 : accuracy = 0.928 time = 13.52
# Training & evaluation 10 : accuracy = 0.916 time = 13.38
#
# Model training & evaluation time (mean): 13.50
# Accuracy (mean): 0.927 , Variance: 0.000049 , Standard deviation: 0.007000
