In [None]:
import tensorflow as tf 
import keras as ks 
import numpy as np
import pandas as pd
import os
from   tensorflow.keras import backend as K

# Root directory holding every input file read by this notebook.
data_folder = "/nvme/drive_1/NTDS_Final/"

# Run configuration, kept together so it can be tuned in one place.
parallel_calls   = 12
random_seed      = 1
batch_len        = 1000
num_epochs       = 2000
sampled_nodes    = 2
num_eigenvectors = 100      # width of the model's input layer

# Inputs loaded once up front: the filtered-user table (as a raw ndarray)
# and the label array that is sliced into train/test targets further down.
filtered_users = pd.read_csv(
    os.path.join(data_folder, "filtered_users.csv"),
    sep=',',
).values
labels = np.load(os.path.join(data_folder, "new_filtering/labels.npy"))


**I. Set Up Data Pipelines**

In [None]:
# Load the feature matrix and standardise it along axis 0
# (subtract the mean, divide by the standard deviation).
# NOTE(review): the statistics are computed over every row, including the
# rows held out for testing below -- confirm that this is acceptable.
features       = np.load(data_folder+"new_filtering/features.npy")
features       = (features - np.mean(features,axis=0)) / np.std(features,axis=0)

# Rows [0, num_train) form the training set; everything after is held out.
num_train      = 50000

dataset_train  = features[:num_train]
# Targets come from column 1 of `labels`; slice by the actual number of
# training rows so the pair stays aligned even if fewer rows are available.
truth_train    = labels[:len(dataset_train), 1]

# The held-out features must start where the training rows end so that they
# line up row-for-row with truth_test (previously the training rows were
# reused here, so test features and labels did not match).
dataset_test   = features[num_train:]
truth_test     = labels[num_train:, 1]

**II. Create Neural Network Model**

In [None]:
def f1(y_true, y_pred):
    """Keras metric: F1 score computed on hard (rounded) predictions.

    Args:
        y_true: tensor of ground-truth targets (expected to hold 0/1 values).
        y_pred: tensor of model outputs; rounded to the nearest integer
            before counting.

    Returns:
        Scalar tensor: the F1 score, reduced over axis 0 and then averaged
        over any remaining dimensions.
    """
    y_pred = K.round(y_pred)

    # Confusion-matrix counts, summed over the batch axis.
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    # K.epsilon() keeps the divisions finite when a count is zero.
    precision = tp / (tp + fp + K.epsilon())
    recall    = tp / (tp + fn + K.epsilon())

    f1_score = 2*precision*recall / (precision+recall+K.epsilon())
    # Replace any NaN entries with 0 so they cannot poison the mean.
    f1_score = tf.where(tf.is_nan(f1_score), tf.zeros_like(f1_score), f1_score)
    return K.mean(f1_score)

def f1_loss(y_true, y_pred):
    """Differentiable F1-based loss: ``1 - soft F1``.

    Unlike the ``f1`` metric, predictions are not rounded; they are only
    clipped to [0, 1], so the counts below are "soft" and gradients can flow.

    Args:
        y_true: tensor of ground-truth targets (expected to hold 0/1 values).
        y_pred: tensor of model outputs.

    Returns:
        Scalar tensor equal to one minus the mean soft F1 score.
    """
    y_pred_clip = tf.clip_by_value(y_pred,0,1)

    # Soft confusion-matrix counts, summed over the batch axis.
    # (True negatives are not needed for F1, so they are not computed.)
    tp = K.sum(K.cast(y_true*y_pred_clip, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred_clip, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred_clip), 'float'), axis=0)

    # K.epsilon() keeps the divisions finite when a count is zero.
    precision = tp / (tp + fp + K.epsilon())
    recall    = tp / (tp + fn + K.epsilon())

    soft_f1 = 2*precision*recall / (precision+recall+K.epsilon())
    # Replace any NaN entries with 0 so they cannot poison the mean.
    soft_f1 = tf.where(tf.is_nan(soft_f1), tf.zeros_like(soft_f1), soft_f1)

    return 1 - K.mean(soft_f1)


def build_model():
    """Build the binary classifier used in this notebook.

    Architecture: input -> BatchNorm -> Dense(relu) -> BatchNorm ->
    Dense(1, sigmoid).

    Reads the module-level ``num_eigenvectors`` (input width) and ``labels``
    (its length sets the hidden-layer width).

    Returns:
        An uncompiled ``tf.keras.Model`` mapping ``input_1`` to the sigmoid
        output of ``dense_2``.
    """
    input_1 = tf.keras.layers.Input(shape=[num_eigenvectors,],name='input_1')

    # Normalise the raw input and feed the *normalised* tensor into the first
    # dense layer (previously the dense layer was applied to input_1 directly,
    # so this normalisation was silently dropped from the model).
    normed_input = tf.keras.layers.BatchNormalization()(input_1)
    # NOTE(review): the hidden width is len(labels), i.e. the number of label
    # rows -- confirm this is intended rather than a smaller fixed size.
    dense_1 = tf.keras.layers.Dense(len(labels), activation='relu',
                                    use_bias=True, name='dense_1')\
                                    (normed_input)
    dense_1 = tf.keras.layers.BatchNormalization()(dense_1)
    dense_2 = tf.keras.layers.Dense(1, activation='sigmoid',
                                    use_bias=True, name='dense_2')(dense_1)

    return tf.keras.Model(inputs=[input_1],outputs=[dense_2])



# Instantiate the network and configure training: the soft-F1 loss is
# optimised with Adam, while binary cross-entropy and the hard F1 score are
# tracked as metrics.
model = build_model()
model.compile(
    loss=f1_loss,
    optimizer=tf.train.AdamOptimizer(learning_rate=0.002),
    metrics=['binary_crossentropy', f1],
)
model.summary()

**III. Train and Test Network**

In [None]:
# Train on the training split: 20 passes over the data in mini-batches of 1000.
model.fit(dataset_train, truth_train, batch_size=1000, epochs=20)

In [None]:
# Evaluate the trained model on the test set and report the mean absolute
# error between the predicted class (0/1) and the ground truth.
#
# The whole test set is predicted in one batched call instead of one
# model.predict() call per sample, and the sigmoid output is rounded to the
# nearest class directly (the previous mapping assigned class 0 to outputs
# >= 0.5 and class 1 otherwise, i.e. it was inverted).
if len(dataset_test):
    y_pred = np.rint(model.predict(dataset_test, batch_size=batch_len)).reshape(-1)
else:
    y_pred = np.zeros(0)

idx_acc = len(y_pred)       # number of evaluated samples
if len(truth_test) < idx_acc:
    raise ValueError("truth_test has fewer entries (%d) than dataset_test (%d)"
                     % (len(truth_test), idx_acc))

# Sample i is compared against truth_test[i], as in a per-sample loop.
error_acc  = np.sum(np.abs(y_pred - truth_test[:idx_acc]))
test_error = error_acc/idx_acc if idx_acc else float('nan')
print("\r"+str(test_error)+" test error", sep=' ', end='', flush=True)




In [None]:
# Report the number of test and training samples.
# Uses the arrays built in the data-pipeline cell; the names previously
# referenced here (files_test / files_train) are not defined by any cell in
# this notebook and raised NameError on a fresh kernel.
print(len(dataset_test))
print(len(dataset_train))