In [1]:
import pydot
from collections import defaultdict
from keras.layers import Input, Dense, Dropout, Embedding, GlobalAveragePooling1D, GRU, Bidirectional
from keras.layers import GlobalMaxPooling1D, LSTM, Dropout, SimpleRNN, TimeDistributed
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.engine.topology import Layer
from keras.layers import concatenate
from keras import activations, initializers, constraints
from keras import regularizers
from keras.regularizers import l1,l2, l1_l2
from keras.callbacks import EarlyStopping
import keras.backend as K
import numpy as np
import os
import pickle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
tf.keras.backend.set_session(sess)

from layers import *
from utils import *

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
## The proposed model, HENIN
def HENIN(GCNXss_shape, GCNXpp_shape, reg=l2(5e-4), n_layers=2, lr=0.01, n_head=8, size_per_head=8, MAX_REV_LEN=75, MAX_REV_WORD_LEN=10, support=3):
    
    '''
    Comment Encoding
    '''
    
    ''' Capture reviews context correlation'''
    ## word-level encoding
    word_input = Input(shape=(None, 300), dtype='float32')
    word_sa = Self_Attention(n_head, size_per_head)(word_input)
    word_avg = GlobalAveragePooling1D()(word_sa)
    wordEncoder = Model(word_input, word_avg)
    
    ## review-level encoding
    content_input = Input(shape=(MAX_REV_LEN, MAX_REV_WORD_LEN, 300), dtype='float32')
    content_word_encode = TimeDistributed(wordEncoder, name='word_seq_encoder')(content_input)
    content_sa = Self_Attention(n_head, size_per_head)(content_word_encode)
    contentSA_avg_pool = GlobalAveragePooling1D()(content_sa) # session embedding
    
    ''' Capture Post-Comment co-attention'''
    post_words_input = Input(shape=(None, 300), dtype='float32')
    post_lstm = Bidirectional(GRU(32, return_sequences=True))(post_words_input)
    coAtt_vec = CoAttLayer(MAX_REV_LEN)([content_word_encode, post_lstm])
    
    '''
    GCN
    Session-Session Interaction Extractor
    Adjacency: session-session
    '''
    G_ss = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(3)]
    
    X_ss = Input(shape=(GCNXss_shape,))
    X_ss_emb = Dense(16, activation='relu')(X_ss)
    
    # Define GCN model architecture
    H_ss = Dropout(0.2)(X_ss_emb)
    for i in range(n_layers-1):
        H_ss = GraphConvolution(16, support, activation='relu', kernel_regularizer=reg)([H_ss]+G_ss)
        
    H_ss = GraphConvolution(8, support, activation='softmax', kernel_regularizer=reg)([H_ss]+G_ss)
    
    '''
    GCN
    Post-Post Interaction Extractor
    Adjacency: post-post
    '''
    G_pp = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(3)]
    
    X_pp = Input(shape=(GCNXpp_shape,))
    X_pp_emb = Dense(16, activation='relu')(X_pp)
    
    # Define GCN model architecture
    H_pp = Dropout(0.2)(X_pp_emb)
    for i in range(n_layers-1):
        H_pp = GraphConvolution(16, support, activation='relu', kernel_regularizer=reg)([H_pp]+G_pp)
    H_pp = GraphConvolution(8, support, activation='softmax', kernel_regularizer=reg)([H_pp]+G_pp)
     
    '''
    Concatenate Comment Encoding & GCN Embedding
    '''
    H = concatenate([contentSA_avg_pool, coAtt_vec, H_ss, H_pp])
    Y = Dense(1, activation='sigmoid')(H)
    
    # Compile model
    model = Model(inputs=[content_input]+[post_words_input]+[X_ss]+G_ss+[X_pp]+G_pp, outputs=Y)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr))
    model.summary()
    
    return model



In [3]:
'''
Load data
'''
# load preprocessed data
with open('preprocessData/Dat4Model.pickle', 'rb') as f:
    Dat4Model = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_users.pickle', 'rb') as f:
    multi_hot_users = pickle.load(f)  
    
w2v_vec_all = Dat4Model['w2v_vec_all'] # features for HENIN
y_all = Dat4Model['y_all'] # target for HENIN
textFeat_all = Dat4Model['textFeat_all']

MAX_REV_WORD_LEN = w2v_vec_all.shape[2]
MAX_REV_LEN = w2v_vec_all.shape[1]

# word embedding of posted text
postEmb = pad_sequences(w2v_vec_all[:,0,:,:], maxlen=MAX_REV_LEN, dtype='float32', padding='post') 



In [3]:
## cross validating for HENIN model
def HENIN_cv(graph, y, A, model, epochs):
    
    skf = StratifiedKFold(n_splits=5, random_state=9999, shuffle=True)
    iters = 0
    
    for train_index, test_index in skf.split(range(len(y)), y):
        y_train, y_test, train_mask = Mask_y(y=y, train_ix=train_index, test_ix=test_index)
        #y_train, y_test = Mask_y(y=y, train_ix=train_index, test_ix=test_index)
        clf = model
        for epoch in range(epochs):
            
            clf.fit(graph, y_train, sample_weight=train_mask, batch_size=A.shape[0], epochs=1)
            #if epoch%5==0:
                #print(metrics(y[test_index], (clf.predict(graph, batch_size=A.shape[0])[:,0] >= 0.5).astype(int)[test_index]))
        preds = (clf.predict(graph, batch_size=A.shape[0])[:,0] >= 0.5).astype(int)
        
        completePerform = metrics(y, preds) # Complete set performance
        generalPerform = metrics(y[test_index], preds[test_index]) # test set performance
        
          
        try:
            if iters == 1:
                CP = {k: v + [completePerform[k]] for k, v in CP.items()}
                GP = {k: v + [generalPerform[k]] for k, v in GP.items()}
            else:  
                CP = {k: [v] + [completePerform[k]] for k, v in CP.items()}
                GP = {k: [v] + [generalPerform[k]] for k, v in GP.items()}
                iters += 1
        except:
            CP = completePerform
            GP = generalPerform
    
    AvgCP = {k: '{:.3f}'.format(np.mean(v)) for k, v in CP.items()}
    AvgGP = {k: '{:.3f}'.format(np.mean(v)) for k, v in GP.items()}
    
    return AvgCP, AvgGP


In [11]:
# Hyper-params tuning
 
import time

ppA = genAdjacencyMatrix(textFeat_all[:,0,:], 'cosine')
ssA = genAdjacencyMatrix(multi_hot_users, 'cosine')

graph_ss = genGCNgraph(ssA, multi_hot_users)
graph_pp = genGCNgraph(ppA, textFeat_all[:,0,:])

graph = [w2v_vec_all]+[postEmb]+graph_ss+graph_pp



In [13]:
ls = [l1(1e-4), l1(3e-4), l1(1e-3), l2(1e-4), l2(3e-4), l2(1e-3)]
layers=[3,4,5]
lrs = [3e-4, 1e-3, 3e-3, 0.01]

results = {}
start=time.time()
for i in layers:
    for l in ls:
        for lr in lrs:
            clf = HENIN(GCNXss_shape=multi_hot_users.shape[1], 
                GCNXpp_shape=textFeat_all[:,0,:].shape[1], 
                reg=l, n_layers=i,lr=lr,
                n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN, 
                MAX_REV_WORD_LEN=MAX_REV_WORD_LEN, support=3)

            AvgCP, AvgGP = HENIN_cv(graph=graph, y=y_all, A=ppA, model=clf, epochs=40)
            results[(i, l, lr)] = AvgGP
            print(f"i={i},l={l.get_config()}, lr={lr}, {AvgGP}")
print(f"Total run time={time.time()-start}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_51 (InputLayer)           (None, 72176)        0                                            
__________________________________________________________________________________________________
input_55 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_13 (Dense)                (None, 16)           1154832     input_51[0][0]                   
__________________________________________________________________________________________________
dense_14 (Dense)                (None, 16)           4816        input_55[0][0]                   
__________________________________________________________________________________________________
dropout_9 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 9.999999747378752e-05}, lr=0.001, {'acc': '0.805', 'prec': '0.846', 'rec': '0.476', 'f1': '0.549'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 9.999999747378752e-05}, lr=0.003, {'acc': '0.831', 'prec': '0.821', 'rec': '0.567', 'f1': '0.666'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 9.999999747378752e-05}, lr=0.01, {'acc': '0.849', 'prec': '0.789', 'rec': '0.692', 'f1': '0.737'}
__________________________________________________________________________________________________
Layer (type)                 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=5,l={'l1': 0.0003000000142492354, 'l2': 0.0}, lr=0.001, {'acc': '0.806', 'prec': '0.824', 'rec': '0.484', 'f1': '0.566'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=5,l={'l1': 0.0003000000142492354, 'l2': 0.0}, lr=0.003, {'acc': '0.839', 'prec': '0.804', 'rec': '0.617', 'f1': '0.691'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=5,l={'l1': 0.0003000000142492354, 'l2': 0.0}, lr=0.01, {'acc': '0.849', 'prec': '0.802', 'rec': '0.669', 'f1': '0.725'}
__________________________________________________________________________________________________
Layer (type)                 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 0.0003000000142492354}, lr=0.001, {'acc': '0.802', 'prec': '0.828', 'rec': '0.464', 'f1': '0.550'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 0.0003000000142492354}, lr=0.003, {'acc': '0.838', 'prec': '0.789', 'rec': '0.633', 'f1': '0.697'}
__________________________________________________________________________________________________
Layer (type)                

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 0.0003000000142492354}, lr=0.01, {'acc': '0.836', 'prec': '0.812', 'rec': '0.605', 'f1': '0.685'}
Total run time=12636.652625322342


In [14]:
df = pd.DataFrame(results.values(),dtype='float')

best_params=list(df['f1'].sort_values(ascending=False).index[0:3])

best_params=list(np.array([(i, l, lr) for i in layers for l in ls for lr in lrs])[best_params])



In [19]:
for item in best_params:
    print(f"layers={item[0]}, reg={item[1].get_config()}, lr={item[2]}")

layers=4, reg={'l1': 0.0, 'l2': 9.999999747378752e-05}, lr=0.01
layers=4, reg={'l1': 0.0003000000142492354, 'l2': 0.0}, lr=0.01
layers=4, reg={'l1': 0.0, 'l2': 0.0003000000142492354}, lr=0.003


In [20]:
best_results={}

#start=time.time()
for param in best_params[::-1]:
    clf = HENIN(GCNXss_shape=multi_hot_users.shape[1], 
	        GCNXpp_shape=textFeat_all[:,0,:].shape[1], 
            reg=param[1], n_layers=param[0], lr=param[2],
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN, support=3)

    AvgCP, AvgGP = HENIN_cv(graph=graph, y=y_all, A=ppA, model=clf, epochs=10)
    best_results[tuple(param)] = AvgGP
    print(f"i={param[0]},l={param[1].get_config()}, lr={param[2]}, {AvgGP}")
#print(f"Total run time={time.time()-start}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_172 (InputLayer)          (None, 72176)        0                                            
__________________________________________________________________________________________________
input_176 (InputLayer)          (None, 300)          0                                            
__________________________________________________________________________________________________
dense_46 (Dense)                (None, 16)           1154832     input_172[0][0]                  
__________________________________________________________________________________________________
dense_47 (Dense)                (None, 16)           4816        input_176[0][0]                  
__________________________________________________________________________________________________
dropout_31

Epoch 1/1
Epoch 1/1


KeyboardInterrupt: 

In [22]:
clf = HENIN(GCNXss_shape=multi_hot_users.shape[1], 
	        GCNXpp_shape=textFeat_all[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN, support=3)

AvgCP, AvgGP = HENIN_cv(graph=graph, y=y_all, A=ppA, model=clf, epochs=60)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_183 (InputLayer)          (None, 72176)        0                                            
__________________________________________________________________________________________________
input_187 (InputLayer)          (None, 300)          0                                            
__________________________________________________________________________________________________
dense_49 (Dense)                (None, 16)           1154832     input_183[0][0]                  
__________________________________________________________________________________________________
dense_50 (Dense)                (None, 16)           4816        input_187[0][0]                  
__________________________________________________________________________________________________
dropout_33

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


KeyboardInterrupt: 

In [49]:
best_results
for k,v in best_results.items():
    print(f"layers={k[0]}, reg={k[1].get_config()}, result={v}")

layers=4, reg={'l1': 0.0, 'l2': 9.999999747378752e-05}, result={'acc': '0.893', 'prec': '0.847', 'rec': '0.789', 'f1': '0.816'}
layers=5, reg={'l1': 0.0003000000142492354, 'l2': 0.0}, result={'acc': '0.879', 'prec': '0.817', 'rec': '0.778', 'f1': '0.797'}
layers=4, reg={'l1': 0.0, 'l2': 0.0003000000142492354}, result={'acc': '0.886', 'prec': '0.832', 'rec': '0.787', 'f1': '0.808'}


In [50]:
with open('best_params.pkl','wb') as f:
    pickle.dump(best_results, f)

In [39]:
# serialize model to JSON
best_params = clf.to_json()
with open("best_model.json", "w") as json_file:
    json_file.write(best_params)
# serialize weights to HDF5
clf.save_weights("best_model.h5")
print("Saved model to disk")

Saved model to disk


# Early Detection

In [4]:
'''
1 3rd of comments
'''
# load preprocessed data
with open('preprocessData/Dat4ModelINSTA25.pickle', 'rb') as f:
    Dat4Model_3rd = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_usersINSTA25.pickle', 'rb') as f:
    multi_hot_users_3rd = pickle.load(f)  
    
w2v_vec_all_3rd = Dat4Model_3rd['w2v_vec_all'] # features for HENIN
y_all_3rd = Dat4Model_3rd['y_all'] # target for HENIN
textFeat_all_3rd = Dat4Model_3rd['textFeat_all']

MAX_REV_WORD_LEN_3rd = w2v_vec_all_3rd.shape[2]
MAX_REV_LEN_3rd = w2v_vec_all_3rd.shape[1]

# word embedding of posted text
postEmb_3rd = pad_sequences(w2v_vec_all_3rd[:,0,:,:], maxlen=MAX_REV_LEN_3rd, dtype='float32', padding='post') 

ppA_3rd = genAdjacencyMatrix(textFeat_all_3rd[:,0,:], 'cosine')
ssA_3rd = genAdjacencyMatrix(multi_hot_users_3rd, 'cosine')

graph_ss_3rd = genGCNgraph(ssA_3rd, multi_hot_users_3rd)
graph_pp_3rd = genGCNgraph(ppA_3rd, textFeat_all_3rd[:,0,:])

graph_3rd = [w2v_vec_all_3rd]+[postEmb_3rd]+graph_ss_3rd+graph_pp_3rd


In [5]:
textFeat_all_3rd.shape

(2211, 25, 300)

In [7]:
#1 6th of comments

# load preprocessed data
with open('preprocessData/Dat4ModelINSTA12.pickle', 'rb') as f:
    Dat4Model_6th = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_usersINSTA12.pickle', 'rb') as f:
    multi_hot_users_6th = pickle.load(f)  
    
w2v_vec_all_6th = Dat4Model_6th['w2v_vec_all'] # features for HENIN
y_all_6th = Dat4Model_6th['y_all'] # target for HENIN
textFeat_all_6th = Dat4Model_6th['textFeat_all']

MAX_REV_WORD_LEN_6th = w2v_vec_all_6th.shape[2]
MAX_REV_LEN_6th = w2v_vec_all_6th.shape[1]

# word embedding of posted text
postEmb_6th = pad_sequences(w2v_vec_all_6th[:,0,:,:], maxlen=MAX_REV_LEN_6th, dtype='float32', padding='post') 

ppA_6th = genAdjacencyMatrix(textFeat_all_6th[:,0,:], 'cosine')
ssA_6th = genAdjacencyMatrix(multi_hot_users_6th, 'cosine')

graph_ss_6th = genGCNgraph(ssA_6th, multi_hot_users_6th)
graph_pp_6th = genGCNgraph(ppA_6th, textFeat_all_6th[:,0,:])

graph_6th = [w2v_vec_all_6th]+[postEmb_6th]+graph_ss_6th+graph_pp_6th


In [6]:
clf_3rd = HENIN(GCNXss_shape=multi_hot_users_3rd.shape[1], 
	        GCNXpp_shape=textFeat_all_3rd[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN_3rd, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN_3rd, support=3)

AvgCP_3rd, AvgGP_3rd = HENIN_cv(graph=graph_3rd, y=y_all_3rd, A=ppA_3rd, model=clf_3rd, epochs=40)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP_3rd}")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 72176)        0                                            
__________________________________________________________________________________________________
input_11 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 16)           1154832     input_7[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)      

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
 result={'acc': '0.876', 'prec': '0.826', 'rec': '0.755', 'f1': '0.788'}


In [8]:
clf_6th = HENIN(GCNXss_shape=multi_hot_users_6th.shape[1], 
	        GCNXpp_shape=textFeat_all_6th[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN_6th, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN_6th, support=3)

AvgCP_6th, AvgGP_6th = HENIN_cv(graph=graph_6th, y=y_all_6th, A=ppA_6th, model=clf_6th, epochs=40)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP_6th}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_18 (InputLayer)           (None, 72176)        0                                            
__________________________________________________________________________________________________
input_22 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 16)           1154832     input_18[0][0]                   
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 16)           4816        input_22[0][0]                   
__________________________________________________________________________________________________
dropout_3 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
 result={'acc': '0.872', 'prec': '0.809', 'rec': '0.753', 'f1': '0.778'}
