In [11]:
import pydot
from collections import defaultdict
from keras.layers import Input, Dense, Dropout, Embedding, GlobalAveragePooling1D, GRU, Bidirectional
from keras.layers import GlobalMaxPooling1D, LSTM, Dropout, SimpleRNN, TimeDistributed
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.engine.topology import Layer
from keras.layers import concatenate
from keras import activations, initializers, constraints
from keras import regularizers
from keras.regularizers import l1,l2, l1_l2
from keras.callbacks import EarlyStopping
import keras.backend as K
import numpy as np
import os
import pickle
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
import tensorflow as tf

os.environ['CUDA_VISIBLE_DEVICES'] = '1'
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
tf.keras.backend.set_session(sess)

from layers import *
from utils import *

In [2]:
## The proposed model, HENIN
def HENIN(GCNXss_shape, GCNXpp_shape, reg=l2(5e-4), n_layers=2, lr=0.01, n_head=8, size_per_head=8, MAX_REV_LEN=80, MAX_REV_WORD_LEN=6, support=3):
    
    '''
    Comment Encoding
    '''
    
    ''' Capture reviews context correlation'''
    ## word-level encoding
    word_input = Input(shape=(None, 300), dtype='float32')
    word_sa = Self_Attention(n_head, size_per_head)(word_input)
    word_avg = GlobalAveragePooling1D()(word_sa)
    wordEncoder = Model(word_input, word_avg)
    
    ## review-level encoding
    content_input = Input(shape=(MAX_REV_LEN, MAX_REV_WORD_LEN, 300), dtype='float32')
    content_word_encode = TimeDistributed(wordEncoder, name='word_seq_encoder')(content_input)
    content_sa = Self_Attention(n_head, size_per_head)(content_word_encode)
    contentSA_avg_pool = GlobalAveragePooling1D()(content_sa) # session embedding
    
    ''' Capture Post-Comment co-attention'''
    post_words_input = Input(shape=(None, 300), dtype='float32')
    post_lstm = Bidirectional(GRU(32, return_sequences=True))(post_words_input)
    coAtt_vec = CoAttLayer(MAX_REV_LEN)([content_word_encode, post_lstm])
    
    '''
    GCN
    Session-Session Interaction Extractor
    Adjacency: session-session
    '''
    G_ss = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(3)]
    
    X_ss = Input(shape=(GCNXss_shape,))
    X_ss_emb = Dense(16, activation='relu')(X_ss)
    
    # Define GCN model architecture
    H_ss = Dropout(0.2)(X_ss_emb)
    for i in range(n_layers-1):
        H_ss = GraphConvolution(16, support, activation='relu', kernel_regularizer=reg)([H_ss]+G_ss)
        
    H_ss = GraphConvolution(8, support, activation='softmax', kernel_regularizer=reg)([H_ss]+G_ss)
    
    '''
    GCN
    Post-Post Interaction Extractor
    Adjacency: post-post
    '''
    G_pp = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(3)]
    
    X_pp = Input(shape=(GCNXpp_shape,))
    X_pp_emb = Dense(16, activation='relu')(X_pp)
    
    # Define GCN model architecture
    H_pp = Dropout(0.2)(X_pp_emb)
    for i in range(n_layers-1):
        H_pp = GraphConvolution(16, support, activation='relu', kernel_regularizer=reg)([H_pp]+G_pp)
    H_pp = GraphConvolution(8, support, activation='softmax', kernel_regularizer=reg)([H_pp]+G_pp)
     
    '''
    Concatenate Comment Encoding & GCN Embedding
    '''
    H = concatenate([contentSA_avg_pool, coAtt_vec, H_ss, H_pp])
    Y = Dense(1, activation='sigmoid')(H)
    
    # Compile model
    model = Model(inputs=[content_input]+[post_words_input]+[X_ss]+G_ss+[X_pp]+G_pp, outputs=Y)
    model.compile(loss='binary_crossentropy', optimizer=Adam(lr=0.01))
    model.summary()
    
    return model



In [4]:
'''
Load data
'''
# load preprocessed data
with open('preprocessData/Dat4ModelVINE.pickle', 'rb') as f:
    Dat4Model = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_usersVINE.pickle', 'rb') as f:
    multi_hot_users = pickle.load(f)  
    
w2v_vec_all = Dat4Model['w2v_vec_all'] # features for HENIN
y_all = Dat4Model['y_all'] # target for HENIN
textFeat_all = Dat4Model['textFeat_all']

MAX_REV_WORD_LEN = w2v_vec_all.shape[2]
MAX_REV_LEN = w2v_vec_all.shape[1]

# word embedding of posted text
postEmb = pad_sequences(w2v_vec_all[:,0,:,:], maxlen=MAX_REV_LEN, dtype='float32', padding='post') 



In [9]:
## cross validating for HENIN model
def HENIN_cv(graph, y, A, model, epochs):
    
    skf = StratifiedKFold(n_splits=5, random_state=9999, shuffle=True)
    iters = 0
    
    for train_index, test_index in skf.split(range(len(y)), y):
        y_train, y_test, train_mask = Mask_y(y=y, train_ix=train_index, test_ix=test_index)
        #y_train, y_test = Mask_y(y=y, train_ix=train_index, test_ix=test_index)
        clf = model
        for epoch in range(epochs):
            
            clf.fit(graph, y_train, sample_weight=train_mask, batch_size=A.shape[0], epochs=1)
            #if epoch%5==0:
                #print(metrics(y[test_index], (clf.predict(graph, batch_size=A.shape[0])[:,0] >= 0.5).astype(int)[test_index]))
        preds = (clf.predict(graph, batch_size=A.shape[0])[:,0] >= 0.5).astype(int)
        
        completePerform = metrics(y, preds) # Complete set performance
        generalPerform = metrics(y[test_index], preds[test_index]) # test set performance
        
          
        try:
            if iters == 1:
                CP = {k: v + [completePerform[k]] for k, v in CP.items()}
                GP = {k: v + [generalPerform[k]] for k, v in GP.items()}
            else:  
                CP = {k: [v] + [completePerform[k]] for k, v in CP.items()}
                GP = {k: [v] + [generalPerform[k]] for k, v in GP.items()}
                iters += 1
        except:
            CP = completePerform
            GP = generalPerform
    
    AvgCP = {k: '{:.3f}'.format(np.mean(v)) for k, v in CP.items()}
    AvgGP = {k: '{:.3f}'.format(np.mean(v)) for k, v in GP.items()}
    
    return AvgCP, AvgGP


In [5]:

 
import time

ppA = genAdjacencyMatrix(textFeat_all[:,0,:], 'cosine')
ssA = genAdjacencyMatrix(multi_hot_users, 'cosine')

graph_ss = genGCNgraph(ssA, multi_hot_users)
graph_pp = genGCNgraph(ppA, textFeat_all[:,0,:])

graph = [w2v_vec_all]+[postEmb]+graph_ss+graph_pp




# Hyper-params tuning

In [6]:
vine_best_results={}

#start=time.time()
for param in [(4, l2(1e-4),lr=0.01), (4, l1(3e-4), lr=0.01), (4, l2(3e-4)), lr=0.003]:
    clf = HENIN(GCNXss_shape=multi_hot_users.shape[1], 
	        GCNXpp_shape=textFeat_all[:,0,:].shape[1], 
            reg=param[1], n_layers=param[0],lr=param[2],
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN, support=3)

    AvgCP, AvgGP = HENIN_cv(graph=graph, y=y_all, A=ppA, model=clf, epochs=40)
    vine_best_results[tuple(param)] = AvgGP
    print(f"i={param[0]},l={param[1].get_config()}, {AvgGP}")
#print(f"Total run time={time.time()-start}")

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 29051)        0                                            
__________________________________________________________________________________________________
input_11 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 16)           464832      input_7[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)      

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 9.9999

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=5,l={'l1': 0.000300000014249

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
i=4,l={'l1': 0.0, 'l2': 0.0003

In [7]:
for k,v in vine_best_results.items():
    print(f"layers={k[0]}, reg={k[1].get_config()}, result={v}")

layers=4, reg={'l1': 0.0, 'l2': 9.999999747378752e-05}, result={'acc': '0.794', 'prec': '0.754', 'rec': '0.523', 'f1': '0.613'}
layers=5, reg={'l1': 0.0003000000142492354, 'l2': 0.0}, result={'acc': '0.805', 'prec': '0.739', 'rec': '0.586', 'f1': '0.651'}
layers=4, reg={'l1': 0.0, 'l2': 0.0003000000142492354}, result={'acc': '0.801', 'prec': '0.733', 'rec': '0.579', 'f1': '0.644'}


# Early Detection

In [12]:
'''
1 3rd of comments
'''
# load preprocessed data
with open('preprocessData/Dat4Model_vine25.pickle', 'rb') as f:
    Dat4Model_3rd = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_users_vine25.pickle', 'rb') as f:
    multi_hot_users_3rd = pickle.load(f)  
    
w2v_vec_all_3rd = Dat4Model_3rd['w2v_vec_all'] # features for HENIN
y_all_3rd = Dat4Model_3rd['y_all'] # target for HENIN
textFeat_all_3rd = Dat4Model_3rd['textFeat_all']

MAX_REV_WORD_LEN_3rd = w2v_vec_all_3rd.shape[2]
MAX_REV_LEN_3rd = w2v_vec_all_3rd.shape[1]

# word embedding of posted text
postEmb_3rd = pad_sequences(w2v_vec_all_3rd[:,0,:,:], maxlen=MAX_REV_LEN_3rd, dtype='float32', padding='post') 

ppA_3rd = genAdjacencyMatrix(textFeat_all_3rd[:,0,:], 'cosine')
ssA_3rd = genAdjacencyMatrix(multi_hot_users_3rd, 'cosine')

graph_ss_3rd = genGCNgraph(ssA_3rd, multi_hot_users_3rd)
graph_pp_3rd = genGCNgraph(ppA_3rd, textFeat_all_3rd[:,0,:])

graph_3rd = [w2v_vec_all_3rd]+[postEmb_3rd]+graph_ss_3rd+graph_pp_3rd


In [13]:
#1 6th of comments

# load preprocessed data
with open('preprocessData/Dat4Model_vine12.pickle', 'rb') as f:
    Dat4Model_6th = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_users_vine12.pickle', 'rb') as f:
    multi_hot_users_6th = pickle.load(f)  
    
w2v_vec_all_6th = Dat4Model_6th['w2v_vec_all'] # features for HENIN
y_all_6th = Dat4Model_6th['y_all'] # target for HENIN
textFeat_all_6th = Dat4Model_6th['textFeat_all']

MAX_REV_WORD_LEN_6th = w2v_vec_all_6th.shape[2]
MAX_REV_LEN_6th = w2v_vec_all_6th.shape[1]

# word embedding of posted text
postEmb_6th = pad_sequences(w2v_vec_all_6th[:,0,:,:], maxlen=MAX_REV_LEN_6th, dtype='float32', padding='post') 

ppA_6th = genAdjacencyMatrix(textFeat_all_6th[:,0,:], 'cosine')
ssA_6th = genAdjacencyMatrix(multi_hot_users_6th, 'cosine')

graph_ss_6th = genGCNgraph(ssA_6th, multi_hot_users_6th)
graph_pp_6th = genGCNgraph(ppA_6th, textFeat_all_6th[:,0,:])

graph_6th = [w2v_vec_all_6th]+[postEmb_6th]+graph_ss_6th+graph_pp_6th


In [14]:
clf_3rd = HENIN(GCNXss_shape=multi_hot_users_3rd.shape[1], 
	        GCNXpp_shape=textFeat_all_3rd[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN_3rd, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN_3rd, support=3)

AvgCP_3rd, AvgGP_3rd = HENIN_cv(graph=graph_3rd, y=y_all_3rd, A=ppA_3rd, model=clf_3rd, epochs=40)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP_3rd}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_29 (InputLayer)           (None, 29051)        0                                            
__________________________________________________________________________________________________
input_33 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_7 (Dense)                 (None, 16)           464832      input_29[0][0]                   
__________________________________________________________________________________________________
dense_8 (Dense)                 (None, 16)           4816        input_33[0][0]                   
__________________________________________________________________________________________________
dropout_5 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
 result={'acc': '0.790', 'prec

In [15]:
clf_6th = HENIN(GCNXss_shape=multi_hot_users_6th.shape[1], 
	        GCNXpp_shape=textFeat_all_6th[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN_6th, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN_6th, support=3)

AvgCP_6th, AvgGP_6th = HENIN_cv(graph=graph_6th, y=y_all_6th, A=ppA_6th, model=clf_6th, epochs=40)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP_6th}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_40 (InputLayer)           (None, 29051)        0                                            
__________________________________________________________________________________________________
input_44 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_10 (Dense)                (None, 16)           464832      input_40[0][0]                   
__________________________________________________________________________________________________
dense_11 (Dense)                (None, 16)           4816        input_44[0][0]                   
__________________________________________________________________________________________________
dropout_7 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
 result={'acc': '0.773', 'prec

In [14]:
multi_hot_users_6th

<249x7549 sparse matrix of type '<class 'numpy.int64'>'
	with 7793 stored elements in Compressed Sparse Row format>

# Unlabeled data

In [7]:
'''
Load data
'''
# load preprocessed data
with open('preprocessData/Dat4ModelVINE_COMBINED.pickle', 'rb') as f:
    Dat4Model_cb = pickle.load(f)
    
# load multi-hot user vectors of each session
with open('preprocessData/multi_hot_usersVINE_COMBINED.pickle', 'rb') as f:
    multi_hot_users_cb = pickle.load(f)  
    
w2v_vec_all_cb = Dat4Model_cb['w2v_vec_all'] # features for HENIN
y_all_cb = Dat4Model_cb['y_all'] # target for HENIN
textFeat_all_cb = Dat4Model_cb['textFeat_all']

MAX_REV_WORD_LEN_cb = w2v_vec_all_cb.shape[2]
MAX_REV_LEN_cb = w2v_vec_all_cb.shape[1]

# word embedding of posted text
postEmb_cb = pad_sequences(w2v_vec_all_cb[:,0,:,:], maxlen=MAX_REV_LEN_cb, dtype='float32', padding='post') 

ppA_cb = genAdjacencyMatrix(textFeat_all_cb[:,0,:], 'cosine')
ssA_cb = genAdjacencyMatrix(multi_hot_users_cb, 'cosine')

graph_ss_cb = genGCNgraph(ssA_cb, multi_hot_users_cb)
graph_pp_cb = genGCNgraph(ppA_cb, textFeat_all_cb[:,0,:])

graph_cb = [w2v_vec_all_cb]+[postEmb_cb]+graph_ss_cb+graph_pp_cb



In [10]:
clf_cb = HENIN(GCNXss_shape=multi_hot_users_cb.shape[1], 
	        GCNXpp_shape=textFeat_all_cb[:,0,:].shape[1], 
            reg=l2(1e-4), n_layers=4, lr=0.01,
	        n_head=8, size_per_head=8, MAX_REV_LEN=MAX_REV_LEN_cb, 
	        MAX_REV_WORD_LEN=MAX_REV_WORD_LEN_cb, support=3)

AvgCP_cb, AvgGP_cb = HENIN_cv(graph=graph_cb, y=y_all_cb, A=ppA_cb, model=clf_cb, epochs=40)
#best_results[tuple(param)] = AvgGP
print(f" result={AvgGP_cb}")

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_18 (InputLayer)           (None, 29051)        0                                            
__________________________________________________________________________________________________
input_22 (InputLayer)           (None, 300)          0                                            
__________________________________________________________________________________________________
dense_4 (Dense)                 (None, 16)           464832      input_18[0][0]                   
__________________________________________________________________________________________________
dense_5 (Dense)                 (None, 16)           4816        input_22[0][0]                   
__________________________________________________________________________________________________
dropout_3 

Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
 result={'acc': '0.794', 'prec