## Set up environment and load data

In [1]:
import pickle

def _load_pickle(path):
    """Return the object pickled at ``path``, closing the file afterwards.

    NOTE(review): unpickling can execute arbitrary code, so this must only be
    pointed at files from a trusted source.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


# The files carry a ``.npy`` suffix but are read with pickle, not ``numpy.load``.
X_test = _load_pickle("X_test.npy")
y_test = _load_pickle("y_test.npy")
X_train = _load_pickle("X_train.npy")
y_train = _load_pickle("y_train.npy")

In [2]:
# Collapse each label row to the index of its largest entry (presumably the
# labels are stored one-hot -- TODO confirm against the data export).
# The ndim guard makes the cell safe to re-run: once the labels are 1-D there
# is no axis 1 left, and a second argmax(axis=1) would raise.
if y_train.ndim > 1:
    y_train = y_train.argmax(axis=1)
if y_test.ndim > 1:
    y_test = y_test.argmax(axis=1)

In [7]:
import pandas as pd
import numpy as np
import stellargraph as sg
import cv2
from tensorflow.keras.callbacks import EarlyStopping

# Seed NumPy's global random generator before anything stochastic runs.
np.random.seed(10)

# Graph / model dimensions shared by the cells below.
n_node_max = 112                 # node axis length used in the model input shapes
n_node_feature = 69 + 64 + 1     # per-node feature width (sum of three parts)
n_class = 2                      # number of target classes

# Edge-type count; an earlier setting used just 13.
n_edge_type = 2 + n_node_feature + 13

## Representation learning with 1 epoch

In [8]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten, GlobalAveragePooling1D,Input,concatenate,Concatenate
from stellargraph.layer.gcn import GraphConvolution
import tensorflow_addons as tfa

# Previous CNN model
# Changed padding to 'same' for all Conv layers
# Added one more Conv block
def create_cnn_model():
    """Build the (uncompiled) CNN branch.

    The network takes a single-channel ``(n_node_max, n_node_max, 1)`` input
    and stacks four blocks of [Conv 3x3, Conv 3x3, MaxPool 2x2, Dropout 0.25]
    with 32, 64, 128 and 256 filters. A 1x1 convolution with 64 filters, a
    flatten, a 64-unit dense layer and a final dropout follow.

    Returns:
        A Keras ``Sequential`` model whose output is the 64-unit dense
        activation (after dropout).
    """
    conv_kwargs = dict(padding='same', activation='relu')

    cnn = Sequential()
    for block_idx, n_filters in enumerate((32, 64, 128, 256)):
        # Only the very first layer of the stack declares the input shape.
        first_kwargs = {'input_shape': (n_node_max, n_node_max, 1)} if block_idx == 0 else {}
        cnn.add(Conv2D(n_filters, (3, 3), **conv_kwargs, **first_kwargs))
        cnn.add(Conv2D(n_filters, (3, 3), **conv_kwargs))
        cnn.add(MaxPooling2D(pool_size=(2, 2)))
        cnn.add(Dropout(0.25))

    # A 1x1 convolution reduces the channel count to 64 before flattening.
    cnn.add(Conv2D(64, (1, 1), **conv_kwargs))
    cnn.add(Flatten())

    cnn.add(Dense(64, activation='relu'))
    cnn.add(Dropout(0.25))

    return cnn

def create_gcn_model():
    """Build the (uncompiled) graph-convolution branch.

    Inputs are a ``(n_node_max, n_node_feature)`` node-feature tensor and a
    ``(n_node_max, n_node_max)`` adjacency tensor. Two rounds of
    [Dropout 0.5, GraphConvolution(32, relu)] are applied, each fed the same
    adjacency input. The result is flattened (not average-pooled) and passed
    through a 64-unit dense layer and a 0.25 dropout.

    Returns:
        A Keras ``Model`` with inputs ``[features, adjacency]`` and the
        64-unit dense activation (after dropout) as output.
    """
    node_features = Input(shape=(n_node_max, n_node_feature))
    adjacency = Input(shape=(n_node_max, n_node_max))

    hidden = node_features
    for _ in range(2):
        hidden = Dropout(0.5)(hidden)
        hidden = GraphConvolution(32, activation='relu', use_bias=True)([hidden, adjacency])

    # Flatten keeps every node's activations; global average pooling was the
    # alternative considered here.
    hidden = Flatten()(hidden)
    hidden = Dense(64, activation='relu')(hidden)
    hidden = Dropout(.25)(hidden)

    return Model(inputs=[node_features, adjacency], outputs=hidden)

In [9]:
def create_model():
    """Assemble and compile the joint CNN + GCN classifier.

    The outputs of ``create_cnn_model()`` and ``create_gcn_model()`` are
    concatenated and passed through Dense(64) -> Dropout(0.5) -> Dense(32) ->
    Dense(n_class, softmax). The model is compiled with Adam and sparse
    categorical cross-entropy, so it expects integer class labels, and its
    summary is printed as a side effect.

    Returns:
        The compiled Keras ``Model`` taking ``[cnn_input, gcn_inputs]``.
    """
    cnn_branch = create_cnn_model()
    gcn_branch = create_gcn_model()

    merged = Concatenate()([cnn_branch.output, gcn_branch.output])
    head = Dense(64, activation='relu')(merged)
    head = Dropout(.5)(head)
    head = Dense(32, activation='relu')(head)
    class_probs = Dense(n_class, activation='softmax')(head)

    combined = Model([cnn_branch.input, gcn_branch.input], class_probs)
    combined.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    combined.summary()
    return combined

In [None]:
# Build the joint classifier and train it for a single epoch; it is only
# used afterwards as a feature extractor.
model = create_model()
model.fit(X_train, y_train, batch_size=256, epochs=1)

In [None]:
# Cut the trained network at the layer that joins the two branches and expose
# that tensor as the learned representation.
# The layer is located by type rather than by a positional index such as
# layers[-5], which would silently point at a different tensor if the
# classification head gained or lost a layer.
fusion_layer = next(
    layer for layer in reversed(model.layers) if isinstance(layer, Concatenate)
)
presentation_model = Model(inputs=model.inputs, outputs=fusion_layer.output)
presentation_model.summary()

In [None]:
# Replace the raw model inputs with their learned representations.
# NOTE(review): this overwrites X_test / X_train in place, so the cell cannot
# be re-run without reloading the data first.
X_test, X_train = (
    presentation_model.predict(split) for split in (X_test, X_train)
)
for representation in (X_test, X_train):
    print(representation.shape)

(5451, 128)


In [None]:
from collections import Counter

# Show how many training samples fall into each class.
class_counts = Counter(y_train)
print(class_counts)

Counter({0: 11558, 1: 1160})


In [None]:
import os
import json
import sys

import numpy
import torch
from sklearn.model_selection import train_test_split

from representation_learning_api import RepresentationLearningModel

In [None]:
# Hyperparameters for the representation-learning classifier.
num_layers = 1
lambda1, lambda2 = 0.5, 0.001

In [None]:
# Pool both splits into one dataset along the sample axis so the loop below
# can draw its own random train/test partitions.
# numpy.array([a, b]) would try to stack the two splits as a new leading
# axis; with splits of different length that yields a ragged 2-element
# object array (or an error on recent NumPy), not a pooled dataset.
X = numpy.concatenate([X_test, X_train], axis=0)
Y = numpy.concatenate([y_test, y_train], axis=0)

# Report shapes and the label sum on stderr.
print('Dataset', X.shape, Y.shape, numpy.sum(Y), sep='\t', file=sys.stderr)
print('=' * 100, file=sys.stderr, flush=True)

In [None]:
# Repeat the train/evaluate cycle 30 times, each on a fresh random 80/20 split.
for _ in range(30):
    train_X, test_X, train_Y, test_Y = train_test_split(X, Y, test_size=0.2)
    print(train_X.shape, train_Y.shape, test_X.shape, test_Y.shape,
          sep='\t', file=sys.stderr, flush=True)

    # NOTE(review): this rebinds `model`, replacing the Keras model that the
    # earlier cells stored under the same name.
    model = RepresentationLearningModel(
        lambda1=lambda1,
        lambda2=lambda2,
        num_layers=num_layers,
        batch_size=256,
        max_patience=5,
        balance=True,
        print=True,
    )
    model.train(train_X, train_Y)
    results = model.evaluate(test_X, test_Y)

    # The same four metrics go to stdout and, followed by a separator line,
    # to stderr.
    scores = [results[key] for key in ('accuracy', 'precision', 'recall', 'f1')]
    print(*scores, sep='\t', flush=True)
    print(*scores, sep='\t', file=sys.stderr, flush=True,
          end=('\n' + '=' * 100 + '\n'))