In [None]:
%tensorflow_version 1.x

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import seaborn as sns
import pickle
import io
import tensorflow as tf
import os
import gc

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

from keras.models import Model
from keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding,Bidirectional
from keras.optimizers import RMSprop,Adam
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.metrics import top_k_categorical_accuracy
from keras.layers import Input, Dense, Embedding, Conv1D, Conv2D, MaxPooling1D, MaxPool2D
from keras.layers import Reshape, Flatten, Dropout, Concatenate
from keras.layers import SpatialDropout1D, concatenate
from keras.layers import GRU, Bidirectional, GlobalAveragePooling1D, GlobalMaxPooling1D,BatchNormalization
from keras import regularizers
from keras.models import model_from_json


%matplotlib inline
# import tensorflow as tf


In [None]:
import tensorflow as tf

# Ask TensorFlow for the GPU device name once; an empty string means no GPU is visible.
gpu_device = tf.test.gpu_device_name()
if not gpu_device:
    print("Please install GPU version of TF")
else:
    print('Default GPU Device: {}'.format(gpu_device))

In [None]:
# Google Drive mount, disabled by wrapping it in a bare string literal so the code inside does not run.
# NOTE(review): any cell that reads from /content/gdrive needs this mount to be re-enabled first.
'''from google.colab import drive
drive.mount('/content/gdrive',force_remount = True)
'''

## Load one-hot encoded output

In [None]:
# Deserialize the label matrix Y from the pickle stored next to the notebook.
# NOTE(review): pickle.load can execute arbitrary code; only use a labels.pkl from a trusted source.
labels_path = "./labels.pkl"
with open(labels_path, "rb") as labels_file:
    Y = pickle.load(labels_file)

In [None]:
# Hold out 10% of the label rows as the test split, with a fixed seed so the split is repeatable.
# NOTE(review): the input sequences are not split here; they are loaded from separate, pre-split .npy
# files. Confirm those files were produced with the same row order and the same split
# (test_size=0.1, random_state=42) so their rows line up with Y_train / Y_test.
Y_train, Y_test = train_test_split(Y,test_size = 0.1,random_state = 42)

In [None]:
gc.collect()

## Load train and test sequences data

In [None]:
# Training sequences; passed to the stacked model together with Y_train.
# NOTE(review): this path starts at the filesystem root ("/"), whereas the labels and the test
# sequences are read relative to the working directory ("./"). Confirm the file's location.
sequences_matrix = np.load("/50to500_cleaned_train_sequences.npy")

In [None]:
# Test sequences; evaluated against Y_test by the base models and by the stacked model.
# NOTE(review): the training file name carries a "50to500_" prefix and this one does not;
# confirm both were preprocessed the same way.
test_sequences_matrix = np.load("./cleaned_test_sequences.npy")

## Load all models

In [None]:
def top_3_accuracy(y_true, y_pred):
    """Top-k categorical accuracy metric with k fixed to 3."""
    top_3_hits = top_k_categorical_accuracy(y_true, y_pred, k=3)
    return top_3_hits

def top_10_accuracy(y_true, y_pred):
    """Top-k categorical accuracy metric with k fixed to 10."""
    top_10_hits = top_k_categorical_accuracy(y_true, y_pred, k=10)
    return top_10_hits

def top_15_accuracy(y_true, y_pred):
    """Top-k categorical accuracy metric with k fixed to 15."""
    top_15_hits = top_k_categorical_accuracy(y_true, y_pred, k=15)
    return top_15_hits

def top_5_accuracy(y_true, y_pred):
    """Top-k categorical accuracy metric with k fixed to 5."""
    top_5_hits = top_k_categorical_accuracy(y_true, y_pred, k=5)
    return top_5_hits

In [None]:
def load_model(arch_file,wt_file):
  """Rebuild a Keras model from a JSON architecture file and a weights file.

  Args:
    arch_file: path to the JSON file holding the model architecture.
    wt_file: path to the weights file passed to ``model.load_weights``.

  Returns:
    The model, compiled with categorical cross-entropy, a default Adam
    optimizer and the metrics accuracy, top-3, top-5, top-10 and top-15
    accuracy (in that order).
  """
  # Context manager closes the handle even if read() raises.
  with open(arch_file, 'r') as json_file:
    loaded_model_json = json_file.read()
  model = model_from_json(loaded_model_json)
  # load weights into new model
  model.load_weights(wt_file)
  model.compile(loss='categorical_crossentropy',optimizer=Adam(),metrics=['accuracy',top_3_accuracy,top_5_accuracy,top_10_accuracy,top_15_accuracy])
  print("Loaded model from disk")
  return model

In [None]:
# Architecture (JSON) files of the base models, all located under root_path.
root_path = "./Sentimix/"
arch_file = [root_path + file_name for file_name in ("CNN_50to500.json", "LSTM_50to500.json")]
print(arch_file)

In [None]:
# Weight (HDF5) files of the base models, listed in the same CNN, LSTM order as arch_file.
wt_file = [root_path + file_name for file_name in ("CNN_50to500.h5", "LSTM_50to500.h5")]
print(wt_file)

In [None]:
# Weight files of a different set of base models.
# They are kept under their own name on purpose: rebinding `wt_file` here would leave four
# weight paths against the two entries of `arch_file`, and load_allmodels() would then pair
# the CNN/LSTM architectures with bilstm_ir.h5 / cnn_ir.h5.
# NOTE(review): root_path already ends with "/", so these paths contain "//"; and
# "LSTM_attentio.h5" looks truncated. Confirm the real file names before using this list.
ir_wt_file = [
    root_path+"/bilstm_ir.h5",
    root_path+"/cnn_ir.h5",
    root_path+"/LSTM_attentio.h5",
    root_path+"/rnn_cnn.h5",
]
print(ir_wt_file)

In [None]:
def load_allmodels(arch_file,wt_file):
  """Load every base model from parallel lists of architecture and weight files.

  Args:
    arch_file: list of JSON architecture paths.
    wt_file: list of weight-file paths; entry i belongs to arch_file[i].

  Returns:
    List of models returned by load_model(), in input order.

  Raises:
    ValueError: if the two lists differ in length. Previously extra weight
      files were silently ignored, which let mismatched lists pair an
      architecture with another model's weights.
  """
  if len(arch_file) != len(wt_file):
    raise ValueError(
        "arch_file and wt_file must have the same length, got %d and %d"
        % (len(arch_file), len(wt_file)))
  all_models = []
  for number, (arch_path, wt_path) in enumerate(zip(arch_file, wt_file), start=1):
    model = load_model(arch_path, wt_path)
    print("model number ",number,"loaded")
    all_models.append(model)
  return all_models
all_models = load_allmodels(arch_file,wt_file)


## Evaluate all models

In [None]:
# evaluate() returns the loss followed by every compiled metric, so label each value with its
# metric name instead of presenting the whole list as "top 15 accuracy".
for model in all_models:
    score = model.evaluate(test_sequences_matrix, Y_test, batch_size=1024)
    print(dict(zip(model.metrics_names, score)))

## Prepare training dataset for meta-learner

In [None]:
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import Dense
from keras.layers.merge import concatenate

def define_stacked_model(members, n_classes=None):
    """Build and compile a stacking ensemble on top of frozen member models.

    The outputs of all members are concatenated and passed through
    Dense(2048, relu, L2 0.05) -> BatchNormalization -> Dense(softmax).

    Args:
        members: list of Keras models used as the ensemble's base learners.
        n_classes: width of the softmax output layer. When None (the default)
            it falls back to ``Y.shape[1]`` from the notebook's global label
            matrix, which is what the original code always used.

    Returns:
        The compiled stacked model, with one input per member.

    Side effects:
        Every layer of every member is set non-trainable and renamed with an
        ``ensemble_<k>_`` prefix, and the model graph is written to
        'ensemble_50to500_cnn_and_lstm_graph.png'.
    """
    if n_classes is None:
        n_classes = Y.shape[1]

    # Freeze the members and give their layers unique names.
    for member_number, member in enumerate(members, start=1):
        prefix = 'ensemble_' + str(member_number) + '_'
        for layer in member.layers:
            layer.trainable = False
            # Skip layers that already carry the prefix so that re-running this
            # function does not keep stacking prefixes on the same names.
            if not layer.name.startswith(prefix):
                layer.name = prefix + layer.name

    # define multi-headed input
    ensemble_visible = [member.input for member in members]
    # concatenate merge output from each model
    ensemble_outputs = [member.output for member in members]
    merge = concatenate(ensemble_outputs)
    hidden = Dense(2048, activation='relu',kernel_regularizer=regularizers.l2(0.05))(merge)
    hidden = BatchNormalization(name = 'BN1')(hidden)

    output = Dense(n_classes, activation='softmax')(hidden)
    model = Model(inputs=ensemble_visible, outputs=output)
    # plot graph of ensemble
    plot_model(model, show_shapes=True, to_file='ensemble_50to500_cnn_and_lstm_graph.png')
    # compile
    model.compile(loss='categorical_crossentropy',optimizer=Adam(),metrics=['accuracy',top_3_accuracy,top_5_accuracy,top_10_accuracy,top_15_accuracy])
    return model

In [None]:
from google.colab import files

# The graph image is only written when define_stacked_model() runs. On a top-to-bottom run
# this cell executes before that call, so guard the download instead of failing on a missing file.
graph_png = "ensemble_50to500_cnn_and_lstm_graph.png"
if os.path.exists(graph_png):
    files.download(graph_png)
else:
    print("Skipping download: " + graph_png + " not found; run define_stacked_model() first.")

In [None]:
def fit_stacked_model(model, inputX, inputy):
    """Train the stacked model, feeding the same input array to every input head.

    Args:
        model: stacked Keras model to train.
        inputX: input array; it is repeated once per model input.
        inputy: target array passed to ``model.fit``.

    Returns:
        Whatever ``model.fit`` returns. Training uses batch size 1024, at most
        10 epochs, a 10% validation split, early stopping on ``val_loss``
        (patience 2) and ReduceLROnPlateau (patience 1).
    """
    # model.inputs is always a list, whereas model.input is a bare tensor for a
    # single-input model, which made len(model.input) fail in that case.
    X = [inputX for _ in model.inputs]
    callbacks = [
        EarlyStopping(monitor='val_loss',min_delta=0.00001,patience=2),
        ReduceLROnPlateau(min_delta=0.0001,patience=1),
    ]
    return model.fit(X,inputy,batch_size=1024,epochs=10,validation_split=0.1,callbacks=callbacks)

In [None]:
def evaluate_stacked_model(model, inputX,inputy):
    """Evaluate the stacked model, feeding the same input array to every input head.

    Args:
        model: stacked Keras model to evaluate.
        inputX: input array; it is repeated once per model input.
        inputy: target array passed to ``model.evaluate``.

    Returns:
        Whatever ``model.evaluate`` returns for a batch size of 1024.
    """
    # model.inputs is always a list, whereas model.input is a bare tensor for a
    # single-input model, which made len(model.input) fail in that case.
    X = [inputX for _ in model.inputs]
    return model.evaluate(X,inputy, batch_size=1024)

In [None]:

stacked_model = define_stacked_model(all_models)

In [None]:
hist = fit_stacked_model(stacked_model, sequences_matrix, Y_train)

In [None]:
# Show which series were recorded during training.
print(hist.history.keys())

# One figure per training curve: (history key, figure title, y-axis label).
curves = (
    ('acc', 'model accuracy', 'accuracy'),
    ('loss', 'model loss', 'loss'),
)
for history_key, figure_title, y_label in curves:
    plt.plot(hist.history[history_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.show()

In [None]:
score = evaluate_stacked_model(stacked_model, test_sequences_matrix, Y_test)

In [None]:
print(score)

In [None]:
# evaluate() returns the loss followed by the compiled metrics in order. With the metrics list
# ['accuracy', top_3, top_5, top_10, top_15] used at compile time, index 5 is the top-15 accuracy.
print("top 15 acc",score[5]*100)

## Save the model

In [None]:
# Persist the stacked model in two parts: the architecture as JSON and the weights as HDF5.
architecture_path = "/lstm_bilstm_ensemble.json"
weights_path = "/lstm_bilstm_ensemble.h5"

model_json = stacked_model.to_json()
with open(architecture_path, "w") as json_file:
    json_file.write(model_json)

stacked_model.save_weights(weights_path)
print("Saved model to disk")

 


In [None]:
# from google.colab import files
# files.download('LSTMbatchnormonfulldatacount1000.json')
# files.download('LSTMbatchnormonfulldatacount1000.h5')

In [None]:
# load json and create model
from keras.models import model_from_json

# Read back the files written by the save step. The previous version read from a Google Drive
# folder (whose mount is disabled in this notebook) and asked for a weights file spelled
# "emsemble", which does not match the saved "lstm_bilstm_ensemble.h5".
saved_architecture_path = "/lstm_bilstm_ensemble.json"
saved_weights_path = "/lstm_bilstm_ensemble.h5"

# Context manager closes the handle even if read() raises.
with open(saved_architecture_path, 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights(saved_weights_path)
print("Loaded model from disk")

In [None]:
# Compile the reloaded model with the same loss, optimizer and metrics as the stacked model,
# then score it on the test data.
evaluation_metrics = ['accuracy',top_3_accuracy,top_5_accuracy,top_10_accuracy,top_15_accuracy]
loaded_model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(),
    metrics=evaluation_metrics,
)
score = evaluate_stacked_model(loaded_model, test_sequences_matrix, Y_test)


In [None]:
print(score)

In [None]:
# score[5] is the top-15 accuracy: evaluate() returns the loss followed by the metrics in compile order.
# NOTE(review): the label comes from metrics_names[1], which names the plain accuracy metric rather than
# the top-15 one. Presumably it is meant as a generic "acc" suffix; confirm, or use metrics_names[5].
print("top 15 %s: %.2f%%" % (loaded_model.metrics_names[1], score[5]*100))