In [None]:
from __future__ import print_function
import numpy as np
import pandas as pd
from keras.preprocessing import sequence
from keras.datasets import imdb
import keras
from keras.utils import to_categorical
from sklearn.metrics import f1_score



from keras.layers import Embedding
from keras.layers import Dense, Input, Flatten
from keras.layers import Conv1D,merge, MaxPooling1D, Embedding,Merge,  Dropout
from keras.models import Model
from keras.models import Sequential
from keras.layers.merge import Add,Concatenate,Dot


# --- Filesystem locations (absolute paths on the original author's machine) ---
BASE_DIR = ''
GLOVE_DIR = '/home/abhinav/data/GLOVE_DATA/glove.6B'
TEXT_DATA_DIR = '/home/abhinav/data/full_text'
# Prefix for the pickled datasets / embedding matrix and for the Metrics filepath.
MOD_DIR = "/home/abhinav/data/sixth_model_alan_filtering/"

# --- Model dimensions ---
MAX_SEQUENCE_LENGTH = 1000  # length of the integer input sequence fed to the model
MAX_NB_WORDS = 50000
EMBEDDING_DIM = 100         # output dimension of the Embedding layer
num_words = MAX_NB_WORDS    # input dimension (vocabulary size) of the Embedding layer


# Each dataset pickle unpacks into a pair that is later passed to model.fit
# as (inputs, targets).
print('Loading data...')
x_train, y_train = pd.read_pickle(MOD_DIR + "train_data")
x_test, y_test = pd.read_pickle(MOD_DIR + "test_dataset")

# Weights handed to the Embedding layer when the model is declared.
print("Loading the Embedding matrix")
embedding_matrix = pd.read_pickle(MOD_DIR + "embedding_matrix")



#some helper functions

class Metrics(keras.callbacks.Callback):
    """Keras callback that scores the validation data after every epoch.

    At the end of each epoch the model predicts on ``self.validation_data[0]``,
    the predictions are turned into hard labels with ``prob_to_label`` and the
    result of ``f1(targets, predictions)`` is stored on ``self.f1s`` and printed.

    Args:
        filepath: Stored on ``self.filepath``. It is not read by
            ``on_epoch_end`` at the moment (model checkpointing is disabled).
    """

    def __init__(self, filepath):
        # Let the Keras base class set up its own bookkeeping attributes.
        super(Metrics, self).__init__()
        self.filepath = filepath
        # Best score seen so far; kept for the (currently disabled) checkpointing.
        # ``np.inf`` is used because the ``np.Inf`` alias was removed in NumPy 2.0.
        self.best = -np.inf

    def on_epoch_end(self, epoch, batch=None, logs=None):
        """Evaluate on the validation data and print the resulting scores.

        Keras invokes this hook as ``on_epoch_end(epoch, logs)``, so the logs
        dict arrives in the second positional slot. That slot keeps its
        historical name ``batch`` for backward compatibility; neither ``batch``
        nor ``logs`` is read here, and both are optional.

        Args:
            epoch: Index of the epoch that just finished (unused).
            batch: Receives the Keras logs dict when called positionally (unused).
            logs: Optional logs dict (unused).
        """
        predict = self.model.predict(self.validation_data[0], batch_size=512)
        predict = prob_to_label(predict)
        targ = self.validation_data[1]
        # f1() returns whatever precision_recall_fscore_support returns, so the
        # printed value is that whole result, not a single number.
        self.f1s = f1(targ, predict)
        print("\nWeighted F1 score found on Validation dataset : ", self.f1s)

from sklearn.metrics import precision_recall_fscore_support
def f1(y_true, y_pred):
    """Score predictions with scikit-learn's ``precision_recall_fscore_support``.

    Despite the name, the return value is the function's full result (called
    with its default options), not a single F1 number.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The unmodified result of ``precision_recall_fscore_support``.
    """
    scores = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred)
    return scores
# Nadam optimizer instance passed to model.compile when the model is declared.
ndm = keras.optimizers.Nadam(lr=0.001)

def prob_to_label(array):
    """Turn rows of two scores into one-hot labels, modifying ``array`` in place.

    For every row, the larger of the first two entries becomes 1 and the other
    becomes 0. When neither entry is strictly larger, the winner is drawn with
    ``np.random.randint(2)``.

    Args:
        array: Iterable of mutable rows with at least two entries each.

    Returns:
        The same ``array`` object, after in-place modification.
    """
    for row in array:
        if row[0] > row[1]:
            winner = 0
        elif row[0] < row[1]:
            winner = 1
        else:
            # No strict winner: break the tie at random.
            winner = np.random.randint(2)
        row[winner] = 1
        row[1 - winner] = 0
    return array



In [29]:



#Declaring the Model


# Embedding layer initialised from the loaded embedding matrix; trainable=True
# lets its weights be updated during training.
embedding_layer = Embedding(num_words,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)

# applying a more complex convolutional approach
convs = []
filter_sizes = [3, 4, 5]

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)

# One convolution + max-pooling branch per kernel size, all reading the same
# embedded sequence.
for fsz in filter_sizes:
    l_conv = Conv1D(filters=128, kernel_size=fsz, activation='relu')(embedded_sequences)
    l_pool = MaxPooling1D(5)(l_conv)
    convs.append(l_pool)

# Join the branches along axis 1. Concatenate (imported at the top of the
# notebook) replaces the legacy Merge(mode='concat', concat_axis=1) layer,
# which newer Keras releases no longer provide.
l_merge = Concatenate(axis=1)(convs)
l_cov1 = Conv1D(128, 5, activation='relu')(l_merge)
l_pool1 = MaxPooling1D(5)(l_cov1)
l_cov2 = Conv1D(128, 5, activation='relu')(l_pool1)
l_pool2 = MaxPooling1D(30)(l_cov2)
l_flat = Flatten()(l_pool2)
l_dense = Dense(128, activation='relu')(l_flat)
# Two-unit softmax output: one score per class.
preds = Dense(2, activation='softmax')(l_dense)


model = Model(sequence_input, preds)
model.compile(loss='binary_crossentropy',
              optimizer=ndm,
              metrics=['acc'])
model.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_5 (InputLayer)             (None, 1000)          0                                            
____________________________________________________________________________________________________
embedding_5 (Embedding)          (None, 1000, 100)     5000000     input_5[0][0]                    
____________________________________________________________________________________________________
conv1d_19 (Conv1D)               (None, 998, 128)      38528       embedding_5[0][0]                
____________________________________________________________________________________________________
conv1d_20 (Conv1D)               (None, 997, 128)      51328       embedding_5[0][0]                
___________________________________________________________________________________________



In [30]:
# Snapshot of the model's current weights; the training cells pass it to
# model.set_weights() before each run.
initial_weights = model.get_weights()


In [32]:
# Experiment: equal class weights (1 : 1).
# Restore the saved starting weights so this run does not continue from an
# earlier fit().
# NOTE(review): set_weights() restores layer weights only; the optimizer
# instance `ndm` is shared by every run - confirm whether its state should be
# reset as well (e.g. by recompiling with a fresh optimizer).
model.set_weights(initial_weights)
print('Train...')
metrics = Metrics(MOD_DIR + "none")
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          # Keys are integer class indices, as documented for class_weight.
          class_weight={0: 1, 1: 1},
          # Documented form is a tuple (x_val, y_val).
          validation_data=(x_test, y_test),
          callbacks=[metrics])

Train...
Train on 9937 samples, validate on 45500 samples
Epoch 1/10
Weighted F1 score found on Validation dataset :  (array([ 0.98901099,  0.        ]), array([ 1.,  0.]), array([ 0.99447514,  0.        ]), array([45000,   500]))
Epoch 2/10
Weighted F1 score found on Validation dataset :  (array([ 0.98901099,  0.        ]), array([ 1.,  0.]), array([ 0.99447514,  0.        ]), array([45000,   500]))
Epoch 3/10

KeyboardInterrupt: 

In [33]:
# Experiment: class 1 weighted 3x relative to class 0.
# Restore the saved starting weights so this run does not continue from an
# earlier fit().
# NOTE(review): set_weights() restores layer weights only; the optimizer
# instance `ndm` is shared by every run - confirm whether its state should be
# reset as well (e.g. by recompiling with a fresh optimizer).
model.set_weights(initial_weights)
print('Train...')
metrics = Metrics(MOD_DIR + "none")
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          # Keys are integer class indices, as documented for class_weight.
          class_weight={0: 1, 1: 3},
          # Documented form is a tuple (x_val, y_val).
          validation_data=(x_test, y_test),
          callbacks=[metrics])

Train...
Train on 9937 samples, validate on 45500 samples
Epoch 1/10
Weighted F1 score found on Validation dataset :  (array([ 0.98901099,  0.        ]), array([ 1.,  0.]), array([ 0.99447514,  0.        ]), array([45000,   500]))
Epoch 2/10
Weighted F1 score found on Validation dataset :  (array([ 0.99273463,  0.02355316]), array([ 0.77428889,  0.49      ]), array([ 0.87000924,  0.04494588]), array([45000,   500]))
Epoch 3/10
Weighted F1 score found on Validation dataset :  (array([ 0.99373683,  0.02375915]), array([ 0.73337778,  0.584     ]), array([ 0.843933  ,  0.04566067]), array([45000,   500]))
Epoch 4/10
Weighted F1 score found on Validation dataset :  (array([ 0.99329727,  0.02368513]), array([ 0.75084444,  0.544     ]), array([ 0.8552192 ,  0.04539386]), array([45000,   500]))
Epoch 5/10
Weighted F1 score found on Validation dataset :  (array([ 0.9927831 ,  0.02507283]), array([ 0.79175556,  0.482     ]), array([ 0.88094649,  0.04766614]), array([45000,   500]))
Epoch 6/10
We

<keras.callbacks.History at 0x7f0742906dd0>

In [34]:
# Experiment: class 1 weighted 5x relative to class 0.
# Restore the saved starting weights so this run does not continue from an
# earlier fit().
# NOTE(review): set_weights() restores layer weights only; the optimizer
# instance `ndm` is shared by every run - confirm whether its state should be
# reset as well (e.g. by recompiling with a fresh optimizer).
model.set_weights(initial_weights)
print('Train...')
metrics = Metrics(MOD_DIR + "none")
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          # Keys are integer class indices, as documented for class_weight.
          class_weight={0: 1, 1: 5},
          # Documented form is a tuple (x_val, y_val).
          validation_data=(x_test, y_test),
          callbacks=[metrics])

Train...
Train on 9937 samples, validate on 45500 samples
Epoch 1/10
Weighted F1 score found on Validation dataset :  (array([ 0.9930811 ,  0.01718499]), array([ 0.60602222,  0.62      ]), array([ 0.75270835,  0.03344301]), array([45000,   500]))
Epoch 2/10
Weighted F1 score found on Validation dataset :  (array([ 0.99490532,  0.0165952 ]), array([ 0.49037778,  0.774     ]), array([ 0.65695147,  0.0324937 ]), array([45000,   500]))
Epoch 3/10
Weighted F1 score found on Validation dataset :  (array([ 0.99389617,  0.01970865]), array([ 0.64408889,  0.644     ]), array([ 0.78164019,  0.03824682]), array([45000,   500]))
Epoch 4/10
Weighted F1 score found on Validation dataset :  (array([ 0.99511775,  0.02063024]), array([ 0.616,  0.728]), array([ 0.76095311,  0.04012346]), array([45000,   500]))
Epoch 5/10
Weighted F1 score found on Validation dataset :  (array([ 0.99290431,  0.02324098]), array([ 0.76184444,  0.51      ]), array([ 0.86216175,  0.04445607]), array([45000,   500]))
Epoch 6

<keras.callbacks.History at 0x7f06df62de90>

In [35]:
# Experiment: class 1 weighted 8x relative to class 0.
# Restore the saved starting weights so this run does not continue from an
# earlier fit().
# NOTE(review): set_weights() restores layer weights only; the optimizer
# instance `ndm` is shared by every run - confirm whether its state should be
# reset as well (e.g. by recompiling with a fresh optimizer).
model.set_weights(initial_weights)
print('Train...')
metrics = Metrics(MOD_DIR + "none")
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          # Keys are integer class indices, as documented for class_weight.
          class_weight={0: 1, 1: 8},
          # Documented form is a tuple (x_val, y_val).
          validation_data=(x_test, y_test),
          callbacks=[metrics])

Train...
Train on 9937 samples, validate on 45500 samples
Epoch 1/10
Weighted F1 score found on Validation dataset :  (array([ 0.99703628,  0.01318774]), array([ 0.2168,  0.942 ]), array([ 0.35615588,  0.02601132]), array([45000,   500]))
Epoch 2/10
Weighted F1 score found on Validation dataset :  (array([ 0.99590902,  0.02871714]), array([ 0.72491111,  0.732     ]), array([ 0.83907144,  0.05526614]), array([45000,   500]))
Epoch 3/10
Weighted F1 score found on Validation dataset :  (array([ 0.99853657,  0.02532064]), array([ 0.60651111,  0.92      ]), array([ 0.75464864,  0.04928483]), array([45000,   500]))
Epoch 4/10
Weighted F1 score found on Validation dataset :  (array([ 0.99479426,  0.04138595]), array([ 0.84506667,  0.602     ]), array([ 0.91383806,  0.07744757]), array([45000,   500]))
Epoch 5/10
Weighted F1 score found on Validation dataset :  (array([ 0.9933545 ,  0.04721823]), array([ 0.89686667,  0.46      ]), array([ 0.94264793,  0.08564513]), array([45000,   500]))
Epoch

<keras.callbacks.History at 0x7f06dfe870d0>