## Mount Drive

In [1]:
# Mount Google Drive into the Colab filesystem so the dataset and saved weights
# under 'My Drive' are reachable at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [2]:
# List the project folder on the mounted Drive to confirm the expected files are present.
ls drive/My\ Drive/Insight

2019.xlsx  data_labeled.csv  Make_Predictions.ipynb  predicts.txt
data.csv   elmo-model.h5     Multi_Class_Elmo.ipynb  topics_labels.csv


In [0]:
# Copy the labelled dataset from Drive into the working directory, where it is read below.
!cp '/content/drive/My Drive/Insight/topics_labels.csv' 'topics_labels.csv'

## Import Libraries

In [0]:
import numpy as np
import pandas as pd
import re

In [5]:
# Pin Keras to 2.2.4 (this replaces the preinstalled 2.2.5 shown in the install log).
# NOTE(review): presumably required by the custom layer / TF1 session code below — confirm.
!pip install keras==2.2.4

Collecting keras==2.2.4
[?25l  Downloading https://files.pythonhosted.org/packages/5e/10/aa32dad071ce52b5502266b5c659451cfd6ffcbf14e6c8c4f16c0ff5aaab/Keras-2.2.4-py2.py3-none-any.whl (312kB)
[K     |████████████████████████████████| 317kB 2.8MB/s 
Installing collected packages: keras
  Found existing installation: Keras 2.2.5
    Uninstalling Keras-2.2.5:
      Successfully uninstalled Keras-2.2.5
Successfully installed keras-2.2.4


In [6]:
import tensorflow as tf
import pandas as pd
import tensorflow_hub as hub
import os
import re
from keras import backend as K
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer
import numpy as np

Using TensorFlow backend.


## Read Data

In [0]:
# Load the labelled topics and shuffle the rows, because the train/test split
# further down is purely positional. The fixed seed makes the shuffle (and
# therefore the split and every reported metric) reproducible across runs.
data_df = pd.read_csv('topics_labels.csv')
data_df = data_df.sample(frac=1, random_state=42)

In [8]:
from sklearn import preprocessing

# Truncate every document to its first 100 whitespace-separated tokens.
texts = [' '.join(doc.split()[:100]) for doc in data_df['text']]

# Topic names, in the same row order as `texts`.
labels = list(data_df['topic'])

# Fit the label <-> integer id mapping on all topics.
le = preprocessing.LabelEncoder()
le.fit(labels)

LabelEncoder()

In [0]:
from keras.utils import to_categorical
def encode(le, labels):
    """One-hot encode labels with a fitted label encoder.

    Args:
        le: fitted encoder exposing ``transform`` and ``classes_``
            (a scikit-learn ``LabelEncoder`` in this notebook).
        labels: iterable of labels known to ``le``.

    Returns:
        2-D one-hot array with one row per label and one column per class
        known to ``le``.
    """
    enc = le.transform(labels)
    # Pin the width to the encoder's class count. Without num_classes,
    # to_categorical sizes the output from the largest id present, so a
    # subset of labels that lacks the last class would come out too narrow.
    return to_categorical(enc, num_classes=len(le.classes_))

def decode(le, one_hot):
    """Convert one-hot (or score) rows back to the encoder's original labels.

    Args:
        le: fitted encoder exposing ``inverse_transform``.
        one_hot: 2-D array-like; the column index of each row's maximum is
            taken as that row's class id.

    Returns:
        Whatever ``le.inverse_transform`` returns for the per-row class ids.
    """
    return le.inverse_transform(np.argmax(one_hot, axis=1))

In [0]:
# One-hot targets for every row; the texts are passed through unchanged as raw strings.
label_enc = encode(le,labels)
text_enc = texts

In [11]:
# Sanity check: (number of rows, number of classes).
label_enc.shape

(32500, 10)

In [0]:
# Positional split of the shuffled rows: the first 25,000 train the model,
# everything after that is held out for validation/testing.
n_train = 25000
text_train, text_test = np.asarray(text_enc[:n_train]), np.asarray(text_enc[n_train:])
label_train, label_test = np.asarray(label_enc[:n_train]), np.asarray(label_enc[n_train:])

## Build ELMo Embedding Layer

In [0]:
# Instantiate the TF-Hub ELMo module at notebook level.
# NOTE(review): `elmo` is not referenced by any later cell shown here;
# ElmoEmbeddingLayer.build() creates its own hub.Module. This looks like a
# redundant second copy of the module in the graph — confirm and consider removing.
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)

In [0]:
# Create a custom layer that allows us to update weights (lambda layers do not have trainable parameters!)

class ElmoEmbeddingLayer(Layer):
    """Keras layer wrapping the TF-Hub module 'https://tfhub.dev/google/elmo/2'.

    The layer receives a string tensor (one text per row), feeds it to the hub
    module and returns the module's 'default' output. The module's trainable
    variables are registered as this layer's trainable weights in ``build`` so
    that Keras includes them in training.
    """

    def __init__(self, **kwargs):
        """Set layer attributes and forward ``kwargs`` to ``Layer.__init__``.

        Args:
            **kwargs: passed unchanged to the Keras ``Layer`` constructor.
        """
        # Width of the embedding reported by compute_output_shape.
        self.dimensions = 1024
        # NOTE(review): assigned before Layer.__init__ runs; the base constructor
        # may overwrite it from kwargs — confirm this assignment is needed.
        self.trainable=True
        super(ElmoEmbeddingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the hub module and register its trainable variables.

        Args:
            input_shape: shape of the layer input, forwarded to ``Layer.build``.
        """
        # The module is created under the name "<layer name>_module" so that its
        # variables can be selected by that scope prefix below.
        self.elmo = hub.Module('https://tfhub.dev/google/elmo/2', trainable=self.trainable,
                               name="{}_module".format(self.name))

        # Add every trainable variable under the module's scope to this layer's weights.
        self.trainable_weights += K.tf.trainable_variables(scope="^{}_module/.*".format(self.name))
        super(ElmoEmbeddingLayer, self).build(input_shape)

    def call(self, x, mask=None):
        """Run the hub module on the input strings.

        Args:
            x: input tensor; it is cast to ``tf.string`` and axis 1 is squeezed
                away before being passed to the module.
            mask: accepted for Keras API compatibility; not used here.

        Returns:
            The 'default' entry of the module's output dict.
        """
        result = self.elmo(K.squeeze(K.cast(x, tf.string), axis=1),
                      as_dict=True,
                      signature='default',
                      )['default']
        return result

    def compute_mask(self, inputs, mask=None):
        """Return a mask that is False wherever the input equals '--PAD--'.

        Args:
            inputs: the layer's input tensor.
            mask: incoming mask; not used here.
        """
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        """Report the output shape as (batch size, ``self.dimensions``)."""
        return (input_shape[0], self.dimensions)

## Build Model

In [15]:

# One raw text string per sample; the ELMo layer maps it to a fixed-size vector.
input_text = layers.Input(shape=(1,), dtype="string")
embedding = ElmoEmbeddingLayer()(input_text)
dense = layers.Dense(128, activation='relu')(embedding)

# Each document carries exactly one topic: targets are one-hot rows built with
# to_categorical and predictions are decoded with argmax. That is single-label
# multi-class classification, so the head is a softmax trained with categorical
# cross-entropy. With sigmoid + binary_crossentropy, Keras resolves 'accuracy'
# to binary accuracy averaged over all 10 outputs, which overstates performance.
pred = layers.Dense(10, activation='softmax')(dense)

model = Model(inputs=[input_text], outputs=pred)

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()













INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
elmo_embedding_layer_1 (Elmo (None, 1024)              4         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               131200    
_________________________________________________________________
dense_2 (Dense)              (None, 10)                1290      
Total params: 132,494
Trainable params: 132,494
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Train inside a dedicated TF session that is registered as the Keras session.
with tf.Session() as session:
    K.set_session(session)
    # Initialise graph variables and lookup tables before fitting.
    session.run(tf.global_variables_initializer())  
    session.run(tf.tables_initializer())
    # The held-out split doubles as the validation set during training.
    history = model.fit(text_train, label_train, epochs=2, batch_size=6, validation_data=(text_test,label_test))
    # Save the weights before the session closes; they are reloaded for prediction.
    model.save_weights('./elmo-model.h5')













Train on 25000 samples, validate on 7500 samples
Epoch 1/1


























In [0]:
# Predict in a new session. Variables are re-initialised here, so the trained
# weights are loaded from disk before calling predict.
with tf.Session() as session:
    K.set_session(session)
    session.run(tf.global_variables_initializer())
    session.run(tf.tables_initializer())
    model.load_weights('./elmo-model.h5')  
    # One row of per-class scores for each held-out text.
    predicts = model.predict(text_test, batch_size=128)
    

In [0]:
# Back up the saved weights to Drive so they outlive the Colab runtime.
!cp 'elmo-model.h5' '/content/drive/My Drive/Insight/elmo-model.h5'

## Model Evaluation

In [0]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [0]:
# Sanity check on the shape of the prediction array.
np.shape(predicts)

In [0]:
# Keep the prediction array's shape for later use.
predicts_shape=np.shape(predicts)

In [0]:
# Inspect the raw model output for one held-out sample (index 18).
predicts[18]

In [0]:
# Predicted class id per sample: the index of the highest score in each row.
# (No need to pre-allocate an array; argmax returns a new one.)
y_pred = predicts.argmax(axis=-1)

In [0]:
# Peek at the first 20 predicted class ids.
y_pred[:20]

In [0]:
# Binarizer fitted on the class ids 0..9; used below to turn predicted ids
# into one-hot rows comparable with label_test.
lb = preprocessing.LabelBinarizer()
lb.fit(list(range(0,10)))

In [0]:
# One-hot version of the predicted class ids.
y_pred_ohe = lb.transform(y_pred)

In [0]:
# Inspect the one-hot row for the first held-out sample.
y_pred_ohe[0]

In [0]:
# Map the first 20 predictions back to their original topic labels.
decode(le, y_pred_ohe)[:20]

In [0]:
# The corresponding first 20 held-out texts, for eyeballing against the predictions above.
text_test[:20]

In [0]:
# Accuracy on the held-out set, computed on the one-hot label matrices.
test_accuracy = accuracy_score(label_test, y_pred_ohe)
print('Test Accuracy Score: ', format(test_accuracy))

In [0]:
# Macro-averaged precision on the held-out set.
test_precision = precision_score(label_test, y_pred_ohe, average='macro')
print('Test Precision Score: ', format(test_precision))

In [0]:
# Macro-averaged recall on the held-out set.
test_recall = recall_score(label_test, y_pred_ohe, average='macro')
print('Test Recall Score: ', format(test_recall))

In [0]:
# Macro-averaged F1 on the held-out set; the printed label names the metric
# that is actually computed (f1_score).
print('Test F1 Score: ', format(f1_score(label_test, y_pred_ohe, average = 'macro')))