In [None]:
import tensorflow.compat.v1 as tf
import tensorflow_hub as hub

# `tf` is already the TF1 compatibility namespace, so call the switch on it directly.
tf.disable_eager_execution()

# ELMo in a model

In [None]:
from tensorflow import keras
from keras import Model
from keras.layers import Input, Lambda, Dense, LSTM
import keras.backend as K

In [None]:
# Fetch the pre-trained ELMo v3 module over HTTPS (not plain HTTP) so the
# download cannot be tampered with in transit; the module weights stay frozen.
url = "https://tfhub.dev/google/elmo/3"
embed = hub.Module(url, trainable=False)

In [None]:
# Tokens per sentence and sentences per batch used by the token-level ELMo cells.
max_len, batch_size = 100, 30

In [None]:
def ELMOEmbedding(x):
  """Embed a batch of pre-tokenized sentences with the ELMo "tokens" signature.

  Args:
    x: tensor of tokens, cast to string here. Assumed to have shape
      (batch, max_len), matching the `Input(shape=(max_len,))` layer that
      feeds this function -- TODO confirm for other callers.

  Returns:
    The module's 'elmo' output for the batch.
  """
  tokens = tf.cast(x, tf.string)
  # Size the length vector from the runtime batch dimension instead of the
  # global `batch_size`, so batches of any size (including the last, smaller
  # one) work. Every row is declared to be `max_len` tokens long, as before.
  # No tf.squeeze: on a (batch, max_len) input it is a no-op, except that it
  # would drop the batch axis for a single-sentence batch.
  sequence_len = tf.fill(tf.shape(tokens)[:1], max_len)
  return embed(
      inputs={"tokens": tokens, "sequence_len": sequence_len},
      signature="tokens",
      as_dict=True)['elmo']

In [None]:
# Token-level model: string tokens -> ELMo embeddings -> LSTM -> one sigmoid score per step.
input_text = Input(shape=(max_len,), dtype=tf.string)
embedding = Lambda(ELMOEmbedding, output_shape=(max_len, 1024))(input_text)
lstm = LSTM(
    units=128,
    return_sequences=True,
    dropout=0.2,
    recurrent_dropout=0.2,
)(embedding)
dense = Dense(1, activation='sigmoid')(lstm)
model = Model(inputs=[input_text], outputs=dense)

In [None]:
# Show each layer's output shape and parameter count.
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 100)]             0         
                                                                 
 lambda (Lambda)             (30, None, 1024)          0         
                                                                 
 lstm (LSTM)                 (30, None, 128)           590336    
                                                                 
 dense (Dense)               (30, None, 1)             129       
                                                                 
Total params: 590,465
Trainable params: 590,465
Non-trainable params: 0
_________________________________________________________________


# ELMo for spam classification

In [None]:
import pandas as pd

In [None]:
# The spam corpus is read as latin-1 text.
data = pd.read_csv(
    'spam.csv',
    encoding="latin-1",
)

In [None]:
# Preview the first rows of the spam frame.
data.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [None]:
# Column `v2` provides the inputs (x) and column `v1` the targets (y).
x, y = list(data['v2']), list(data['v1'])

In [None]:
from sklearn import preprocessing
import numpy as np

In [None]:
# Fit the label encoder on every label, then encode the first one as a check.
le = preprocessing.LabelEncoder().fit(y)
le.classes_
le.transform([y[0]])

array([0])

In [None]:
def encode(le, labels):
  """Turn string labels into one-hot rows.

  Args:
    le: fitted label encoder exposing `transform` and `classes_`.
    labels: iterable of labels known to `le`.

  Returns:
    One-hot array with one column per class known to `le`.
  """
  enc = le.transform(labels)
  # Pin the width to the encoder's class count; otherwise to_categorical sizes
  # the output from the largest id present, so a batch that lacks the last
  # class would come back with too few columns.
  return keras.utils.to_categorical(enc, num_classes=len(le.classes_))

def decode(le, one_hot):
  """Map one-hot (or score) rows back to labels via the encoder.

  Takes the arg-max along axis 1 of `one_hot` and passes the resulting
  class ids to `le.inverse_transform`.
  """
  class_ids = np.asarray(one_hot).argmax(axis=1)
  return le.inverse_transform(class_ids)

In [None]:
# Round-trip check: encode three labels to one-hot rows, then decode them back.
test = encode(le, ['ham', 'spam', 'ham'])
print(test)

dec_test = decode(le, test)
print(dec_test)

[[1. 0.]
 [0. 1.]
 [1. 0.]]
['ham' 'spam' 'ham']


In [None]:
# Show the first raw message.
print(x[0])

Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...


In [None]:
# The ELMo layer takes raw strings, so the inputs need no encoding of their own.
x_enc = x
y_enc = encode(le, y)

# The first 5000 samples form the training set; everything after is the test set.
x_train, x_test = np.array(x_enc[:5000]), np.array(x_enc[5000:])
y_train, y_test = np.array(y_enc[:5000]), np.array(y_enc[5000:])

In [None]:
# Sentence Representation

def ELMOEmbed(x):
  """Embed whole sentences with the ELMo "default" signature.

  Args:
    x: tensor of sentences, cast to string here. Assumed to arrive as
      (batch, 1) from the Keras input layer -- TODO confirm.

  Returns:
    The module's 'default' output for the batch.
  """
  # Squeeze only axis 1. A bare tf.squeeze would also remove the batch axis
  # whenever a batch holds a single sentence, handing the module a scalar.
  sentences = tf.squeeze(tf.cast(x, tf.string), axis=1)
  return embed(
      inputs=sentences,
      signature="default",
      as_dict=True)['default']

In [None]:
from keras.layers import Input, Lambda, Dense
from keras.models import Model
import keras.backend as K

In [None]:
# Sentence-level classifier: raw string -> ELMo sentence vector -> dense -> softmax over 2 classes.
# Distinct names are used on purpose: `input` would shadow the Python builtin,
# and `x` still holds the raw message list that the split cell reads.
sentence_input = Input(shape=(None,), dtype=tf.string)
# Lambda's output_shape excludes the batch axis; the "default" ELMo output is
# one 1024-d vector per sentence.
sentence_vec = Lambda(ELMOEmbed, output_shape=(1024,))(sentence_input)
hidden = Dense(128, activation='relu')(sentence_vec)
pred = Dense(2, activation='softmax')(hidden)

model = Model(inputs=[sentence_input], outputs=pred)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# The ELMo embedding layer takes raw strings as input.
# The labels are one-hot vectors.

print(x_train[0])
print(y_train[0])

Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...
[1. 0.]


In [None]:
# Train for one epoch inside an explicit TF1 session registered with Keras.
with tf.Session() as sess:
    K.set_session(sess)
    # Variables and tables are initialized before fitting.
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    history = model.fit(x_train, y_train, batch_size=32, epochs=1)
    # Save the trained weights to disk.
    model.save_weights('./elmo_for_email_spam.h5')

Train on 5000 samples

In [None]:
# Prediction
# The session used for training was closed when its `with` block ended, so
# open a new one, initialize it, and restore the saved weights before predicting.
with tf.Session() as session:
  K.set_session(session)
  session.run(tf.global_variables_initializer())
  session.run(tf.tables_initializer())
  model.load_weights('./elmo_for_email_spam.h5')
  predicts = model.predict(x_test, batch_size=32)

# Keep `y_test` one-hot and decode into a new name so this cell can be re-run.
y_true = decode(le, y_test)
y_preds = decode(le, predicts)
print(y_preds)

# Shakespeare Dataset

In [None]:
! pip install datasets

In [None]:
import os
import requests

# Download the tiny-Shakespeare corpus once and cache it locally; split it
# into train and test sets manually afterwards, or use it directly.

file_name = "shakespeare.txt"
if not os.path.isfile(file_name):
    corpus_url = "https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt"
    # Bound the wait and fail on HTTP errors so an error page is never cached
    # as the corpus.
    response = requests.get(corpus_url, timeout=30)
    response.raise_for_status()

    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(response.text)

In [None]:
# Read the file into a list of lines; line endings are kept.
with open(file_name) as corpus_file:
  lines = list(corpus_file)


In [None]:
# Scratch check: split on single spaces, re-join with a single space, print the result.
t = 'this is a fuckign joke'
t = ' '.join(t.split(' '))
print(t)

this is a fuckign joke


In [None]:
# Preprocessing
# NOTE(review): this loop only rebinds the loop variable `line`; nothing is
# written back to `lines`, so the list is unchanged afterwards. Splitting on
# ' ' and re-joining with ' ' also reproduces the same string. Confirm what
# cleanup was intended here.

for line in lines:
  line= line.split(' ')
  line= str.join(' ', line)


In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, Concatenate, TimeDistributed, Bidirectional
from tensorflow.keras.models import Model

# Read the whole Shakespeare file into a single string.
with open('shakespeare.txt', 'r') as corpus_file:
    text = corpus_file.read()




In [None]:
# Re-read the file as a list of lines; this replaces whatever `text` held before.
with open(file_name) as corpus_file:
  text = list(corpus_file)

In [None]:
# Show the first ten entries of `text`.
print(text[:10])

['First Citizen:\n', 'Before we proceed any further, hear me speak.\n', '\n', 'All:\n', 'Speak, speak.\n', '\n', 'First Citizen:\n', 'You are all resolved rather to die than to famish?\n', '\n', 'All:\n']


In [None]:
# Preprocess the data
# Pass the texts as a flat list of strings. Wrapping a list of lines in another
# list makes the Tokenizer treat each whole line as one token, which leaves
# `sequences` as a single flat list of ids instead of one id list per text.
texts = [text] if isinstance(text, str) else list(text)
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [None]:
# Report how many entries `sequences` holds.
print(len(sequences))

40000


In [None]:
# Pad every sequence at the end to the length of the longest one.
# Assumes `sequences` is a list of token-id lists -- TODO confirm.
max_len = max(len(seq) for seq in sequences)
vocab_size = 1 + len(tokenizer.word_index)
data = pad_sequences(sequences, maxlen=max_len, padding='post')

# Yelp

In [None]:
! pip install datasets

In [None]:
import tensorflow_datasets as tfds

In [None]:
# Load the Yelp review dataset through tfds; `ds` is indexed by split name below.
ds = tfds.load('huggingface:yelp_review_full/yelp_review_full')

In [None]:
# Print each key of `ds`.
for split_name in ds:
  print(split_name)

train
test


In [None]:
def _collect(split):
  """Walk one split once, returning its 'label' and 'text' fields as two aligned lists."""
  labels, texts = [], []
  for example in split:
    labels.append(example['label'])
    texts.append(example['text'])
  return labels, texts


train_label, train_text = _collect(ds['train'])
test_label, test_text = _collect(ds['test'])


In [None]:
# Inspect the first test label.
print(test_label[0])

tf.Tensor(0, shape=(), dtype=int64)


In [None]:
# Inspect the first training review and its Python type.
print(train_text[0])
print(type(train_text[0]))

tf.Tensor(b"My husband and I LOVE Red Devil. We eat Ina few times a month and order take out at least twice a week. Red Devil never fails to deliver an excellent meal. We generally eat dinner late (after 8pm) so we rarely wait for a table. Dinner hours expect a wait, but definitely well worth every minute!! The take out guys are awesome!! The entire menu is delicious! You can't go wrong with red devil. Enjoy!!", shape=(), dtype=string)
<class 'tensorflow.python.framework.ops.EagerTensor'>


In [None]:
# Decode the tensor's bytes instead of slicing its printed repr: the repr
# escapes newlines and non-ASCII bytes, and the slice offsets depend on the
# exact repr format.
t = train_text[100].numpy().decode('utf-8')
print(t)

It's a little bitter sweet for me here...I just never know what to expect is what i mean by that.\\n\\nNow I have gone to Lulu's countless times...every time being unique in it's own way, whether it's the food or the service and mostly not in a good way. I can't be a full supporter like most people are because the people that like Lulu's don't agree with my restaurant choices and only think Lulu's is God's gift to Plaza Midwood or to Charlotte at that, and will always put it at the top of they're list to recommend to people!!! That's fine but I really can't take their recommendation on anything else in this town then,sorry!! \\n\\nI came here two Sundays ago and will say that my service and food were both good (this time). It was also 2pm and they were very quiet I assumed we got there after their Brunch rush.\\n\\nOn all of my other visits here their has always been something wrong the service is my biggest of issues and then the food because one person can get something that is reall

In [None]:
# Convert each string tensor to a Python str by decoding its bytes. Slicing
# str(tensor) would keep the escaped repr (e.g. literal "\\n") and depends on
# the repr's exact prefix and suffix lengths.
train__text = [review.numpy().decode('utf-8') for review in train_text]
print(type(train__text[0]))
print(train__text[:10])

<class 'str'>
["My husband and I LOVE Red Devil. We eat Ina few times a month and order take out at least twice a week. Red Devil never fails to deliver an excellent meal. We generally eat dinner late (after 8pm) so we rarely wait for a table. Dinner hours expect a wait, but definitely well worth every minute!! The take out guys are awesome!! The entire menu is delicious! You can't go wrong with red devil. Enjoy!!", 'Their food size is pretty good one . But honestly the dumplings we got were way worse than my home made ones ... Too plain to have it . My order was pork stone rice which was super plain ...totally killed our appetite that night . Hope they can boost their food flavor up.', 'My mom and I go here quite a bit when we come to the Bellagio to look at the garden display.  The line was not too long but we probably waited about 20 minutes for a table.  Not too bad for this place.  \\\\n\\\\nIt did take a bit for someone to come over to the table.  The drink person took our drink 

In [None]:
# Convert each string tensor to a Python str by decoding its bytes. Slicing
# str(tensor) would keep the escaped repr (e.g. literal "\\n") and depends on
# the repr's exact prefix and suffix lengths.
test__text = [review.numpy().decode('utf-8') for review in test_text]
print(type(test__text[0]))
print(test__text[:10])

<class 'str'>
["I was so disappointed in my order from them today.   Ordered the pizza special with Canadian bacon and pineapple for delivery.  Both were thrown on top of a previously baked pizza with no cheese added.  In fact it wasn't Canadian bacon but rough cut large unevenly diced chunks of ham.  I asked multiple people in the office and they had never seen anything like it.  Wish I could upload a picture. I usually eat there at least once a week so I tried to call and talk to a manager.  The phone was answered with please hold in an annoyed voice, followed by another person picking up the phone with a yeah.  I asked to speak to a manager and was told the person who put me on hold was the manager.  I repeated that I needed the manager and was informed that both the manager ( who was yelling in the background I don't have time for this) and the employee didn't have time to talk to me.   HORRIBLE customer service won't be eating here again.", 'Great price for the breakfast deal $4.9

In [None]:
# Read the integer out of each scalar tensor. Picking character 10 of the
# tensor's repr only works while every label is a single digit.
train__label = [int(label.numpy()) for label in train_label]
print(type(train__label[0]))
print(train__label[:10])

<class 'int'>
[4, 0, 2, 1, 2, 4, 2, 3, 3, 1]


In [None]:
# Print the largest and smallest training label.
print(max(train__label))
print(min(train__label))

4
0


In [None]:
# Read the integer out of each scalar tensor. Picking character 10 of the
# tensor's repr only works while every label is a single digit.
test__label = [int(label.numpy()) for label in test_label]
print(type(test__label[0]))
print(test__label[:10])

<class 'int'>
[0, 4, 0, 2, 1, 3, 3, 0, 0, 0]


In [None]:
import pandas as pd

# Pair texts with labels column-wise, then build one DataFrame per split.
dic_train = {
    'text': train__text,
    'label': train__label,
}
dic_test = {
    'text': test__text,
    'label': test__label,
}

df_train, df_test = pd.DataFrame(dic_train), pd.DataFrame(dic_test)

In [None]:
# NOTE(review): this prints the whole frame as plain text; `df_train.head()` would give a shorter preview.
print(df_train)

In [None]:
# Write both splits with a .csv extension so they can be read back under the
# 'yelp_train.csv' / 'yelp_test.csv' names.
df_train.to_csv('yelp_train.csv')
df_test.to_csv('yelp_test.csv')

In [None]:
# Read the training CSV into `data` and preview its first rows.
data= pd.read_csv('yelp_train.csv')
data.head()