# 08_nlp_intro

In [123]:
import zipfile
import urllib.request
import random
import os
import pathlib
import datetime
import numpy as np
import pandas as pd
import io

from helper_functions import create_tensorboard_callback, unzip_data, plot_loss_curves, compare_historys

import matplotlib.pylab as plt
import matplotlib.image as mpimg

import tensorflow as tf
import tensorflow_hub as hub
import keras
from keras.layers import TextVectorization
from keras import layers
from keras.layers import preprocessing

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split

In [2]:
# downloading dataset

url = "https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip"
filename = "nlp_getting_started.zip"
file = urllib.request.urlretrieve(url, filename)

In [3]:
# unziping dataset

with zipfile.ZipFile("./data/nlp_getting_started.zip") as zip_ref:
    zip_ref.extractall()

## visualize

In [4]:
train_df = pd.read_csv("./data/nlp_getting_started/train.csv")
test_df = pd.read_csv("./data/nlp_getting_started/test.csv")
train_df.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [10]:
# shuffle data
train_df_shuffled = train_df.sample(frac=1, random_state=42)
train_df_shuffled

Unnamed: 0,id,keyword,location,text,target
2644,3796,destruction,,So you have a new weapon that can cause un-ima...,1
2227,3185,deluge,,The f$&amp;@ing things I do for #GISHWHES Just...,0
5448,7769,police,UK,DT @georgegalloway: RT @Galloway4Mayor: ÛÏThe...,1
132,191,aftershock,,Aftershock back to school kick off was great. ...,0
6845,9810,trauma,"Montgomery County, MD",in response to trauma Children of Addicts deve...,0
...,...,...,...,...,...
5226,7470,obliteration,Merica!,@Eganator2000 There aren't many Obliteration s...,0
5390,7691,panic,,just had a panic attack bc I don't have enough...,0
860,1242,blood,,Omron HEM-712C Automatic Blood Pressure Monito...,0
7603,10862,,,Officials say a quarantine is in place at an A...,1


In [11]:
len(train_df), len(test_df)

(7613, 3263)

In [12]:
random_index = random.randint(
    0,
    len(train_df) -
    5)  # create random indexes not higher than the total number of samples
for row in train_df_shuffled[["text", "target"
                              ]][random_index:random_index + 5].itertuples():
    _, text, target = row
    print(f"Target: {target}",
          "(real disaster)" if target > 0 else "(not real disaster)")
    print(f"Text:\n{text}\n")
    print("---\n")


Target: 1 (real disaster)
Text:
#NASA announced that a massive #solar storm is headed straight for us: http://t.co/CM5u55MiOl

---

Target: 0 (not real disaster)
Text:
@StephanieMarija 'light rain' was the forecast I based my dressing on. Light. Rain. Not incessant deluge!

---

Target: 1 (real disaster)
Text:
I think a Typhoon just passed through here lol

---

Target: 0 (not real disaster)
Text:
Shirley Caesar - Windstorm http://t.co/KoCH8SLasQ #nowplaying #listenlive

---

Target: 0 (not real disaster)
Text:
Corey Robinson having some solid reps at RT although as I type this he got beat on lightning dip by T. Walker

---



## split data into training and validation sets

In [14]:
# Use train_test_split to split training data into training and validation sets
train_sentences, val_sentences, train_labels, val_labels = train_test_split(
    train_df_shuffled["text"].to_numpy(),
    train_df_shuffled["target"].to_numpy(),
    test_size=0.1,  # dedicate 10% of samples to validation set
    random_state=42)  # random state for reproducibility

In [15]:
len(train_sentences), len(train_labels), len(val_sentences), len(val_labels)


(6851, 6851, 762, 762)

In [16]:
train_sentences[:3]

array(['@mogacola @zamtriossu i screamed after hitting tweet',
       'Imagine getting flattened by Kurt Zouma',
       '@Gurmeetramrahim #MSGDoing111WelfareWorks Green S welfare force ke appx 65000 members har time disaster victim ki help ke liye tyar hai....'],
      dtype=object)

## converting text into numbers

In [23]:
# instructor being weird ignore this cell.....

text_vectorizer = TextVectorization(
    max_tokens=None,
    standardize="lower_and_strip_punctuation",
    split="whitespace",
    ngrams=None,
    output_mode="int",
    output_sequence_length=None,
)


In [24]:
round(sum([len(i.split()) for i in train_sentences]) / len(train_sentences))

max_vocab_length = 10000
max_length = 15

text_vectorizer = TextVectorization(
    max_tokens=max_vocab_length,
    output_mode="int",
    output_sequence_length=max_length,
)

## fit the text vectorizer to the training text

In [25]:
text_vectorizer.adapt(train_sentences)

In [26]:
# create sample sentence and tokenize it
sample_sentence = "theres a flood in my street!"
text_vectorizer([sample_sentence])

<tf.Tensor: shape=(1, 15), dtype=int64, numpy=
array([[264,   3, 232,   4,  13, 698,   0,   0,   0,   0,   0,   0,   0,
          0,   0]])>

In [46]:
random_sentence = random.choice(train_sentences)
print(
    f"original sentence:\n {random_sentence}\nvectorized version:\n{text_vectorizer([random_sentence])}"
)


original sentence:
 Emergency root canal!! #tookitlikeaman #lovemydentist #sore
vectorized version:
[[  73 1998    1 7477    1 4570    0    0    0    0    0    0    0    0
     0]]


In [47]:
# getting unique words in vocab
words_in_vocab = text_vectorizer.get_vocabulary()
top_5_words = words_in_vocab[:5]
bottom_5_words = words_in_vocab[-5:]
len(words_in_vocab)

10000

In [48]:
top_5_words

['', '[UNK]', 'the', 'a', 'in']

In [49]:
bottom_5_words

['pages', 'paeds', 'pads', 'padres', 'paddytomlinson1']

## creating embedding layer

In [51]:
embedding = layers.Embedding(
    input_dim=max_vocab_length,
    output_dim=128,
    input_length=max_length,
)

embedding

<keras.layers.core.embedding.Embedding at 0x12999d5b0>

In [54]:
random_sentence = random.choice(train_sentences)
print(f"original sentence:\n {random_sentence}\nembedded version:\n")

sample_embed = embedding(text_vectorizer([random_sentence]))
sample_embed

original sentence:
 Hundreds feared drowned as migrant boat capsizes off Libya http://t.co/pPJi1tCNML
embedded version:



<tf.Tensor: shape=(1, 15, 128), dtype=float32, numpy=
array([[[-0.03552001, -0.00999812,  0.04908582, ..., -0.03251666,
         -0.00162953,  0.02935452],
        [-0.02490748, -0.0170994 , -0.04048036, ..., -0.02317482,
          0.03829101,  0.04885192],
        [-0.0391386 , -0.00747428, -0.03154691, ...,  0.02009287,
         -0.040349  , -0.01432111],
        ...,
        [-0.04980755,  0.04545769,  0.00577293, ...,  0.01777959,
          0.01583925, -0.04735428],
        [-0.04980755,  0.04545769,  0.00577293, ...,  0.01777959,
          0.01583925, -0.04735428],
        [-0.04980755,  0.04545769,  0.00577293, ...,  0.01777959,
          0.01583925, -0.04735428]]], dtype=float32)>

## model_0 modeling a text dataset

In [62]:
model_0 = Pipeline([("tfidf", TfidfVectorizer()), ("clf", MultinomialNB())])
model_0.fit(train_sentences, train_labels)
baseline_score = model_0.score(val_sentences, val_labels)
baseline_score

0.7926509186351706

In [63]:
baseline_preds = model_0.predict(val_sentences)
baseline_preds[:20]

array([1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])

In [65]:
def calculate_results(y_true, y_pred):
    """
  Calculates model accuracy, precision, recall and f1 score of a binary classification model.

  Args:
  -----
  y_true = true labels in the form of a 1D array
  y_pred = predicted labels in the form of a 1D array

  Returns a dictionary of accuracy, precision, recall, f1-score.
  """
    # Calculate model accuracy
    model_accuracy = accuracy_score(y_true, y_pred) * 100
    # Calculate model precision, recall and f1 score using "weighted" average
    model_precision, model_recall, model_f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted")
    model_results = {
        "accuracy": model_accuracy,
        "precision": model_precision,
        "recall": model_recall,
        "f1": model_f1
    }
    return model_results


In [66]:
baseline_results = calculate_results(y_true=val_labels, y_pred=baseline_preds)
baseline_results

{'accuracy': 79.26509186351706,
 'precision': 0.8111390004213173,
 'recall': 0.7926509186351706,
 'f1': 0.7862189758049549}

## Model_1 simple dense model

In [75]:
SAVE_DIR = "./tensorboard/"
inputs = layers.Input(shape=(1, ), dtype="string")
x = text_vectorizer(inputs)
x = embedding(x)
x = layers.GlobalAveragePooling1D(name="global_avg_pool_layer")(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_1 = tf.keras.Model(inputs, outputs, name="model_1_dense")


In [76]:
model_1.summary()

Model: "model_1_dense"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 15)               0         
 ectorization)                                                   
                                                                 
 embedding_1 (Embedding)     (None, 15, 128)           1280000   
                                                                 
 global_avg_pool_layer (Glob  (None, 128)              0         
 alAveragePooling1D)                                             
                                                                 
 dense_4 (Dense)             (None, 1)                 129       
                                                                 
Total params: 1,280,129
Trainable params: 1,280,129
N

In [77]:
model_1.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_1_history = model_1.fit(x=train_sentences,
                              y=train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_1_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_1_dense/20230221-153311
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [78]:
model_1.evaluate(val_sentences, val_labels)




[0.481535941362381, 0.7834645509719849]

In [79]:
model_1_pred_probs = model_1.predict(val_sentences)
model_1_pred_probs.shape



(762, 1)

In [80]:
model_1_pred_probs[:5]

array([[0.3574995 ],
       [0.7528848 ],
       [0.9977275 ],
       [0.1694827 ],
       [0.09879838]], dtype=float32)

## convert model predictions to label format

In [85]:
model_1_preds = tf.squeeze(tf.round(model_1_pred_probs))
model_1_preds[:10]

<tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 1., 1., 0., 0., 1., 1., 1., 1., 0.], dtype=float32)>

In [87]:
model_1_results = calculate_results(val_labels, model_1_preds)
model_1_results

{'accuracy': 78.34645669291339,
 'precision': 0.7868445599717488,
 'recall': 0.7834645669291339,
 'f1': 0.7809185675137833}

## model_2 lstm

In [92]:
inputs = layers.Input(shape=(1, ), dtype="string")
x = text_vectorizer(inputs)
x = embedding(x)
# x = layers.GlobalAveragePooling1D(name="global_avg_pool_layer")(x)
# x = layers.LSTM(64, return_sequences=True)(x)
x = layers.LSTM(64)(x)
# x = layers.Dense(64,activation="relu")(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_2 = tf.keras.Model(inputs, outputs, name="model_2_LSTM")


In [93]:
model_2.summary()

Model: "model_2_LSTM"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_3 (TextV  (None, 15)               0         
 ectorization)                                                   
                                                                 
 embedding_1 (Embedding)     (None, 15, 128)           1280000   
                                                                 
 lstm_2 (LSTM)               (None, 64)                49408     
                                                                 
 dense_7 (Dense)             (None, 1)                 65        
                                                                 
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
____________________________________________

In [94]:
model_2.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_2_history = model_2.fit(train_sentences,
                              train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_2_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_2_dense/20230221-160619
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [96]:
model_2_pred_probs = model_2.predict(val_sentences)
model_2_pred_probs[:10]



array([[1.4022008e-02],
       [9.0239912e-01],
       [9.9949247e-01],
       [3.7848540e-02],
       [5.1971315e-04],
       [9.9705082e-01],
       [8.0899692e-01],
       [9.9966520e-01],
       [9.9950707e-01],
       [3.1499305e-01]], dtype=float32)

In [97]:
model_2_preds = tf.squeeze(tf.round(model_2_pred_probs))
model_2_preds[:10]

<tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 1., 1., 0., 0., 1., 1., 1., 1., 0.], dtype=float32)>

In [100]:
model_2_results = calculate_results(val_labels, model_2_preds)
model_2_results

{'accuracy': 77.69028871391076,
 'precision': 0.7798819103655997,
 'recall': 0.7769028871391076,
 'f1': 0.7743498017353971}

## model_3 GRU cell

In [101]:
inputs = layers.Input(shape=(1, ), dtype="string")
x = text_vectorizer(inputs)
x = embedding(x)
x = layers.GRU(64)(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_3 = tf.keras.Model(inputs, outputs, name="model_3_LSTM")


In [102]:
model_3.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_3_history = model_3.fit(train_sentences,
                              train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_3_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_3_dense/20230221-161608
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## model_4 bidirectional

In [103]:
inputs = layers.Input(shape=(1, ), dtype="string")
x = text_vectorizer(inputs)
x = embedding(x)
x = layers.Bidirectional(layers.LSTM(64))(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_4 = tf.keras.Model(inputs, outputs, name="model_4_LSTM")


In [104]:
model_4.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_4_history = model_4.fit(train_sentences,
                              train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_4_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_4_dense/20230221-162153
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## model_5 Conv1D

In [106]:
tf.random.set_seed(42)
model_5_embedding = layers.Embedding(input_dim=max_vocab_length,
                                     output_dim=128,
                                     embeddings_initializer="uniform",
                                     input_length=max_length,
                                     name="embedding_5")

In [107]:
inputs = layers.Input(shape=(1, ), dtype="string")
x = text_vectorizer(inputs)
x = model_5_embedding(x)
x = layers.Conv1D(filters=32, kernel_size=5, activation="relu")(x)
x = layers.GlobalMaxPool1D()(x)
outputs = layers.Dense(1, activation="sigmoid")(x)
model_5 = tf.keras.Model(inputs, outputs, name="model_5_Conv1D")

In [108]:
model_5.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_5_history = model_5.fit(train_sentences,
                              train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_5_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_5_dense/20230221-163521
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## Model_6 tensorflowhub

In [109]:
embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
embed_samples = embed([
    sample_sentence,
    "When you call the universal sentence encoder on a sentence, it turns it into numbers."
])

print(embed_samples[0][:50])

tf.Tensor(
[-0.01157027  0.02485911  0.02878048 -0.01271501  0.03971541  0.08827761
  0.02680985  0.05589836 -0.01068731 -0.00597293  0.00639323 -0.01819516
  0.00030815  0.09105889  0.05874644 -0.03180626  0.01512474 -0.05162926
  0.00991366 -0.06865344 -0.04209306  0.02678978  0.03011006  0.00321068
 -0.00337968 -0.04787356  0.0226672  -0.00985928 -0.04063615 -0.01292093
 -0.04666383  0.056303   -0.03949254  0.00517684  0.02495828 -0.0701444
  0.0287151   0.04947681 -0.00633977 -0.08960192  0.0280712  -0.00808363
 -0.01360601  0.0599865  -0.10361788 -0.05195374  0.00232955 -0.02332529
 -0.03758105  0.03327728], shape=(50,), dtype=float32)


In [110]:
sentence_encoder_layer = hub.KerasLayer(
    "https://tfhub.dev/google/universal-sentence-encoder/4",
    input_shape=[],
    dtype=tf.string,
    trainable=False,
    name="USE")






In [117]:
model_6 = tf.keras.Sequential([
    sentence_encoder_layer,
    layers.Dense(64, activation="relu"),
    layers.Dense(1, activation="sigmoid"),
],
                              name="model_6_USE")


In [118]:
model_6.summary()

Model: "model_6_USE"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 USE (KerasLayer)            (None, 512)               256797824 
                                                                 
 dense_12 (Dense)            (None, 64)                32832     
                                                                 
 dense_13 (Dense)            (None, 1)                 65        
                                                                 
Total params: 256,830,721
Trainable params: 32,897
Non-trainable params: 256,797,824
_________________________________________________________________


In [119]:
model_6.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_6_history = model_6.fit(train_sentences,
                              train_labels,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_6_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_6_dense/20230221-171130
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## model_7 tf hub model with 10% of the data

keep in mind the below cell is incorrect hes going to change it later....
i was like why is he using train_df_shuffled.....

In [121]:
train_10_percent = train_df_shuffled[["text",
                                      "target"]].sample(frac=0.1,
                                                        random_state=42)

train_sentences_10_percent = train_10_percent["text"].to_list()
train_labels_10_percent = train_10_percent["target"].to_list()

In [124]:
train_sentences_90_percent, train_sentences_10_percent, train_labels_90_percent, train_labels_10_percent = train_test_split(
    np.array(train_sentences), train_labels, test_size=0.1, random_state=42)


In [None]:
model_7 = tf.keras.models.clone_model(model_6)

model_7.compile(loss="binary_crossentropy",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["accuracy"])
model_7_history = model_7.fit(train_sentences_10_percent,
                              train_labels_10_percent,
                              epochs=5,
                              validation_data=(val_sentences, val_labels),
                              callbacks=[
                                  create_tensorboard_callback(
                                      dir_name=SAVE_DIR,
                                      experiment_name="model_7_dense")
                              ])


Saving TensorBoard log files to: ./tensorboard//model_7_dense/20230221-173932
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [126]:
model_6.save("saves/08/model_6.h5")

In [127]:
model_6.save("saves/08/model_6")



INFO:tensorflow:Assets written to: saves/08/model_6/assets


INFO:tensorflow:Assets written to: saves/08/model_6/assets


## finding the most wrong examples

In [131]:
model_6_pred_probs = model_6.predict(val_sentences)
model_6_pred_probs[:10]



array([[0.16901767],
       [0.74339193],
       [0.98894936],
       [0.20092905],
       [0.7338651 ],
       [0.6801801 ],
       [0.9806574 ],
       [0.97457296],
       [0.9235823 ],
       [0.08834559]], dtype=float32)

In [132]:
model_6_preds = tf.squeeze(tf.round(model_6_pred_probs))
model_6_preds[:10]

<tf.Tensor: shape=(10,), dtype=float32, numpy=array([0., 1., 1., 0., 1., 1., 1., 1., 1., 0.], dtype=float32)>

In [133]:
# Create dataframe with validation sentences and best performing model predictions
val_df = pd.DataFrame({"text": val_sentences,
                       "target": val_labels,
                       "pred": model_6_preds,
                       "pred_prob": tf.squeeze(model_6_pred_probs)})
val_df.head()

Unnamed: 0,text,target,pred,pred_prob
0,DFR EP016 Monthly Meltdown - On Dnbheaven 2015...,0,0.0,0.169018
1,FedEx no longer to transport bioterror germs i...,0,1.0,0.743392
2,Gunmen kill four in El Salvador bus attack: Su...,1,1.0,0.988949
3,@camilacabello97 Internally and externally scr...,1,0.0,0.200929
4,Radiation emergency #preparedness starts with ...,1,1.0,0.733865


In [134]:
most_wrong = val_df[val_df["target"] != val_df["pred"]].sort_values("pred_prob", ascending=False)

In [135]:
for row in most_wrong[:10].itertuples(): # loop through the top 10 rows (change the index to view different rows)
  _, text, target, pred, prob = row
  print(f"Target: {target}, Pred: {int(pred)}, Prob: {prob}")
  print(f"Text:\n{text}\n")
  print("----\n")

Target: 0, Pred: 1, Prob: 0.9109154939651489
Text:
? High Skies - Burning Buildings ? http://t.co/uVq41i3Kx2 #nowplaying

----

Target: 0, Pred: 1, Prob: 0.8621683120727539
Text:
FedEx will no longer transport bioterror pathogens in wake of anthrax lab mishaps http://t.co/lHpgxc4b8J

----

Target: 0, Pred: 1, Prob: 0.8595730066299438
Text:
@noah_anyname That's where the concentration camps and mass murder come in. 
 
EVERY. FUCKING. TIME.

----

Target: 0, Pred: 1, Prob: 0.8494942784309387
Text:
@SonofLiberty357 all illuminated by the brightly burning buildings all around the town!

----

Target: 0, Pred: 1, Prob: 0.8467164635658264
Text:
@madonnamking RSPCA site multiple 7 story high rise buildings next to low density character residential in an area that floods

----

Target: 0, Pred: 1, Prob: 0.8205220103263855
Text:
Ashes 2015: AustraliaÛªs collapse at Trent Bridge among worst in history: England bundled out Australia for 60 ... http://t.co/t5TrhjUAU0

----

Target: 0, Pred: 1, Pr

## making predictions on the test dataset

In [139]:
test_sentence = test_df["text"].to_list()
test_samples = random.sample(test_sentence,10,)

for test_sample in test_samples:
  pred_prob = tf.squeeze(model_6.predict([test_sample])) # has to be list
  pred = tf.round(pred_prob)
  print(f"Pred: {int(pred)}, Prob: {pred_prob}")
  print(f"Text:\n{test_sample}\n")
  print("----\n")

Pred: 0, Prob: 0.08294907212257385
Text:
PIERCE THE VEIL Rubber Bracelet Wristband Collide with the Sky - Full read by eBay http://t.co/H4WUPpaT6k http://t.co/WisjhDH58n

----

Pred: 1, Prob: 0.5424458980560303
Text:
U.S National Park Services Tonto National Forest: Stop the Annihilation of the Salt River Wild Horse... https://t.co/FLcQQeZnVW via @Change

----

Pred: 0, Prob: 0.20131106674671173
Text:
@thehill this is 1 example of y the Conservatives annihilated Burton v Wiimington Prkng Auth while Liberals stood by &amp;have done nothing

----

Pred: 0, Prob: 0.10217876732349396
Text:
#People #Tilly the #Confused Cat Overcomes Horrible Attack to Win Hearts http://t.co/QtrsYxFzo3

----

Pred: 1, Prob: 0.7648373246192932
Text:
Violent Forces Radio: Now Playing Torture - Storm Alert
TuneIn Player @ http://t.co/XsSgEdSbH4

----

Pred: 0, Prob: 0.27731916308403015
Text:
8/5 Arizona Cardinals news: Jaron Brown makes headlines minor injuries continue to build http://t.co/QxKncvlrh1 http://t.