In [21]:
from __future__ import print_function
from __future__ import division
import tensorflow as tf
import collections
import nltk
import numpy as np
from nltk.tokenize.casual import TweetTokenizer
import sys;
sys.path.insert(0, '../code')
from w266_common import utils, vocabulary
import time
import datetime
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
import re
np.random.seed(266)

In [22]:
import csv
tokenizer = TweetTokenizer()
x_data = []
x_contexts = []
labels = []
sentences  = []
contexts = []
with open('../data/merged_data_v3.csv', 'r') as csvfile:
    linereader = csv.reader(csvfile, delimiter = '|')
    for i, row in enumerate(linereader):
        if i == 0:
            continue
        sentence, context, sarcasm = row
        sentence = re.sub("RT @[^\s]+:", "retweet", sentence)
        sentences.append(sentence)
        contexts.append(context)
        x_tokens = utils.canonicalize_words(tokenizer.tokenize(sentence), hashtags = False)
        context_tokens = utils.canonicalize_words(tokenizer.tokenize(context))
        x_data.append(x_tokens)
        x_contexts.append(context_tokens)
        labels.append(int(sarcasm))


#rng = np.random.RandomState(5)
#rng.shuffle(x_data)  # in-place
#train_split_idx = int(0.7 * len(labels))
#test_split_idx  = int(0.9 * len(labels))

shuffle_indices = np.random.permutation(np.arange(len(labels)))
train_split_idx = int(0.7 * len(labels))
test_split_idx  = int(0.9 * len(labels))

train_indices = shuffle_indices[:train_split_idx]
validation_indices = shuffle_indices[train_split_idx:test_split_idx]
test_indices = shuffle_indices[test_split_idx:]


train_sentences = np.array(x_data)[train_indices]
train_contexts = np.array(x_contexts)[train_indices]
train_labels= np.array(labels)[train_indices] 
validation_sentences = np.array(x_data)[validation_indices]
validation_labels = np.array(labels)[validation_indices]
validation_contexts = np.array(x_contexts)[validation_indices]
test_sentences = np.array(x_data)[test_indices]  
test_contexts = np.array(x_contexts)[test_indices]
test_labels = np.array(labels)[test_indices]  



In [23]:
np.array(x_contexts)[train_indices].shape

  """Entry point for launching an IPython kernel.


(6108,)

In [24]:
train_sentences.shape

(6108,)

In [25]:
def transform_labels(raw_label_set, size):
    label_set = []
    for label in raw_label_set:
        labels = [0] * size
        labels[label] = 1
        label_set.append(labels)
    return np.array(label_set)

expanded_train_labels = transform_labels(train_labels, 2)
expanded_validation_labels = transform_labels(validation_labels,2)
expanded_test_labels = transform_labels(test_labels,2)

In [26]:
a = [1,3,4,5,6,7,8,8,1,5,6,7]
a + ["<PADDING>"]*0

[1, 3, 4, 5, 6, 7, 8, 8, 1, 5, 6, 7]

In [27]:
class PaddingAndTruncating:
    def __init__(self, max_len):
        self.max_len = max_len
    
    def pad_or_truncate(self, sentence):
        sen_len = len(sentence)
        paddings = self.max_len - sen_len
        if paddings >=0:
            return list(sentence) + ["<PADDING>"] * paddings
        return sentence[0:paddings]
        
PadAndTrunc = PaddingAndTruncating(10)
        
        
PadAndTrunc.pad_or_truncate(["the","angry","man","is","angry"])

['the',
 'angry',
 'man',
 'is',
 'angry',
 '<PADDING>',
 '<PADDING>',
 '<PADDING>',
 '<PADDING>',
 '<PADDING>']

In [28]:
from tensorflow.contrib import learn
vocab_size = 5000
#max_len = max([len(sent) for sent  in train_sentences])
PadAndTrunc =PaddingAndTruncating(40)
train_sentences_padded = list(map(PadAndTrunc.pad_or_truncate, train_sentences))
train_context_padded = list(map(PadAndTrunc.pad_or_truncate, train_contexts))
validation_sentences_padded = list(map(PadAndTrunc.pad_or_truncate, validation_sentences))
validation_context_padded = list(map(PadAndTrunc.pad_or_truncate, validation_contexts))
test_sentences_padded = list(map(PadAndTrunc.pad_or_truncate, test_sentences))
test_context_padded = list(map(PadAndTrunc.pad_or_truncate, test_contexts))


vocab = vocabulary.Vocabulary(utils.flatten(list(train_sentences_padded) + list(train_context_padded)), vocab_size)
train_s = np.array(list(map(vocab.words_to_ids, train_sentences_padded)))
train_c = np.array(list(map(vocab.words_to_ids, train_context_padded)))
validation_s = np.array(list(map(vocab.words_to_ids, validation_sentences_padded)))
validation_c = np.array(list(map(vocab.words_to_ids, validation_context_padded)))
test_s = np.array(list(map(vocab.words_to_ids, test_sentences_padded)))
test_c = np.array(list(map(vocab.words_to_ids, test_context_padded)))
train_s

array([[ 12,  83,  76, ...,   3,   3,   3],
       [ 12,   2,  16, ...,   3,   3,   3],
       [ 12,   2, 324, ...,   3,   3,   3],
       ...,
       [ 32, 148,  18, ...,   3,   3,   3],
       [239,   8, 685, ...,   3,   3,   3],
       [ 12, 279,  10, ...,   3,   3,   3]])

In [29]:
i = 3
("conv-maxpool-%s" % i)

'conv-maxpool-3'

In [30]:
class TextCNN(object):

    def __init__(
      self, sequence_length, num_classes, vocab_size, 
      embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):

        # Placeholders for input, output and dropout
        self.input_x1 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x1")
        self.input_x2 = tf.placeholder(tf.int32, [None, sequence_length], name="input_x2")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        # Keeping track of l2 regularization loss (optional)
        l2_loss = tf.constant(0.0)

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            self.embedded_chars1 = tf.nn.embedding_lookup(self.W, self.input_x1)
            self.embedded_chars_expanded1 = tf.expand_dims(self.embedded_chars1, -1)
            self.embedded_chars2 = tf.nn.embedding_lookup(self.W, self.input_x2)
            self.embedded_chars_expanded2 = tf.expand_dims(self.embedded_chars2, -1)

        # Create a convolution + avgpool layer for each filter size
        pooled_outputs = []

        for i, filter_size in enumerate(filter_sizes):
            with tf.variable_scope("conv-avgpool-%s" % i) as scope:
                # Convolution Layer
                filter_shape = [filter_size, embedding_size, 1, num_filters]

                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
              
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded1,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h1 = tf.nn.tanh(tf.nn.bias_add(conv, b), name="tanh1")
               
                # Maxpooling over the outputs
                pooled = tf.nn.avg_pool(
                    h1,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                
                pooled_outputs.append(pooled)
                #pooled_outputs.append(pooled)
                scope.reuse_variables()
                 
                
                conv = tf.nn.conv2d(
                    self.embedded_chars_expanded2,
                    W,
                    strides=[1, 1, 1, 1],
                    padding="VALID",
                    name="conv")
                # Apply nonlinearity
                h2 = tf.nn.tanh(tf.nn.bias_add(conv, b), name="tanh2")
                # Maxpooling over the outputs
                pooled2 = tf.nn.avg_pool(
                    h2,
                    ksize=[1, sequence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool")
                pooled_outputs.append(pooled2)

        # Combine all the pooled features

        num_filters_total = num_filters * len(filter_sizes) * 2

        self.h_pool = tf.concat(pooled_outputs, 1)

        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])


        # Add dropout
        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

        # Final (unnormalized) scores and predictions
        with tf.name_scope("output"):
        
            W = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
           
            l2_loss += tf.nn.l2_loss(W)
            l2_loss += tf.nn.l2_loss(b)
            self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
            self.predictions = tf.argmax(self.scores, 1, name="predictions")

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        # Accuracy
        with tf.name_scope("accuracy"):
            self.labels = tf.argmax(self.input_y, 1)
            TP = tf.count_nonzero(self.predictions * self.labels)
            TN = tf.count_nonzero((self.predictions - 1) * (self.labels - 1))
            FP = tf.count_nonzero(self.predictions * (self.labels - 1))
            FN = tf.count_nonzero((self.predictions - 1) * self.labels)
            self.correct_predictions = tf.equal(self.predictions, self.labels)
            self.precision = TP / (TP + FP)
            self.recall = TP / (TP + FN)
            self.f1_score = 2 * self.precision * self.recall / (self.precision + self.recall)
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, "float"), name="accuracy")

In [31]:
def batch_iter(data, batch_size, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.
    """
    data = np.array(data)
    data_size = len(data)
    num_batches_per_epoch = int((len(data)-1)/batch_size) + 1
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            end_index = min((batch_num + 1) * batch_size, data_size)
            yield shuffled_data[start_index:end_index]

In [34]:
learning_rate = 0.1

for name in list(tf.flags.FLAGS):
    delattr(tf.flags.FLAGS,name)

tf.flags.DEFINE_float("dev_sample_percentage", 0.75, "Percentage of the training data to use for validation")
tf.flags.DEFINE_string("positive_data_file", "./data/rt-polaritydata/rt-polarity.pos", "Data source for the positive data.")
tf.flags.DEFINE_string("negative_data_file", "./data/rt-polaritydata/rt-polarity.neg", "Data source for the negative data.")

# Model Hyperparameters

# embedding of 60 is best so far
tf.flags.DEFINE_integer("embedding_dim",60, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "1", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 60, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.4, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0, "L2 regularization lambda (default: 0.0)")

# Training parameters
batch_size = 200
tf.flags.DEFINE_integer("batch_size", batch_size, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 10, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 20, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
tf.app.flags.DEFINE_string('f', '', 'kernel')
# FLAGS._parse_flags()
FLAGS.flag_values_dict()

print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
print("")


# Data Preparation
# ==================================================


with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():

        cnn = TextCNN(
            sequence_length=train_s.shape[1],
            num_classes=2,
            vocab_size=vocab_size,
            embedding_size=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
            l2_reg_lambda=FLAGS.l2_reg_lambda)
        
         # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        
        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", cnn.loss)
        acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=FLAGS.num_checkpoints)

        # Write vocabulary
        #vocab_processor.save(os.path.join(out_dir, "vocab"))

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch1, x_batch2, y_batch):
            """
            A single training step
            """
            feed_dict = {
              cnn.input_x1: x_batch1,
              cnn.input_x2: x_batch2,
              cnn.input_y: y_batch,
              cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            print("step {}, loss {:g}, acc {:g}".format(step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch1, x_batch2, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              cnn.input_x1: x_batch1,
              cnn.input_x2: x_batch2,
              cnn.input_y: y_batch,
              cnn.dropout_keep_prob: 1.0
            }
            #r 
            step, summaries, loss, accuracy, recall, precision, f1 = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy,cnn.recall, cnn.precision, cnn.f1_score],
                feed_dict)
            print("step {}, loss {:g}, acc {:g}, rec {:g}, pre {:g}, f1 {:g}".format(step, loss, accuracy, recall, precision, f1))# recall, precision, f1))
            if writer:
                writer.add_summary(summaries, step)
        def error_analysis(x_batch1, x_batch2, y_batch, writer=None):
            feed_dict = {
              cnn.input_x1: x_batch1,
              cnn.input_x2: x_batch2,
              cnn.input_y: y_batch,
              cnn.dropout_keep_prob: 1.0
            }
            #r 
            step, summaries, loss, accuracy, recall, precision, f1, correct, scores,predictions  = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy,cnn.recall, cnn.precision, cnn.f1_score, cnn.correct_predictions, cnn.scores, cnn.predictions],
                feed_dict)
            print("step {}, loss {:g}, acc {:g}, rec {:g}, pre {:g}, f1 {:g}".format(step, loss, accuracy, recall, precision, f1))# recall, precision, f1))
            if writer:
                writer.add_summary(summaries, step)
            return correct, scores, predictions

        # Generate batches
        batches = batch_iter(
            list(zip(train_s, train_c,  expanded_train_labels)), FLAGS.batch_size, FLAGS.num_epochs)
        # Training loop. For each batch...
        for batch in batches:
            x_batch1, x_batch2, y_batch = zip(*batch)
            train_step(x_batch1, x_batch1, y_batch)
            current_step = tf.train.global_step(sess, global_step)
            if current_step % FLAGS.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(validation_s, validation_c, expanded_validation_labels, writer=dev_summary_writer)
                print("")
            if current_step % FLAGS.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
print("\nTest Set:")
correct, logits, predictions = error_analysis(test_s, test_c, expanded_test_labels, writer=dev_summary_writer)
print("")




Parameters:
ALLOW_SOFT_PLACEMENT=<absl.flags._flag.BooleanFlag object at 0x7f87908a8610>
BATCH_SIZE=<absl.flags._flag.Flag object at 0x7f8779d67210>
CHECKPOINT_EVERY=<absl.flags._flag.Flag object at 0x7f87908a8390>
DEV_SAMPLE_PERCENTAGE=<absl.flags._flag.Flag object at 0x7f8779d67810>
DROPOUT_KEEP_PROB=<absl.flags._flag.Flag object at 0x7f8779d67390>
EMBEDDING_DIM=<absl.flags._flag.Flag object at 0x7f8779d674d0>
EVALUATE_EVERY=<absl.flags._flag.Flag object at 0x7f8779d67190>
F=<absl.flags._flag.Flag object at 0x7f87908ace10>
FILTER_SIZES=<absl.flags._flag.Flag object at 0x7f8779d67450>
L2_REG_LAMBDA=<absl.flags._flag.Flag object at 0x7f8779d672d0>
LOG_DEVICE_PLACEMENT=<absl.flags._flag.BooleanFlag object at 0x7f87908a8410>
NEGATIVE_DATA_FILE=<absl.flags._flag.Flag object at 0x7f8779d67610>
NUM_CHECKPOINTS=<absl.flags._flag.Flag object at 0x7f87908a83d0>
NUM_EPOCHS=<absl.flags._flag.Flag object at 0x7f8779d67250>
NUM_FILTERS=<absl.flags._flag.Flag object at 0x7f8779d67050>
POSITIVE_DAT

  """


step 1, loss 0.732749, acc 0.505
step 2, loss 1.84976, acc 0.52
step 3, loss 3.08149, acc 0.53
step 4, loss 1.24399, acc 0.53
step 5, loss 1.01625, acc 0.52
step 6, loss 1.05353, acc 0.535
step 7, loss 0.780152, acc 0.695
step 8, loss 0.710147, acc 0.685
step 9, loss 0.623612, acc 0.715
step 10, loss 0.500425, acc 0.81
step 11, loss 0.595088, acc 0.705
step 12, loss 0.741982, acc 0.695
step 13, loss 0.549649, acc 0.775
step 14, loss 0.516368, acc 0.77
step 15, loss 0.503345, acc 0.79
step 16, loss 0.467506, acc 0.81
step 17, loss 0.532364, acc 0.78
step 18, loss 0.455389, acc 0.795
step 19, loss 0.601791, acc 0.77
step 20, loss 0.438381, acc 0.805

Evaluation:
step 20, loss 0.389138, acc 0.830946, rec 0.73443, pre 0.900576, f1 0.809061

step 21, loss 0.431344, acc 0.81
step 22, loss 0.470221, acc 0.805
step 23, loss 0.41028, acc 0.81
step 24, loss 0.43935, acc 0.825
step 25, loss 0.359309, acc 0.84
step 26, loss 0.370007, acc 0.845
step 27, loss 0.324896, acc 0.865
step 28, loss 0.3817

step 210, loss 0.302001, acc 0.925
step 211, loss 0.218846, acc 0.95
step 212, loss 0.333103, acc 0.895
step 213, loss 0.335551, acc 0.875
step 214, loss 0.330951, acc 0.92
step 215, loss 0.314471, acc 0.93
step 216, loss 0.356466, acc 0.9
step 217, loss 0.272182, acc 0.935185
step 218, loss 0.447206, acc 0.885
step 219, loss 0.378614, acc 0.905
step 220, loss 0.212855, acc 0.94

Evaluation:
step 220, loss 0.567794, acc 0.730086, rec 0.974148, pre 0.64867, f1 0.778769

step 221, loss 0.263619, acc 0.92
step 222, loss 0.3123, acc 0.885
step 223, loss 0.316076, acc 0.945
step 224, loss 0.216775, acc 0.94
step 225, loss 0.263154, acc 0.935
step 226, loss 0.231905, acc 0.915
step 227, loss 0.257964, acc 0.93
step 228, loss 0.236227, acc 0.92
step 229, loss 0.194257, acc 0.925
step 230, loss 0.16607, acc 0.93
step 231, loss 0.200492, acc 0.925
step 232, loss 0.136721, acc 0.955
step 233, loss 0.1381, acc 0.96
step 234, loss 0.241541, acc 0.905
step 235, loss 0.224888, acc 0.945
step 236, lo

In [35]:
def incorrect_confidence(wrong, logits, predictions):
    indeces = np.where(wrong)
    wrong_predictions = predictions[indeces]
    wrong_logits = logits[indeces]
    
    return [[wrong_logits[i][value] - wrong_logits[i][1-value], indeces[0][i]] for i, value in enumerate(wrong_predictions)]
wrong = correct == False

sorted(incorrect_confidence(wrong, logits, predictions), key = lambda logit: -logit[0])

[[7.333172, 705],
 [6.88813, 585],
 [5.7885275, 423],
 [5.379498, 499],
 [4.3761907, 402],
 [4.300793, 314],
 [4.287877, 727],
 [4.2789903, 21],
 [3.9485517, 473],
 [3.8429804, 78],
 [3.7944047, 599],
 [3.575448, 278],
 [3.5290804, 342],
 [3.4940157, 465],
 [3.4671054, 352],
 [3.409197, 752],
 [3.3677812, 569],
 [3.3653626, 131],
 [3.33324, 644],
 [3.2281518, 565],
 [3.185865, 579],
 [3.1529596, 165],
 [3.0802217, 442],
 [3.0793295, 403],
 [3.0500305, 288],
 [3.027804, 847],
 [3.021014, 395],
 [3.010228, 636],
 [2.9736733, 259],
 [2.9736733, 293],
 [2.9736733, 471],
 [2.9136329, 360],
 [2.890222, 328],
 [2.8801208, 122],
 [2.8091211, 652],
 [2.720594, 98],
 [2.6562188, 500],
 [2.6444244, 821],
 [2.6414552, 715],
 [2.6212006, 71],
 [2.570076, 94],
 [2.5207753, 75],
 [2.500715, 837],
 [2.4763026, 481],
 [2.4408479, 724],
 [2.4306238, 547],
 [2.4033535, 20],
 [2.354991, 184],
 [2.3441443, 431],
 [2.3117852, 309],
 [2.2957852, 651],
 [2.2882154, 409],
 [2.2856622, 54],
 [2.2817354, 27],
 [

In [36]:
index = 585
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])


['ACCOUNT', 'ACCOUNT', 'ACCOUNT', 'you', 'obviously', 'have', 'no', 'knowledge', 'of', '#disarmament', ',', 'being', 'fed', 'the', 'fuck', 'up', 'with', '#socialism', ',', 'being', 'under', 'constant', 'threat', 'of', 'force', '&', 'being', 'damn', 'near', 'killed', 'through', '#socialistoppression', ';', 'real', '#history', ',', 'and', 'enough', '=', 'enough', 'which', 'gave', 'birth', 'DG', '#constitution', '#secondamendment', '#liberty', '…']
['wakandabarbie', '🌊', 'on', 'twitter', ':', 'you', 'obviously', 'haven', '’', 't', 'seen', 'who', '’', 's', 'marching', '.', 'it', '’', 's', 'not', 'just', 'teens', ',', 'sweetie', '.', 'millions', 'of', 'us', 'are', 'fed', 'the', 'fuck', 'up', '.', 'damn', 'near', 'everyone', 'knows', 'someone', 'or', 'has', 'had', 'a', 'family', 'member', 'or', 'friend', 'killed', 'through', 'gun', 'violence', '.', 'enough', 'is', 'enough', '.']
0
1


In [37]:
index = 342
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])
# seems to tag compliments as sarcastic

['finished', 'a', 'way', 'out', 'with', 'ACCOUNT', 'yesterday', 'night', '.', 'i', 'really', 'like', 'the', 'game', '.', 'it', '’', 's', 'a', 'story', 'drive', 'coop', 'game', 'with', 'cool', 'protagonists', '.', 'DG-DG', 'hours', 'of', 'good', 'fun', ',', 'tension', 'and', 'surprises', '.', 'especially', 'during', 'the', 'final', 'hours', ',', 'this', 'game', 'feels', 'like', 'a', 'blockbuster', 'movie', '.', '#awayout', '#gaming']
['nan']
0
1


In [38]:
index = 409
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])
# seems to be another case of a compliment being tagged as sarcastic

['luckily', ',', 'i', 'once', 'again', 'had', 'a', 'ACCOUNT', 'bar', 'in', 'my', 'camera', 'case', 'since', 'i', 'cant', 'trust', 'what', 'any', 'humans', 'say', '...', '😑', 'ACCOUNT', 'started', 'their', 'main', 'set', 'with', 'one', 'of', 'my', 'favorite', 'songs', 'on', 'their', 'new', 'album', 'the', 'north', 'star', 'called', 'endless', '!', '😀', '#saturdaynight', '#concert', '#event', '#songs', '#greatmusic']
['nan']
0
1


In [39]:
index = 775
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])
# Not really sure why this is tagged as sarcasm

['always', 'knew', 'this', 'was', 'false.but', 'heres', 'the', 'question', ',', 'what', 'should', 'a', 'person', 'do', 'if', 'taken', 'to', 'an', 'atm', 'under', 'duress', '?', 'any', 'help', '?', 'LINK']
['nan']
0
1


In [40]:
index = 847
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])
# another compliment tagged as sarcastic

['ACCOUNT', 'ACCOUNT', 'that', 'was', 'very', 'thoughtful', '&', 'supportive', 'of', 'mr', '.', 'kraft', '.', 'all', 'the', 'other', '#nfl', 'teams', 'should', 'have', 'done', 'the', 'same', 'thing', '!']
['the', 'ACCOUNT', 'loaned', 'their', 'plane', 'to', 'parkland', 'students', 'so', 'they', 'could', 'get', 'to', 'the', 'HASHTAG', '.twitter.com/6gjhfcpu7i']
0
1


In [41]:
index = 499
print(test_sentences[index])
print(test_contexts[index])
print(test_labels[index])
print(predictions[index])
# too many hashtags?

['LINK', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', '…', 'once', 'upon', 'a', 'time', '(', 'spin', ')', 'featuring', 'ACCOUNT', 'of', 'd12', '#newmusic', '#hot', '#brandnew', '#michigan', '#midwest', '#underground', '#spin', '#lol', '#lmao', '#bars', '#hot', '#swag', '#d12', '#rap']
['nan']
1
0


##### 