In [None]:
# Install these packages if running from colab
!pip install tensorflow-datasets --quiet
!pip install pydot --quiet
!pip install transformers --quiet

# install huggingface datasets
!pip install datasets --quiet

!pip install rouge-score nltk --quiet
!pip install huggingface_hub --quiet
!pip install git+https://github.com/google-research/bleurt.git --quiet

[K     |████████████████████████████████| 5.5 MB 6.2 MB/s 
[K     |████████████████████████████████| 7.6 MB 48.3 MB/s 
[K     |████████████████████████████████| 182 kB 52.2 MB/s 
[K     |████████████████████████████████| 451 kB 7.7 MB/s 
[K     |████████████████████████████████| 115 kB 73.7 MB/s 
[K     |████████████████████████████████| 212 kB 71.5 MB/s 
[K     |████████████████████████████████| 127 kB 44.1 MB/s 
[?25h  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
[K     |████████████████████████████████| 352 kB 7.7 MB/s 
[K     |████████████████████████████████| 1.3 MB 53.6 MB/s 
[?25h  Building wheel for BLEURT (setup.py) ... [?25l[?25hdone


In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras

from tensorflow.keras.layers import Embedding, Input, Dense, Lambda
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import tensorflow_datasets as tfds

import sklearn as sk
import os
import nltk
from nltk.data import find

import matplotlib.pyplot as plt

import re

#let's make longer output readable without scrolling
from pprint import pprint

# the toxic parallel dataset, with rouge metric
#from datasets import load_dataset, load_from_disk, load_metric, DatasetDict
from datasets import load_from_disk, load_metric
import tensorflow_hub as hub

<h2> Loading the data and the predictions </h2>



In [None]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
dataset_path = 'drive/MyDrive/Colab Notebooks/w266_project_data'
csv_path = 'drive/MyDrive/Colab Notebooks/w266_project_predictions/'
model_path = 'drive/MyDrive/Colab Notebooks/w266_project_models/'

# for local runs
#dataset_path = 'w266_project_data/'
#csv_path = 'w266_project_predictions/'

In [None]:
# a list of model's output CSV file
output_file_names = ['davidson_t5_test_output.csv', 'davidson_xsum_output.csv', 'davidson_bart_cnn_test.csv', 
                     'davidson_paradetox_output.csv']

In [None]:
# load the dataset
#dataset = load_from_disk(dataset_path)

<h2> ROUGE </h2>

In [None]:
metric = load_metric("rouge")

  """Entry point for launching an IPython kernel.


In [None]:
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  rouge_results = metric.compute(predictions=df_bart_predictions['test_predictions'],
                                 references=df_bart_predictions['test_inputs'])
  display(output_file_name)
  display(pd.concat({k: pd.DataFrame(v) for k, v in rouge_results.items()}))
  print()

'davidson_t5_test_output.csv'

Unnamed: 0,Unnamed: 1,precision,recall,fmeasure
rouge1,0,0.911126,0.761732,0.822481
rouge1,1,0.917726,0.768275,0.828884
rouge1,2,0.923702,0.774673,0.835086
rouge2,0,0.810395,0.668446,0.725545
rouge2,1,0.819946,0.677448,0.734775
rouge2,2,0.829081,0.6873,0.744073
rougeL,0,0.911014,0.761733,0.822884
rougeL,1,0.917408,0.768322,0.829001
rougeL,2,0.923335,0.774723,0.834734
rougeLsum,0,0.911043,0.761548,0.822281





'davidson_xsum_output.csv'

Unnamed: 0,Unnamed: 1,precision,recall,fmeasure
rouge1,0,0.845083,0.679163,0.738623
rouge1,1,0.853254,0.688723,0.747363
rouge1,2,0.86081,0.698353,0.75594
rouge2,0,0.708118,0.563698,0.613866
rouge2,1,0.719788,0.575764,0.625892
rouge2,2,0.730961,0.587758,0.637498
rougeL,0,0.844044,0.67915,0.738375
rougeL,1,0.851821,0.688429,0.746768
rougeL,2,0.859524,0.697702,0.75491
rougeLsum,0,0.843513,0.679092,0.738291





'davidson_bart_cnn_test.csv'

Unnamed: 0,Unnamed: 1,precision,recall,fmeasure
rouge1,0,0.864838,0.738182,0.785346
rouge1,1,0.872531,0.746641,0.793008
rouge1,2,0.879489,0.755464,0.800554
rouge2,0,0.748877,0.631331,0.674336
rouge2,1,0.759802,0.642912,0.685003
rouge2,2,0.770914,0.653253,0.695549
rougeL,0,0.864035,0.737194,0.784306
rougeL,1,0.87188,0.746282,0.792638
rougeL,2,0.87964,0.75529,0.801074
rougeLsum,0,0.863882,0.736764,0.783941





'davidson_paradetox_output.csv'

Unnamed: 0,Unnamed: 1,precision,recall,fmeasure
rouge1,0,0.868953,0.744537,0.792054
rouge1,1,0.876225,0.753219,0.800029
rouge1,2,0.883111,0.761563,0.807592
rouge2,0,0.745857,0.634071,0.675507
rouge2,1,0.755949,0.645085,0.686126
rouge2,2,0.767249,0.656141,0.696961
rougeL,0,0.868776,0.744334,0.792319
rougeL,1,0.875611,0.752782,0.799524
rougeL,2,0.882165,0.761255,0.806737
rougeLsum,0,0.868072,0.744189,0.791939





<h2> Meteor </h2>

In [None]:
meteor_metric = load_metric("meteor")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  meteor_results = meteor_metric.compute(predictions=df_bart_predictions['test_predictions'],
                                 references=df_bart_predictions['test_inputs'])
  display(output_file_name)
  display(meteor_results['meteor'])
  print()
  # display(pd.concat({k: pd.DataFrame(v) for k, v in meteor_results.items()}))
  # print()

'davidson_t5_test_output.csv'

0.7167514409578921




'davidson_xsum_output.csv'

0.6458234977131738




'davidson_bart_cnn_test.csv'

0.7053197655103902




'davidson_paradetox_output.csv'

0.7153864969289357




<h2> BLEU </h2>

In [None]:
bleu_metric = load_metric("bleu")

In [None]:
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  bleu_predictions = [i.split(' ') for i in df_bart_predictions['test_predictions']]
  bleu_references = [[i.split(' ')] for i in df_bart_predictions['test_inputs']]

  bleu_results = bleu_metric.compute(predictions=bleu_predictions,
                                 references=bleu_references)
  display(output_file_name)
  bleu_results = pd.DataFrame.from_dict(bleu_results)
  display(bleu_results)
  print()

'davidson_t5_test_output.csv'

Unnamed: 0,bleu,precisions,brevity_penalty,length_ratio,translation_length,reference_length
0,0.644244,0.754807,1.0,1.020938,35156,34435
1,0.644244,0.676375,1.0,1.020938,35156,34435
2,0.644244,0.610457,1.0,1.020938,35156,34435
3,0.644244,0.552742,1.0,1.020938,35156,34435





'davidson_xsum_output.csv'

Unnamed: 0,bleu,precisions,brevity_penalty,length_ratio,translation_length,reference_length
0,0.540156,0.826484,0.784232,0.804473,24966,31034
1,0.540156,0.724364,0.784232,0.804473,24966,31034
2,0.540156,0.645813,0.784232,0.804473,24966,31034
3,0.540156,0.582105,0.784232,0.804473,24966,31034





'davidson_bart_cnn_test.csv'

Unnamed: 0,bleu,precisions,brevity_penalty,length_ratio,translation_length,reference_length
0,0.601981,0.846845,0.841476,0.852808,27260,31965
1,0.601981,0.752239,0.841476,0.852808,27260,31965
2,0.601981,0.674409,0.841476,0.852808,27260,31965
3,0.601981,0.60965,0.841476,0.852808,27260,31965





'davidson_paradetox_output.csv'

Unnamed: 0,bleu,precisions,brevity_penalty,length_ratio,translation_length,reference_length
0,0.605083,0.854317,0.844874,0.855748,27354,31965
1,0.605083,0.754983,0.844874,0.855748,27354,31965
2,0.605083,0.673585,0.844874,0.855748,27354,31965
3,0.605083,0.605541,0.844874,0.855748,27354,31965





<h2> BLEURT </h2>

In [None]:
bleurt_metric = load_metric('bleurt')



In [None]:
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  bleurt_predictions = [i.split(' ') for i in df_bart_predictions['test_predictions']]
  bleurt_references = [[i.split(' ')] for i in df_bart_predictions['test_inputs']]

  bleurt_results = bleurt_metric.compute(predictions=bleurt_predictions,
                                 references=bleurt_references)
  display(output_file_name)
  bleurt_results = pd.DataFrame.from_dict(bleurt_results).describe()
  display(bleurt_results)
  print()

'davidson_t5_test_output.csv'

Unnamed: 0,scores
count,2470.0
mean,-0.123598
std,0.351144
min,-1.418136
25%,-0.342472
50%,-0.071609
75%,0.141084
max,0.530806





'davidson_xsum_output.csv'

Unnamed: 0,scores
count,2400.0
mean,8.5e-05
std,0.529452
min,-1.606651
25%,-0.400097
50%,-0.009584
75%,0.405093
max,0.913619





'davidson_bart_cnn_test.csv'

Unnamed: 0,scores
count,2470.0
mean,0.094409
std,0.523335
min,-1.540609
25%,-0.26165
50%,0.134301
75%,0.480619
max,0.919579





'davidson_paradetox_output.csv'

Unnamed: 0,scores
count,2470.0
mean,0.11568
std,0.494678
min,-1.512893
25%,-0.229349
50%,0.14243
75%,0.484531
max,0.910298





## universal sentence encoder

In [None]:
encoder = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')

In [None]:
pd.options.display.max_colwidth = 100
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  universal_orig_encode = [i for i in df_bart_predictions['test_inputs']]
  universal_tweet_encode = [i for i in df_bart_predictions['test_predictions']]
  encoder([universal_tweet_encode[1]])

  similarity = []
  for x,y in zip(universal_orig_encode, universal_tweet_encode):
    new_list = [x,y]
    encoding_matrix = encoder(new_list)
    score = np.inner(encoding_matrix, encoding_matrix)
    #print(x,'\n',y,'\n', score,'\n') # score gives a matrix
    #print(x,'\n',y,'\n','semantic similarity:', score[0][1],'\n')
    similarity.append(score[0][1])
  
  display(output_file_name)
  similarity_results_df = pd.concat([df_bart_predictions, pd.Series(similarity).rename('similarity')],axis=1)
  similarity_average =  similarity_results_df['similarity'].mean()
  print('Similarity Average =', similarity_average)
  display(similarity_results_df.head())
  print()

'davidson_t5_test_output.csv'

Similarity Average = 0.76269335


Unnamed: 0,test_inputs,test_predictions,similarity
0,"summarize: ""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil,0.889683
1,summarize: #HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.942004
2,summarize: pancakes trash,pancakes are not good,0.647964
3,"summarize: The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Viking...","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....",0.97869
4,summarize: I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.They want to se...,0.865244





'davidson_xsum_output.csv'

Similarity Average = 0.77993673


Unnamed: 0,test_inputs,test_predictions,similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil.,0.956719
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,1.0
2,pancakes trash,pancakes are bad,0.815722
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick.",0.899813
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda,0.665541





'davidson_bart_cnn_test.csv'

Similarity Average = 0.80283576


Unnamed: 0,test_inputs,test_predictions,similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil!,0.956719
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,1.0
2,pancakes trash,pancakes,0.808125
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....",1.0
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda.,0.665541





'davidson_paradetox_output.csv'

Similarity Average = 0.8070117


Unnamed: 0,test_inputs,test_predictions,similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil,0.956719
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.962373
2,pancakes trash,Pancakes are not good.,0.793003
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick.",0.899813
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,1.0





## STS (Semantic Textual Similarity) Benchmark

In [None]:
!pip install sentence-transformers --quiet
from sentence_transformers import SentenceTransformer, util

In [None]:
what = SentenceTransformer('stsb-roberta-large')

Downloading:   0%|          | 0.00/748 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/191 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.92k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/674 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.17k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/229 [00:00<?, ?B/s]

In [None]:
pd.options.display.max_colwidth = 100
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  universal_orig_encode = [i for i in df_bart_predictions['test_inputs']]
  universal_tweet_encode = [i for i in df_bart_predictions['test_predictions']]
  # encode list of sentences to get their embeddings
  embedding1 = what.encode(universal_orig_encode, convert_to_tensor=True)
  embedding2 = what.encode(universal_tweet_encode, convert_to_tensor=True)

  # compute similarity scores of two embeddings
  sts_cosine_scores = util.pytorch_cos_sim(embedding1, embedding2)

  sts_cos_score = []

  for i in range(len(universal_orig_encode)):
    #print("Sentence 1:", universal_orig_encode[i])
    #print("Sentence 2:", universal_tweet_encode[i])
    sts_score = sts_cosine_scores[i][i].item()
    #print("Similarity Score:", score)
    sts_cos_score.append(sts_score)
    #print()
  
  display(output_file_name)
  sts_results_df = pd.concat([df_bart_predictions, pd.Series(sts_cos_score).rename('cos_similarity')],axis=1)
  sts_average =  sts_results_df['cos_similarity'].mean()
  print('STS Average =', sts_average)
  display(sts_results_df.head())
  print()

'davidson_t5_test_output.csv'

STS Average = 0.8118323946484791


Unnamed: 0,test_inputs,test_predictions,cos_similarity
0,"summarize: ""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil,0.904447
1,summarize: #HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.97178
2,summarize: pancakes trash,pancakes are not good,0.467036
3,"summarize: The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Viking...","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....",0.990039
4,summarize: I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.They want to se...,0.943209





'davidson_xsum_output.csv'

STS Average = 0.7760304670066883


Unnamed: 0,test_inputs,test_predictions,cos_similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil.,0.945268
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.999999
2,pancakes trash,pancakes are bad,0.781247
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick.",0.859712
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda,0.758244





'davidson_bart_cnn_test.csv'

STS Average = 0.7950303416107874


Unnamed: 0,test_inputs,test_predictions,cos_similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil!,0.952777
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.999999
2,pancakes trash,pancakes,0.73933
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....",0.997633
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda.,0.761215





'davidson_paradetox_output.csv'

STS Average = 0.8027946511081793


Unnamed: 0,test_inputs,test_predictions,cos_similarity
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil,0.951679
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,God still share HIS #Secrets Amos 3:7 blessings #Jesus,0.977319
2,pancakes trash,Pancakes are not good.,0.701987
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick.",0.859712
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,0.998993





### examine a few predictions in each model

In [None]:
print('The first 5 example\'s input and label')
pd.options.display.max_colwidth = 100
display(df_bart_predictions['test_inputs'][:5])

The first 5 example's input and label


0                                        "Why would you wanna be the Green Ranger? He's evil!" Duh bitch
1                                     #HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus
2                                                                                         pancakes trash
3    The KFAN mock draft continues, Cleveland is "on the clock". Paul Allen has the Vikings 8th pick....
4    I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...
Name: test_inputs, dtype: object

In [None]:
# examine a few predictions in each model
pd.options.display.max_colwidth = 100
for output_file_name in output_file_names:
  df_bart_predictions = pd.read_csv(csv_path + output_file_name)
  display(output_file_name)
  display(df_bart_predictions.head(5))
  print()

'davidson_t5_test_output.csv'

Unnamed: 0,test_inputs,test_predictions
0,"summarize: ""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil
1,summarize: #HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus
2,summarize: pancakes trash,pancakes are not good
3,"summarize: The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Viking...","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick...."
4,summarize: I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.They want to se...





'davidson_xsum_output.csv'

Unnamed: 0,test_inputs,test_predictions
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil.
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus
2,pancakes trash,pancakes are bad
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick."
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda





'davidson_bart_cnn_test.csv'

Unnamed: 0,test_inputs,test_predictions
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil!
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus
2,pancakes trash,pancakes
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick...."
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough. You gotta have a non music related agenda.





'davidson_paradetox_output.csv'

Unnamed: 0,test_inputs,test_predictions
0,"""Why would you wanna be the Green Ranger? He's evil!"" Duh bitch",Why would you wanna be the Green Ranger? He's evil
1,#HolySpirit God still share HIS #Secrets Amos 3:7 blessings #Jesus,God still share HIS #Secrets Amos 3:7 blessings #Jesus
2,pancakes trash,Pancakes are not good.
3,"The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick....","The KFAN mock draft continues, Cleveland is ""on the clock"". Paul Allen has the Vikings 8th pick."
4,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...,I be telling Mcgirt music ain't enough.You gotta have a non music related agenda.Them crackers w...



