## Import Library

In [34]:
import re
import nltk
import string
import pickle
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from nltk.corpus import wordnet
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
from nltk.stem import WordNetLemmatizer 

nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')
warnings.simplefilter(action="ignore", category=FutureWarning)

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Legion\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Legion\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger.zip.


## Load Data

In [55]:
file = open("data/quotexresponse.pkl", "rb")
df = pickle.load(file)
file.close()

In [56]:
df

Unnamed: 0,presented_quote,presented_response,emotion_fact,disagree_agree,plain_sarcasm
0,"I got a good idea. however, they do tend to st...",By your own admission you havenÂ’t 'hung out' ...,feeling-based,disagreement,unsure
1,Be sure to give your guns a big fat kiss tonig...,"Actually, they didn't. The whole tragedy was c...",unsure,disagreement,unsure
2,One of the biggest arguments against gun contr...,Not quite. To be more correct regarding govern...,fact-based,disagreement,no_sarcasm
3,"First of all, compare the ""B"" specimen in your...",At your service:\nComparison\nI could've just ...,unsure,disagreement,unsure
4,There are some incedents that are beyond your ...,Well yes.,feeling-based,agreement,no_sarcasm
...,...,...,...,...,...
9977,The ID movements form of ID states that there ...,"That , of course, is the logical fallacy known...",fact-based,disagreement,no_sarcasm
9978,"For me, it would therefore have made no differ...",It logically follows from the moral foundation...,feeling-based,agreement,no_sarcasm
9979,good thing this argument has never been done!....,"And teen sex doesn't, by the very nature of it...",feeling-based,unsure,no_sarcasm
9980,"I know one thing, anything that happens, polit...",Wasn't sinjin crowing about his plans to take ...,feeling-based,unsure,no_sarcasm


## Preprocessing Experiments

In [57]:
text = df.loc[:1, ['presented_quote', 'presented_response']].to_numpy()
text = text.reshape([4,])
for i in range(text.shape[0]):
    print(text[i])
    print()

I got a good idea. however, they do tend to stay with their own.

By your own admission you havenÂ’t 'hung out' with stoners for a while and you're making generalisations about them to people who do spend a lot of time with stoners?

Be sure to give your guns a big fat kiss tonight before you go to bed tonight 'cuz guns did really good today.

Actually, they didn't. The whole tragedy was caused by gun control. If even one student was packing when that occured, 33 lives could have been saved. But no, more victims of botched laws and corrupt politicians.



In [65]:
lemmatizer = WordNetLemmatizer()

def nltk_tag_to_wordnet_tag(nltk_tag):
    if nltk_tag.startswith('J'):
        return wordnet.ADJ
    elif nltk_tag.startswith('V'):
        return wordnet.VERB
    elif nltk_tag.startswith('N'):
        return wordnet.NOUN
    elif nltk_tag.startswith('R'):
        return wordnet.ADV
    else:          
        return None
    
def lemmatize_sentence(sentence):

    nltk_tagged = nltk.pos_tag(nltk.word_tokenize(sentence))  
    wordnet_tagged = map(lambda x: (x[0], nltk_tag_to_wordnet_tag(x[1])), nltk_tagged)
    
    lemmatized_sentence = []
    for word, tag in wordnet_tagged:
        if tag is None:
            lemmatized_sentence.append(word)
        else:        
            lemmatized_sentence.append(lemmatizer.lemmatize(word, tag))
    return " ".join(lemmatized_sentence)

def sentence_pos_tag(sentence):
    text = word_tokenize(sentence)
    pos_tag = nltk.pos_tag(text)
    pos_tag_res = ''
    for i in range(len(pos_tag)):
        pos_tag_res += pos_tag[i][1]
        pos_tag_res += ' ' if i != len(sentence)-1 else '' 
    return pos_tag_res

def preprocessing(text):
    text = text.lower()
    text = text.strip()
    text = re.sub(r" \d+ ", " ", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"[^a-z ]", "", text)
    text = re.sub(r"  ", " ", text)
    text = lemmatize_sentence(text)
    return text

In [59]:
for i in range(text.shape[0]):
    print(text[i])
    text[i] = preprocessing(text[i])
    
    print(text[i])
    print()

I got a good idea. however, they do tend to stay with their own.
i get a good idea however they do tend to stay with their own

By your own admission you havenÂ’t 'hung out' with stoners for a while and you're making generalisations about them to people who do spend a lot of time with stoners?
by your own admission you havent hang out with stoner for a while and youre making generalisation about them to people who do spend a lot of time with stoner

Be sure to give your guns a big fat kiss tonight before you go to bed tonight 'cuz guns did really good today.
be sure to give your gun a big fat kiss tonight before you go to bed tonight cuz gun do really good today

Actually, they didn't. The whole tragedy was caused by gun control. If even one student was packing when that occured, 33 lives could have been saved. But no, more victims of botched laws and corrupt politicians.
actually they didnt the whole tragedy be cause by gun control if even one student be pack when that occur life coul

In [64]:
text = word_tokenize('actually they didnt the whole tragedy be cause by gun control if even one student be pack when that occur life could have be save but no more victim of botch law and corrupt politician')
pos_tag = nltk.pos_tag(text)
print(pos_tag)
pos_tag = sentence_pos_tag(pos_tag)
pos_tag

[('actually', 'RB'), ('they', 'PRP'), ('didnt', 'VBP'), ('the', 'DT'), ('whole', 'JJ'), ('tragedy', 'NN'), ('be', 'VB'), ('cause', 'VBN'), ('by', 'IN'), ('gun', 'NN'), ('control', 'NN'), ('if', 'IN'), ('even', 'RB'), ('one', 'CD'), ('student', 'NN'), ('be', 'VB'), ('pack', 'VBN'), ('when', 'WRB'), ('that', 'DT'), ('occur', 'VBP'), ('life', 'NN'), ('could', 'MD'), ('have', 'VB'), ('be', 'VB'), ('save', 'VBN'), ('but', 'CC'), ('no', 'DT'), ('more', 'JJR'), ('victim', 'NN'), ('of', 'IN'), ('botch', 'NN'), ('law', 'NN'), ('and', 'CC'), ('corrupt', 'JJ'), ('politician', 'NN')]
1


'RB PRP VBP DT JJ NN VB VBN IN NN NN IN RB CD NN VB VBN WRB DT VBP NN MD VB VB VBN CC DT JJR NN IN NN NN CC JJ NN'

## Update Database

In [66]:
df['presented_quote'] = df['presented_quote'].apply(lambda text: preprocessing(text))
df['presented_response'] = df['presented_response'].apply(lambda text: preprocessing(text))
df['presented_quote_tag'] = df['presented_quote'].apply(lambda text: sentence_pos_tag(text))
df['presented_response_tag'] = df['presented_response'].apply(lambda text: sentence_pos_tag(text))

df

Unnamed: 0,presented_quote,presented_response,emotion_fact,disagree_agree,plain_sarcasm,presented_quote_tag,presented_response_tag
0,i get a good idea however they do tend to stay...,by your own admission you havent hang out with...,feeling-based,disagreement,unsure,NN VBP DT JJ NN RB PRP VBP VB TO VB IN PRP$ JJ,IN PRP$ JJ NN PRP VBP VB RP IN NN IN DT NN CC ...
1,be sure to give your gun a big fat kiss tonigh...,actually they didnt the whole tragedy be cause...,unsure,disagreement,unsure,VB JJ TO VB PRP$ NN DT JJ NN NN NN IN PRP VBP ...,RB PRP VBP DT JJ NN VB VBN IN NN NN IN RB CD N...
2,one of the big argument against gun control be...,not quite to be more correct regard government...,fact-based,disagreement,no_sarcasm,CD IN DT JJ NN IN NN NN VB IN IN DT NN VB RB J...,RB RB TO VB RBR JJ JJ NN CC VB DT NN TO VB NN ...
3,first of all compare the b specimen in your fo...,at your servicecomparisoni couldve just circle...,unsure,disagreement,unsure,RB IN DT VBP DT NN NNS IN PRP$ NN NN TO DT JJ ...,IN PRP$ JJ NN RB VB DT JJ NN CC DT VBP PRP JJ ...
4,there be some incedents that be beyond your co...,well yes,feeling-based,agreement,no_sarcasm,RB VB DT NNS WDT VB IN PRP$ NN IN PRP VBP DT N...,RB RB
...,...,...,...,...,...,...,...
9977,the id movement form of id state that there be...,that of course be the logical fallacy know as ...,fact-based,disagreement,no_sarcasm,DT JJ NN NN IN JJ NN IN EX VB JJ NN IN DT NN I...,DT IN NN VB DT JJ NN VBP IN JJ NN DT NN PRP VB...
9978,for me it would therefore have make no differe...,it logically follow from the moral foundation ...,feeling-based,agreement,no_sarcasm,IN PRP PRP MD RB VB NN DT NN IN NNS VBP VB VBN...,PRP RB VBP IN DT JJ NN VBN RP CC PRP VBP TO JJ...
9979,good thing this argument have never be doneoh ...,and teen sex doesnt by the very nature of its ...,feeling-based,unsure,no_sarcasm,JJ NN DT NN VBP RB VB JJ NN WDT VBZ PRP MD VB ...,CC JJ NN NN IN DT JJ NN IN PRP$ NN NN IN NN DT...
9980,i know one thing anything that happen politica...,wasnt sinjin crow about his plan to take the f...,feeling-based,unsure,no_sarcasm,NN VBP CD NN NN WDT VBZ RB JJ NNS MD VB PRP RB...,NN NN NN IN PRP$ NN TO VB DT NN NN CC NN TO DT...


In [68]:
file = open("data/quotexresponseprocessed.pkl", "wb")
pickle.dump(df, file)
file.close()

# RT remove, mention remove, Same class domination remove, url remove