# Add model: translation attention ecoder-decocer over the b4 dataset

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data
import pandas as pd
import unicodedata
import string
import re
import random
import copy
from contra_qa.plots.functions  import simple_step_plot, plot_confusion_matrix
import  matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from nltk.translate.bleu_score import sentence_bleu


% matplotlib inline

### Preparing data

In [2]:
df2 = pd.read_csv("data/boolean4_train.csv")
df2_test = pd.read_csv("data/boolean4_test.csv")

df2["text"] = df2["sentence1"] + df2["sentence2"] 
df2_test["text"] = df2_test["sentence1"] + df2_test["sentence2"] 

all_sentences = list(df2.text.values) + list(df2_test.text.values)

df2train = df2

In [3]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Lynn drew ahead of the other runners and felt ...,Billie didn't draw ahead of the other runners,Lynn drew ahead of the other runners,Lynn felt an outsider,0,Lynn drew ahead of the other runners and felt ...
9996,Crystal slept for eight hours and dreamed of r...,Crystal didn't see him,Crystal slept for eight hours,Crystal dreamed of running in Lugoj,0,Crystal slept for eight hours and dreamed of r...
9997,Wallace lost Alberto's green car and showed Kr...,Alex didn't lose Alberto's green car,Wallace lost Alberto's green car,Wallace showed Kristen's old photos,0,Wallace lost Alberto's green car and showed Kr...
9998,Shelly put Beverly in charge and burned Chad's...,Shelly didn't burn Chad's house,Shelly put Beverly in charge,Shelly burned Chad's house,1,Shelly put Beverly in charge and burned Chad's...
9999,Daisy ate an apple and chose to live in Blaj,Daisy didn't choose to live in Blaj,Daisy ate an apple,Daisy chose to live in Blaj,1,Daisy ate an apple and chose to live in BlajDa...


In [4]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

# Lowercase, trim, and remove non-letter characters

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


example = "ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf"
print("Before:", example)
print()
print("After:", normalizeString(example))

Before: ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf

After: ddddda capoeeeeeee ! ! aas fdf


In [5]:
df2train.and_A = df2train.and_A.map(normalizeString)
df2train.and_B = df2train.and_B.map(normalizeString)

In [6]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Lynn drew ahead of the other runners and felt ...,Billie didn't draw ahead of the other runners,lynn drew ahead of the other runners,lynn felt an outsider,0,Lynn drew ahead of the other runners and felt ...
9996,Crystal slept for eight hours and dreamed of r...,Crystal didn't see him,crystal slept for eight hours,crystal dreamed of running in lugoj,0,Crystal slept for eight hours and dreamed of r...
9997,Wallace lost Alberto's green car and showed Kr...,Alex didn't lose Alberto's green car,wallace lost alberto s green car,wallace showed kristen s old photos,0,Wallace lost Alberto's green car and showed Kr...
9998,Shelly put Beverly in charge and burned Chad's...,Shelly didn't burn Chad's house,shelly put beverly in charge,shelly burned chad s house,1,Shelly put Beverly in charge and burned Chad's...
9999,Daisy ate an apple and chose to live in Blaj,Daisy didn't choose to live in Blaj,daisy ate an apple,daisy chose to live in blaj,1,Daisy ate an apple and chose to live in BlajDa...


In [7]:
df2train["sentence1_p"] = df2train["and_A"] + " and " + df2train["and_B"]

In [8]:
df2train.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,sentence1_p
0,Reginald caught a mango fish and ran from me,Reginald didn't catch a mango fish,reginald caught a mango fish,reginald ran from me,1,Reginald caught a mango fish and ran from meRe...,reginald caught a mango fish and reginald ran ...
1,Sandy thought I was living in Craiova and grew...,Sandy didn't think I was living in Craiova,sandy thought i was living in craiova,sandy grew up in jail,1,Sandy thought I was living in Craiova and grew...,sandy thought i was living in craiova and sand...
2,Brenda burned Chris's house and let use this pen,Brenda didn't let use this pen,brenda burned chris s house,brenda let use this pen,1,Brenda burned Chris's house and let use this p...,brenda burned chris s house and brenda let use...
3,Brenda sold Josephine's house and ate an apple,Brenda didn't eat an apple,brenda sold josephine s house,brenda ate an apple,1,Brenda sold Josephine's house and ate an apple...,brenda sold josephine s house and brenda ate a...
4,Tamara fell off and hung up on me,Tamara didn't ring from me,tamara fell off,tamara hung up on me,0,Tamara fell off and hung up on meTamara didn't...,tamara fell off and tamara hung up on me


In [9]:
df2train_plus = df2train[["sentence1_p", "sentence2", "label"]]

df2train_plus.sentence2 = df2train_plus.sentence2.map(normalizeString)
df2train_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [17]:
df2train_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,reginald caught a mango fish and reginald ran ...,reginald didn t catch a mango fish,1
1,sandy thought i was living in craiova and sand...,sandy didn t think i was living in craiova,1
2,brenda burned chris s house and brenda let use...,brenda didn t let use this pen,1
3,brenda sold josephine s house and brenda ate a...,brenda didn t eat an apple,1
4,tamara fell off and tamara hung up on me,tamara didn t ring from me,0


In [10]:
df2train_plus.to_csv("data/boolean4_control_train.csv", index=False)

## generating new data for test

In [11]:
df2_test.and_A = df2_test.and_A.map(normalizeString)
df2_test.and_B = df2_test.and_B.map(normalizeString)

In [12]:
df2_test["sentence1_p"] = df2_test["and_A"] + " and " + df2_test["and_B"]

In [13]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,sentence1_p
0,Seth broke the rules and knew Antoinette's secret,Seth didn't know Antoinette's secret,seth broke the rules,seth knew antoinette s secret,1,Seth broke the rules and knew Antoinette's sec...,seth broke the rules and seth knew antoinette ...
1,Joy lent me Gerald's icy bicycle and blew it,Frances didn't lend me Gerald's icy bicycle,joy lent me gerald s icy bicycle,joy blew it,0,Joy lent me Gerald's icy bicycle and blew itFr...,joy lent me gerald s icy bicycle and joy blew it
2,Arlene grew up and ate an apple,Arlene didn't hang up on me,arlene grew up,arlene ate an apple,0,Arlene grew up and ate an appleArlene didn't h...,arlene grew up and arlene ate an apple
3,Beverly met him and froze Juana's bank account,Antoinette didn't freeze Juana's bank account,beverly met him,beverly froze juana s bank account,0,Beverly met him and froze Juana's bank account...,beverly met him and beverly froze juana s bank...
4,Julie beat her and sang a nice song,Elmer didn't beat her,julie beat her,julie sang a nice song,0,Julie beat her and sang a nice songElmer didn'...,julie beat her and julie sang a nice song


In [14]:
df2_test_plus = df2_test[["sentence1_p", "sentence2", "label"]]

df2_test_plus.sentence2 = df2_test_plus.sentence2.map(normalizeString)
df2_test_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [15]:
df2_test_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,seth broke the rules and seth knew antoinette ...,seth didn t know antoinette s secret,1
1,joy lent me gerald s icy bicycle and joy blew it,frances didn t lend me gerald s icy bicycle,0
2,arlene grew up and arlene ate an apple,arlene didn t hang up on me,0
3,beverly met him and beverly froze juana s bank...,antoinette didn t freeze juana s bank account,0
4,julie beat her and julie sang a nice song,elmer didn t beat her,0


In [16]:
df2_test_plus.to_csv("data/boolean4_control_test.csv", index=False)