# Add model: translation attention ecoder-decocer over the b3 dataset

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data
import pandas as pd
import unicodedata
import string
import re
import random
import copy
from contra_qa.plots.functions  import simple_step_plot, plot_confusion_matrix
import  matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from nltk.translate.bleu_score import sentence_bleu


% matplotlib inline

### Preparing data

In [2]:
df2 = pd.read_csv("data/boolean3_train.csv")
df2_test = pd.read_csv("data/boolean3_test.csv")

df2["text"] = df2["sentence1"] + df2["sentence2"] 
df2_test["text"] = df2_test["sentence1"] + df2_test["sentence2"] 

all_sentences = list(df2.text.values) + list(df2_test.text.values)

df2train = df2

In [3]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Dawn went to Mangalia and Dej,Ramon didn't go to Dej,Dawn went to Mangalia,Dawn went to Dej,0,Dawn went to Mangalia and DejRamon didn't go t...
9996,Dorothy has traveled to Mangalia and Giurgiu,Loretta didn't travel to Mangalia,Dorothy has traveled to Mangalia,Dorothy has traveled to Giurgiu,0,Dorothy has traveled to Mangalia and GiurgiuLo...
9997,Beverly has visited Giurgiu and Dej,Beverly didn't visit Reghin,Beverly has visited Giurgiu,Beverly has visited Dej,0,Beverly has visited Giurgiu and DejBeverly did...
9998,Flora has visited Baia Mare and Hunedoara,Flora didn't visit Bucharest,Flora has visited Baia Mare,Flora has visited Hunedoara,0,Flora has visited Baia Mare and HunedoaraFlora...
9999,Cora and Ruby have traveled to Blaj,Toni didn't travel to Blaj,Cora has traveled to Blaj,Ruby has traveled to Blaj,0,Cora and Ruby have traveled to BlajToni didn't...


In [4]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

# Lowercase, trim, and remove non-letter characters

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


example = "ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf"
print("Before:", example)
print()
print("After:", normalizeString(example))

Before: ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf

After: ddddda capoeeeeeee ! ! aas fdf


In [5]:
df2train.and_A = df2train.and_A.map(normalizeString)
df2train.and_B = df2train.and_B.map(normalizeString)

In [6]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Dawn went to Mangalia and Dej,Ramon didn't go to Dej,dawn went to mangalia,dawn went to dej,0,Dawn went to Mangalia and DejRamon didn't go t...
9996,Dorothy has traveled to Mangalia and Giurgiu,Loretta didn't travel to Mangalia,dorothy has traveled to mangalia,dorothy has traveled to giurgiu,0,Dorothy has traveled to Mangalia and GiurgiuLo...
9997,Beverly has visited Giurgiu and Dej,Beverly didn't visit Reghin,beverly has visited giurgiu,beverly has visited dej,0,Beverly has visited Giurgiu and DejBeverly did...
9998,Flora has visited Baia Mare and Hunedoara,Flora didn't visit Bucharest,flora has visited baia mare,flora has visited hunedoara,0,Flora has visited Baia Mare and HunedoaraFlora...
9999,Cora and Ruby have traveled to Blaj,Toni didn't travel to Blaj,cora has traveled to blaj,ruby has traveled to blaj,0,Cora and Ruby have traveled to BlajToni didn't...


In [7]:
df2train["sentence1_p"] = df2train["and_A"] + " and " + df2train["and_B"]

In [8]:
df2train.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,sentence1_p
0,June and Ross have traveled to Arad,Ross didn't travel to Arad,june has traveled to arad,ross has traveled to arad,1,June and Ross have traveled to AradRoss didn't...,june has traveled to arad and ross has travele...
1,Claudia has traveled to Mangalia and Slobozia,Claudia didn't travel to Mangalia,claudia has traveled to mangalia,claudia has traveled to slobozia,1,Claudia has traveled to Mangalia and SloboziaC...,claudia has traveled to mangalia and claudia h...
2,Byron has traveled to Turda and Bucharest,Dwight didn't travel to Bucharest,byron has traveled to turda,byron has traveled to bucharest,0,Byron has traveled to Turda and BucharestDwigh...,byron has traveled to turda and byron has trav...
3,Emily has visited Deva and Arad,Emily didn't visit Arad,emily has visited deva,emily has visited arad,1,Emily has visited Deva and AradEmily didn't vi...,emily has visited deva and emily has visited arad
4,Tyler and Nancy have visited Bucharest,Tyler didn't visit Bucharest,tyler has visited bucharest,nancy has visited bucharest,1,Tyler and Nancy have visited BucharestTyler di...,tyler has visited bucharest and nancy has visi...


In [9]:
df2train_plus = df2train[["sentence1_p", "sentence2", "label"]]

df2train_plus.sentence2 = df2train_plus.sentence2.map(normalizeString)
df2train_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [10]:
df2train_plus.to_csv("data/boolean3_control_train.csv", index=False)

## generating new data for test

In [11]:
df2_test.and_A = df2_test.and_A.map(normalizeString)
df2_test.and_B = df2_test.and_B.map(normalizeString)

In [12]:
df2_test["sentence1_p"] = df2_test["and_A"] + " and " + df2_test["and_B"]

In [13]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,sentence1_p
0,Karen has visited Bucharest and Oradea,Karen didn't visit Oradea,karen has visited bucharest,karen has visited oradea,1,Karen has visited Bucharest and OradeaKaren di...,karen has visited bucharest and karen has visi...
1,Fannie has traveled to Slobozia and Dej,Colleen didn't travel to Slobozia,fannie has traveled to slobozia,fannie has traveled to dej,0,Fannie has traveled to Slobozia and DejColleen...,fannie has traveled to slobozia and fannie has...
2,Calvin has visited Tulcea and Timisoara,Leo didn't visit Timisoara,calvin has visited tulcea,calvin has visited timisoara,0,Calvin has visited Tulcea and TimisoaraLeo did...,calvin has visited tulcea and calvin has visit...
3,Melvin has traveled to Blaj and Vaslui,Melvin didn't travel to Blaj,melvin has traveled to blaj,melvin has traveled to vaslui,1,Melvin has traveled to Blaj and VasluiMelvin d...,melvin has traveled to blaj and melvin has tra...
4,Philip and Dana went to Arad,Dana didn't go to Arad,philip went to arad,dana went to arad,1,Philip and Dana went to AradDana didn't go to ...,philip went to arad and dana went to arad


In [14]:
df2_test_plus = df2_test[["sentence1_p", "sentence2", "label"]]

df2_test_plus.sentence2 = df2_test_plus.sentence2.map(normalizeString)
df2_test_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [15]:
df2_test_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,karen has visited bucharest and karen has visi...,karen didn t visit oradea,1
1,fannie has traveled to slobozia and fannie has...,colleen didn t travel to slobozia,0
2,calvin has visited tulcea and calvin has visit...,leo didn t visit timisoara,0
3,melvin has traveled to blaj and melvin has tra...,melvin didn t travel to blaj,1
4,philip went to arad and dana went to arad,dana didn t go to arad,1


In [16]:
df2_test_plus.to_csv("data/boolean3_control_test.csv", index=False)