# Add model: translation attention ecoder-decocer over the b3 dataset

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data
import pandas as pd
import unicodedata
import string
import re
import random
import copy
from contra_qa.plots.functions  import simple_step_plot, plot_confusion_matrix
import  matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from nltk.translate.bleu_score import sentence_bleu


% matplotlib inline

### Preparing data

In [2]:
df2 = pd.read_csv("data/boolean3_train.csv")
df2_test = pd.read_csv("data/boolean3_test.csv")

df2["text"] = df2["sentence1"] + df2["sentence2"] 
df2_test["text"] = df2_test["sentence1"] + df2_test["sentence2"] 

all_sentences = list(df2.text.values) + list(df2_test.text.values)

df2train = df2

In [3]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Dawn went to Mangalia and Dej,Ramon didn't go to Dej,Dawn went to Mangalia,Dawn went to Dej,0,Dawn went to Mangalia and DejRamon didn't go t...
9996,Dorothy has traveled to Mangalia and Giurgiu,Loretta didn't travel to Mangalia,Dorothy has traveled to Mangalia,Dorothy has traveled to Giurgiu,0,Dorothy has traveled to Mangalia and GiurgiuLo...
9997,Beverly has visited Giurgiu and Dej,Beverly didn't visit Reghin,Beverly has visited Giurgiu,Beverly has visited Dej,0,Beverly has visited Giurgiu and DejBeverly did...
9998,Flora has visited Baia Mare and Hunedoara,Flora didn't visit Bucharest,Flora has visited Baia Mare,Flora has visited Hunedoara,0,Flora has visited Baia Mare and HunedoaraFlora...
9999,Cora and Ruby have traveled to Blaj,Toni didn't travel to Blaj,Cora has traveled to Blaj,Ruby has traveled to Blaj,0,Cora and Ruby have traveled to BlajToni didn't...


In [5]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [6]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

# Lowercase, trim, and remove non-letter characters

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


example = "ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf"
print("Before:", example)
print()
print("After:", normalizeString(example))

Before: ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf

After: ddddda capoeeeeeee ! ! aas fdf


In [7]:
pairs_A = list(zip(list(df2train.sentence1.values), list(df2train.and_A.values)))
pairs_B = list(zip(list(df2train.sentence1.values), list(df2train.and_B.values)))
pairs_A = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in pairs_A]
pairs_B = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in pairs_B]



In [8]:
all_text_pairs = zip(all_sentences, all_sentences)
all_text_pairs = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in all_text_pairs]

In [9]:
def readLangs(lang1, lang2, pairs, reverse=False):
    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [tuple(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [10]:
f = lambda x: len(x.split(" "))

MAX_LENGTH = np.max(list(map(f, all_sentences)))

In [11]:
def filterPair(p):
    cond1 = len(p[0].split(' ')) < MAX_LENGTH
    cond2 = len(p[1].split(' ')) < MAX_LENGTH 
    return cond1 and cond2

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]


In [12]:
def prepareData(lang1, lang2, pairs, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, pairs, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

In [13]:
_, _, training_pairs_A = prepareData("eng_enc",
                                             "eng_dec",
                                             pairs_A)

print()


input_lang, _, _ = prepareData("eng_enc",
                               "eng_dec",
                               all_text_pairs)

output_lang = copy.deepcopy(input_lang)


Read 10000 sentence pairs
Trimmed to 10000 sentence pairs
Counting words...
Counted words:
eng_enc 685
eng_dec 683

Read 11000 sentence pairs
Trimmed to 10714 sentence pairs
Counting words...
Counted words:
eng_enc 8008
eng_dec 8008


In [14]:
_, _, training_pairs_B = prepareData("eng_enc",
                                     "eng_dec",
                                     pairs_B)

Read 10000 sentence pairs
Trimmed to 10000 sentence pairs
Counting words...
Counted words:
eng_enc 685
eng_dec 683


### sentences 2 tensors

In [15]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]

In [16]:
def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

In [17]:
def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [18]:
def tensorsFromTriple(triple):
    input_tensor = tensorFromSentence(input_lang, triple[0])
    target_tensor = tensorFromSentence(output_lang, triple[1])
    label_tensor = torch.tensor(triple[2], dtype=torch.long).view((1))
    return (input_tensor, target_tensor, label_tensor)

### models

In [19]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [20]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [21]:
hidden_size = 256
eng_enc_v_size = input_lang.n_words
eng_dec_v_size = output_lang.n_words

In [22]:
input_lang.n_words

8008

In [23]:
encoderA = EncoderRNN(eng_enc_v_size, hidden_size)
decoderA = AttnDecoderRNN(hidden_size, eng_dec_v_size)
encoderA.load_state_dict(torch.load("b3_encoder1_att.pkl"))
decoderA.load_state_dict(torch.load("b3_decoder1_att.pkl"))

In [24]:
encoderB = EncoderRNN(eng_enc_v_size, hidden_size)
decoderB = AttnDecoderRNN(hidden_size, eng_dec_v_size)
encoderB.load_state_dict(torch.load("b3_encoder2_att.pkl"))
decoderB.load_state_dict(torch.load("b3_decoder2_att.pkl"))

## translating

In [25]:
def translate(encoder,
              decoder,
              sentence,
              max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(
            max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            _, topone = decoder_output.data.topk(1)
            if topone.item() == EOS_token:
                break
            else:
                decoded_words.append(output_lang.index2word[topone.item()])

            decoder_input = topone.squeeze().detach()

        return " ".join(decoded_words)

In [31]:
def projectA(sent):
    neural_translation = translate(encoderA,
                                   decoderA,
                                   sent,
                                   max_length=MAX_LENGTH)
    return neural_translation


def projectB(sent):
    neural_translation = translate(encoderB,
                                   decoderB,
                                   sent,
                                   max_length=MAX_LENGTH)
    return neural_translation

## translation of a trained model: and A

In [30]:
for t in training_pairs_A[0:3]:
    print("input_sentence : " + t[0])
    neural_translation = projectA(t[0])
    print("neural translation : " + neural_translation)
    reference = t[1]
    print("reference translation : " + reference)
    reference = reference.split(" ")
    candidate = neural_translation.split(" ")
    score = sentence_bleu([reference], candidate)
    print("blue score = {:.2f}".format(score))
    print()

input_sentence : june and ross have traveled to arad
neural translation : alison has traveled to arad
reference translation : june has traveled to arad
blue score = 0.67

input_sentence : claudia has traveled to mangalia and slobozia
neural translation : carl has traveled to mangalia
reference translation : claudia has traveled to mangalia
blue score = 0.67

input_sentence : byron has traveled to turda and bucharest
neural translation : jackie has traveled to turda
reference translation : byron has traveled to turda
blue score = 0.67



## translation of a trained model: and B

In [29]:
for t in training_pairs_B[0:3]:
    print("input_sentence : " + t[0])
    neural_translation = projectB(t[0])
    print("neural translation : " + neural_translation)
    reference = t[1]
    print("reference translation : " + reference)
    reference = reference.split(" ")
    candidate = neural_translation.split(" ")
    score = sentence_bleu([reference], candidate)
    print("blue score = {:.2f}".format(score))
    print()

input_sentence : june and ross have traveled to arad
neural translation : sandy has traveled to arad
reference translation : ross has traveled to arad
blue score = 0.67

input_sentence : claudia has traveled to mangalia and slobozia
neural translation : sandy has traveled to slobozia
reference translation : claudia has traveled to slobozia
blue score = 0.67

input_sentence : byron has traveled to turda and bucharest
neural translation : sandy has traveled to bucharest
reference translation : byron has traveled to bucharest
blue score = 0.67



## generating new data for training

In [38]:
df2train.sentence1 = df2train.sentence1.map(normalizeString)

In [39]:
df2train["project A"] = df2train.sentence1.map(projectA)

In [40]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A
0,june and ross have traveled to arad,Ross didn't travel to Arad,June has traveled to Arad,Ross has traveled to Arad,1,June and Ross have traveled to AradRoss didn't...,alison has traveled to arad
1,claudia has traveled to mangalia and slobozia,Claudia didn't travel to Mangalia,Claudia has traveled to Mangalia,Claudia has traveled to Slobozia,1,Claudia has traveled to Mangalia and SloboziaC...,carl has traveled to mangalia
2,byron has traveled to turda and bucharest,Dwight didn't travel to Bucharest,Byron has traveled to Turda,Byron has traveled to Bucharest,0,Byron has traveled to Turda and BucharestDwigh...,jackie has traveled to turda
3,emily has visited deva and arad,Emily didn't visit Arad,Emily has visited Deva,Emily has visited Arad,1,Emily has visited Deva and AradEmily didn't vi...,alison has visited deva
4,tyler and nancy have visited bucharest,Tyler didn't visit Bucharest,Tyler has visited Bucharest,Nancy has visited Bucharest,1,Tyler and Nancy have visited BucharestTyler di...,erin has visited bucharest


In [41]:
df2train["project B"] = df2train.sentence1.map(projectB)

In [42]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B
0,june and ross have traveled to arad,Ross didn't travel to Arad,June has traveled to Arad,Ross has traveled to Arad,1,June and Ross have traveled to AradRoss didn't...,alison has traveled to arad,sandy has traveled to arad
1,claudia has traveled to mangalia and slobozia,Claudia didn't travel to Mangalia,Claudia has traveled to Mangalia,Claudia has traveled to Slobozia,1,Claudia has traveled to Mangalia and SloboziaC...,carl has traveled to mangalia,sandy has traveled to slobozia
2,byron has traveled to turda and bucharest,Dwight didn't travel to Bucharest,Byron has traveled to Turda,Byron has traveled to Bucharest,0,Byron has traveled to Turda and BucharestDwigh...,jackie has traveled to turda,sandy has traveled to bucharest
3,emily has visited deva and arad,Emily didn't visit Arad,Emily has visited Deva,Emily has visited Arad,1,Emily has visited Deva and AradEmily didn't vi...,alison has visited deva,sandy has visited arad
4,tyler and nancy have visited bucharest,Tyler didn't visit Bucharest,Tyler has visited Bucharest,Nancy has visited Bucharest,1,Tyler and Nancy have visited BucharestTyler di...,erin has visited bucharest,sandy has visited bucharest


In [43]:
df2train["sentence1_p"] = df2train["project A"] + " and " + df2train["project B"]

In [44]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B,sentence1_p
0,june and ross have traveled to arad,Ross didn't travel to Arad,June has traveled to Arad,Ross has traveled to Arad,1,June and Ross have traveled to AradRoss didn't...,alison has traveled to arad,sandy has traveled to arad,alison has traveled to arad and sandy has trav...
1,claudia has traveled to mangalia and slobozia,Claudia didn't travel to Mangalia,Claudia has traveled to Mangalia,Claudia has traveled to Slobozia,1,Claudia has traveled to Mangalia and SloboziaC...,carl has traveled to mangalia,sandy has traveled to slobozia,carl has traveled to mangalia and sandy has tr...
2,byron has traveled to turda and bucharest,Dwight didn't travel to Bucharest,Byron has traveled to Turda,Byron has traveled to Bucharest,0,Byron has traveled to Turda and BucharestDwigh...,jackie has traveled to turda,sandy has traveled to bucharest,jackie has traveled to turda and sandy has tra...
3,emily has visited deva and arad,Emily didn't visit Arad,Emily has visited Deva,Emily has visited Arad,1,Emily has visited Deva and AradEmily didn't vi...,alison has visited deva,sandy has visited arad,alison has visited deva and sandy has visited ...
4,tyler and nancy have visited bucharest,Tyler didn't visit Bucharest,Tyler has visited Bucharest,Nancy has visited Bucharest,1,Tyler and Nancy have visited BucharestTyler di...,erin has visited bucharest,sandy has visited bucharest,erin has visited bucharest and sandy has visit...


In [51]:
df_train_plus = df2train[["sentence1_p", "sentence2", "label"]]

df_train_plus.sentence2 = df_train_plus.sentence2.map(normalizeString)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [52]:
df_train_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [53]:
df_train_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,alison has traveled to arad and sandy has trav...,ross didn t travel to arad,1
1,carl has traveled to mangalia and sandy has tr...,claudia didn t travel to mangalia,1
2,jackie has traveled to turda and sandy has tra...,dwight didn t travel to bucharest,0
3,alison has visited deva and sandy has visited ...,emily didn t visit arad,1
4,erin has visited bucharest and sandy has visit...,tyler didn t visit bucharest,1


In [54]:
df_train_plus.to_csv("data/boolean3_plus_train.csv", index=False)

# df2 = pd.read_csv("data/boolean3_train.csv")
# df2_test = pd.read_csv("data/boolean3_test.csv")

## generating new data for test

In [56]:
df2_test.sentence1 = df2_test.sentence1.map(normalizeString)

In [57]:
df2_test["project A"] = df2_test.sentence1.map(projectA)

In [58]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A
0,karen has visited bucharest and oradea,Karen didn't visit Oradea,Karen has visited Bucharest,Karen has visited Oradea,1,Karen has visited Bucharest and OradeaKaren di...,ida has visited bucharest
1,fannie has traveled to slobozia and dej,Colleen didn't travel to Slobozia,Fannie has traveled to Slobozia,Fannie has traveled to Dej,0,Fannie has traveled to Slobozia and DejColleen...,lillian has traveled to slobozia
2,calvin has visited tulcea and timisoara,Leo didn't visit Timisoara,Calvin has visited Tulcea,Calvin has visited Timisoara,0,Calvin has visited Tulcea and TimisoaraLeo did...,lillian has visited tulcea
3,melvin has traveled to blaj and vaslui,Melvin didn't travel to Blaj,Melvin has traveled to Blaj,Melvin has traveled to Vaslui,1,Melvin has traveled to Blaj and VasluiMelvin d...,jennie has traveled to blaj
4,philip and dana went to arad,Dana didn't go to Arad,Philip went to Arad,Dana went to Arad,1,Philip and Dana went to AradDana didn't go to ...,alison went to arad


In [59]:
df2_test["project B"] = df2_test.sentence1.map(projectB)

In [60]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B
0,karen has visited bucharest and oradea,Karen didn't visit Oradea,Karen has visited Bucharest,Karen has visited Oradea,1,Karen has visited Bucharest and OradeaKaren di...,ida has visited bucharest,sandy has visited oradea
1,fannie has traveled to slobozia and dej,Colleen didn't travel to Slobozia,Fannie has traveled to Slobozia,Fannie has traveled to Dej,0,Fannie has traveled to Slobozia and DejColleen...,lillian has traveled to slobozia,sandy has traveled to dej
2,calvin has visited tulcea and timisoara,Leo didn't visit Timisoara,Calvin has visited Tulcea,Calvin has visited Timisoara,0,Calvin has visited Tulcea and TimisoaraLeo did...,lillian has visited tulcea,sandy has visited timisoara
3,melvin has traveled to blaj and vaslui,Melvin didn't travel to Blaj,Melvin has traveled to Blaj,Melvin has traveled to Vaslui,1,Melvin has traveled to Blaj and VasluiMelvin d...,jennie has traveled to blaj,sandy has traveled to vaslui
4,philip and dana went to arad,Dana didn't go to Arad,Philip went to Arad,Dana went to Arad,1,Philip and Dana went to AradDana didn't go to ...,alison went to arad,sandy went to arad


In [61]:
df2_test["sentence1_p"] = df2_test["project A"] + " and " + df2_test["project B"]

In [63]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B,sentence1_p
0,karen has visited bucharest and oradea,Karen didn't visit Oradea,Karen has visited Bucharest,Karen has visited Oradea,1,Karen has visited Bucharest and OradeaKaren di...,ida has visited bucharest,sandy has visited oradea,ida has visited bucharest and sandy has visite...
1,fannie has traveled to slobozia and dej,Colleen didn't travel to Slobozia,Fannie has traveled to Slobozia,Fannie has traveled to Dej,0,Fannie has traveled to Slobozia and DejColleen...,lillian has traveled to slobozia,sandy has traveled to dej,lillian has traveled to slobozia and sandy has...
2,calvin has visited tulcea and timisoara,Leo didn't visit Timisoara,Calvin has visited Tulcea,Calvin has visited Timisoara,0,Calvin has visited Tulcea and TimisoaraLeo did...,lillian has visited tulcea,sandy has visited timisoara,lillian has visited tulcea and sandy has visit...
3,melvin has traveled to blaj and vaslui,Melvin didn't travel to Blaj,Melvin has traveled to Blaj,Melvin has traveled to Vaslui,1,Melvin has traveled to Blaj and VasluiMelvin d...,jennie has traveled to blaj,sandy has traveled to vaslui,jennie has traveled to blaj and sandy has trav...
4,philip and dana went to arad,Dana didn't go to Arad,Philip went to Arad,Dana went to Arad,1,Philip and Dana went to AradDana didn't go to ...,alison went to arad,sandy went to arad,alison went to arad and sandy went to arad


In [64]:
df2_test_plus = df2_test[["sentence1_p", "sentence2", "label"]]

df2_test_plus.sentence2 = df2_test_plus.sentence2.map(normalizeString)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [65]:
df2_test_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [66]:
df2_test_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,ida has visited bucharest and sandy has visite...,karen didn t visit oradea,1
1,lillian has traveled to slobozia and sandy has...,colleen didn t travel to slobozia,0
2,lillian has visited tulcea and sandy has visit...,leo didn t visit timisoara,0
3,jennie has traveled to blaj and sandy has trav...,melvin didn t travel to blaj,1
4,alison went to arad and sandy went to arad,dana didn t go to arad,1


In [67]:
df2_test_plus.to_csv("data/boolean3_plus_test.csv", index=False)