# Using translation attention ecoder-decocer to transform the b4 dataset

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data
import pandas as pd
import unicodedata
import string
import re
import random
import copy
from contra_qa.plots.functions  import simple_step_plot, plot_confusion_matrix
import  matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from nltk.translate.bleu_score import sentence_bleu


% matplotlib inline

### Preparing data

In [2]:
df2 = pd.read_csv("data/boolean4_train.csv")
df2_test = pd.read_csv("data/boolean4_test.csv")

df2["text"] = df2["sentence1"] + df2["sentence2"] 
df2_test["text"] = df2_test["sentence1"] + df2_test["sentence2"] 

all_sentences = list(df2.text.values) + list(df2_test.text.values)

df2train = df2

In [3]:
df2train.tail()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text
9995,Lynn drew ahead of the other runners and felt ...,Billie didn't draw ahead of the other runners,Lynn drew ahead of the other runners,Lynn felt an outsider,0,Lynn drew ahead of the other runners and felt ...
9996,Crystal slept for eight hours and dreamed of r...,Crystal didn't see him,Crystal slept for eight hours,Crystal dreamed of running in Lugoj,0,Crystal slept for eight hours and dreamed of r...
9997,Wallace lost Alberto's green car and showed Kr...,Alex didn't lose Alberto's green car,Wallace lost Alberto's green car,Wallace showed Kristen's old photos,0,Wallace lost Alberto's green car and showed Kr...
9998,Shelly put Beverly in charge and burned Chad's...,Shelly didn't burn Chad's house,Shelly put Beverly in charge,Shelly burned Chad's house,1,Shelly put Beverly in charge and burned Chad's...
9999,Daisy ate an apple and chose to live in Blaj,Daisy didn't choose to live in Blaj,Daisy ate an apple,Daisy chose to live in Blaj,1,Daisy ate an apple and chose to live in BlajDa...


In [4]:
SOS_token = 0
EOS_token = 1

class Lang:
    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence.split(' '):
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1

In [5]:
# Turn a Unicode string to plain ASCII, thanks to
# http://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn')

# Lowercase, trim, and remove non-letter characters

def normalizeString(s):
    s = unicodeToAscii(s.lower().strip())
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


example = "ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf"
print("Before:", example)
print()
print("After:", normalizeString(example))

Before: ddddda'''~~çãpoeéééééÈ'''#$$##@!@!@AAS@#12323fdf

After: ddddda capoeeeeeee ! ! aas fdf


In [6]:
pairs_A = list(zip(list(df2train.sentence1.values), list(df2train.and_A.values)))
pairs_B = list(zip(list(df2train.sentence1.values), list(df2train.and_B.values)))
pairs_A = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in pairs_A]
pairs_B = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in pairs_B]



In [7]:
all_text_pairs = zip(all_sentences, all_sentences)
all_text_pairs = [(normalizeString(s1), normalizeString(s2)) for s1, s2 in all_text_pairs]

In [8]:
def readLangs(lang1, lang2, pairs, reverse=False):
    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [tuple(reversed(p)) for p in pairs]
        input_lang = Lang(lang2)
        output_lang = Lang(lang1)
    else:
        input_lang = Lang(lang1)
        output_lang = Lang(lang2)

    return input_lang, output_lang, pairs

In [9]:
f = lambda x: len(x.split(" "))

MAX_LENGTH = np.max(list(map(f, all_sentences)))

In [10]:
def filterPair(p):
    cond1 = len(p[0].split(' ')) < MAX_LENGTH
    cond2 = len(p[1].split(' ')) < MAX_LENGTH 
    return cond1 and cond2

def filterPairs(pairs):
    return [pair for pair in pairs if filterPair(pair)]


In [11]:
def prepareData(lang1, lang2, pairs, reverse=False):
    input_lang, output_lang, pairs = readLangs(lang1, lang2, pairs, reverse)
    print("Read %s sentence pairs" % len(pairs))
    pairs = filterPairs(pairs)
    print("Trimmed to %s sentence pairs" % len(pairs))
    print("Counting words...")
    for pair in pairs:
        input_lang.addSentence(pair[0])
        output_lang.addSentence(pair[1])
    print("Counted words:")
    print(input_lang.name, input_lang.n_words)
    print(output_lang.name, output_lang.n_words)
    return input_lang, output_lang, pairs

In [12]:
_, _, training_pairs_A = prepareData("eng_enc",
                                             "eng_dec",
                                             pairs_A)

print()


input_lang, _, _ = prepareData("eng_enc",
                               "eng_dec",
                               all_text_pairs)

output_lang = copy.deepcopy(input_lang)


Read 10000 sentence pairs
Trimmed to 10000 sentence pairs
Counting words...
Counted words:
eng_enc 1027
eng_dec 1023

Read 11000 sentence pairs
Trimmed to 10978 sentence pairs
Counting words...
Counted words:
eng_enc 10962
eng_dec 10962


In [13]:
_, _, training_pairs_B = prepareData("eng_enc",
                                     "eng_dec",
                                     pairs_B)

Read 10000 sentence pairs
Trimmed to 10000 sentence pairs
Counting words...
Counted words:
eng_enc 1027
eng_dec 1022


### sentences 2 tensors

In [14]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence.split(' ')]

In [15]:
def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

In [16]:
def tensorsFromPair(pair):
    input_tensor = tensorFromSentence(input_lang, pair[0])
    target_tensor = tensorFromSentence(output_lang, pair[1])
    return (input_tensor, target_tensor)

In [17]:
def tensorsFromTriple(triple):
    input_tensor = tensorFromSentence(input_lang, triple[0])
    target_tensor = tensorFromSentence(output_lang, triple[1])
    label_tensor = torch.tensor(triple[2], dtype=torch.long).view((1))
    return (input_tensor, target_tensor, label_tensor)

### models

In [18]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [19]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))
        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [20]:
hidden_size = 256
eng_enc_v_size = input_lang.n_words
eng_dec_v_size = output_lang.n_words

In [21]:
input_lang.n_words

10962

In [22]:
encoderA = EncoderRNN(eng_enc_v_size, hidden_size)
decoderA = AttnDecoderRNN(hidden_size, eng_dec_v_size)
encoderA.load_state_dict(torch.load("b4_encoder1_att.pkl"))
decoderA.load_state_dict(torch.load("b4_decoder1_att.pkl"))

In [23]:
encoderB = EncoderRNN(eng_enc_v_size, hidden_size)
decoderB = AttnDecoderRNN(hidden_size, eng_dec_v_size)
encoderB.load_state_dict(torch.load("b4_encoder2_att.pkl"))
decoderB.load_state_dict(torch.load("b4_decoder2_att.pkl"))

## translating

In [24]:
def translate(encoder,
              decoder,
              sentence,
              max_length=MAX_LENGTH):
    with torch.no_grad():
        input_tensor = tensorFromSentence(input_lang, sentence)
        input_length = input_tensor.size()[0]
        encoder_hidden = encoder.initHidden()

        encoder_outputs = torch.zeros(
            max_length, encoder.hidden_size, device=device)

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] += encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

        decoder_hidden = encoder_hidden

        decoded_words = []

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(decoder_input, decoder_hidden, encoder_outputs)
            _, topone = decoder_output.data.topk(1)
            if topone.item() == EOS_token:
                break
            else:
                decoded_words.append(output_lang.index2word[topone.item()])

            decoder_input = topone.squeeze().detach()

        return " ".join(decoded_words)

In [25]:
def projectA(sent):
    neural_translation = translate(encoderA,
                                   decoderA,
                                   sent,
                                   max_length=MAX_LENGTH)
    return neural_translation


def projectB(sent):
    neural_translation = translate(encoderB,
                                   decoderB,
                                   sent,
                                   max_length=MAX_LENGTH)
    return neural_translation

## translation of a trained model: and A

In [26]:
for t in training_pairs_A[0:3]:
    print("input_sentence : " + t[0])
    neural_translation = projectA(t[0])
    print("neural translation : " + neural_translation)
    reference = t[1]
    print("reference translation : " + reference)
    reference = reference.split(" ")
    candidate = neural_translation.split(" ")
    score = sentence_bleu([reference], candidate)
    print("blue score = {:.2f}".format(score))
    print()

input_sentence : reginald caught a mango fish and ran from me
neural translation : jeffrey caught a mango fish
reference translation : reginald caught a mango fish
blue score = 0.67

input_sentence : sandy thought i was living in craiova and grew up in jail
neural translation : julian thought i was helping in bucharest
reference translation : sandy thought i was living in craiova
blue score = 0.44

input_sentence : brenda burned chris s house and let use this pen
neural translation : tyler broke jeffrey s house
reference translation : brenda burned chris s house
blue score = 0.56



Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().
Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


## translation of a trained model: and B

In [27]:
for t in training_pairs_B[0:3]:
    print("input_sentence : " + t[0])
    neural_translation = projectB(t[0])
    print("neural translation : " + neural_translation)
    reference = t[1]
    print("reference translation : " + reference)
    reference = reference.split(" ")
    candidate = neural_translation.split(" ")
    score = sentence_bleu([reference], candidate)
    print("blue score = {:.2f}".format(score))
    print()

input_sentence : reginald caught a mango fish and ran from me
neural translation : dianne ran from me
reference translation : reginald ran from me
blue score = 0.71

input_sentence : sandy thought i was living in craiova and grew up in jail
neural translation : beth grew up in jail
reference translation : sandy grew up in jail
blue score = 0.67

input_sentence : brenda burned chris s house and let use this pen
neural translation : let let use this pen
reference translation : brenda let use this pen
blue score = 0.67



Corpus/Sentence contains 0 counts of 4-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


## generating new data for training

In [28]:
df2train.sentence1 = df2train.sentence1.map(normalizeString)

In [29]:
df2train["project A"] = df2train.sentence1.map(projectA)

In [30]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A
0,reginald caught a mango fish and ran from me,Reginald didn't catch a mango fish,Reginald caught a mango fish,Reginald ran from me,1,Reginald caught a mango fish and ran from meRe...,jeffrey caught a mango fish
1,sandy thought i was living in craiova and grew...,Sandy didn't think I was living in Craiova,Sandy thought I was living in Craiova,Sandy grew up in jail,1,Sandy thought I was living in Craiova and grew...,julian thought i was helping in this
2,brenda burned chris s house and let use this pen,Brenda didn't let use this pen,Brenda burned Chris's house,Brenda let use this pen,1,Brenda burned Chris's house and let use this p...,tyler broke julie s house
3,brenda sold josephine s house and ate an apple,Brenda didn't eat an apple,Brenda sold Josephine's house,Brenda ate an apple,1,Brenda sold Josephine's house and ate an apple...,tyler broke jeffrey s house
4,tamara fell off and hung up on me,Tamara didn't ring from me,Tamara fell off,Tamara hung up on me,0,Tamara fell off and hung up on meTamara didn't...,christine fell off


In [31]:
df2train["project B"] = df2train.sentence1.map(projectB)

In [32]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B
0,reginald caught a mango fish and ran from me,Reginald didn't catch a mango fish,Reginald caught a mango fish,Reginald ran from me,1,Reginald caught a mango fish and ran from meRe...,jeffrey caught a mango fish,dianne ran from me
1,sandy thought i was living in craiova and grew...,Sandy didn't think I was living in Craiova,Sandy thought I was living in Craiova,Sandy grew up in jail,1,Sandy thought I was living in Craiova and grew...,julian thought i was helping in this,grew grew up in jail
2,brenda burned chris s house and let use this pen,Brenda didn't let use this pen,Brenda burned Chris's house,Brenda let use this pen,1,Brenda burned Chris's house and let use this p...,tyler broke julie s house,let let use this pen
3,brenda sold josephine s house and ate an apple,Brenda didn't eat an apple,Brenda sold Josephine's house,Brenda ate an apple,1,Brenda sold Josephine's house and ate an apple...,tyler broke jeffrey s house,alfred ate an apple
4,tamara fell off and hung up on me,Tamara didn't ring from me,Tamara fell off,Tamara hung up on me,0,Tamara fell off and hung up on meTamara didn't...,christine fell off,beth hung up on me


In [33]:
df2train["sentence1_p"] = df2train["project A"] + " and " + df2train["project B"]

In [34]:
df2.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B,sentence1_p
0,reginald caught a mango fish and ran from me,Reginald didn't catch a mango fish,Reginald caught a mango fish,Reginald ran from me,1,Reginald caught a mango fish and ran from meRe...,jeffrey caught a mango fish,dianne ran from me,jeffrey caught a mango fish and dianne ran fro...
1,sandy thought i was living in craiova and grew...,Sandy didn't think I was living in Craiova,Sandy thought I was living in Craiova,Sandy grew up in jail,1,Sandy thought I was living in Craiova and grew...,julian thought i was helping in this,grew grew up in jail,julian thought i was helping in this and grew ...
2,brenda burned chris s house and let use this pen,Brenda didn't let use this pen,Brenda burned Chris's house,Brenda let use this pen,1,Brenda burned Chris's house and let use this p...,tyler broke julie s house,let let use this pen,tyler broke julie s house and let let use this...
3,brenda sold josephine s house and ate an apple,Brenda didn't eat an apple,Brenda sold Josephine's house,Brenda ate an apple,1,Brenda sold Josephine's house and ate an apple...,tyler broke jeffrey s house,alfred ate an apple,tyler broke jeffrey s house and alfred ate an ...
4,tamara fell off and hung up on me,Tamara didn't ring from me,Tamara fell off,Tamara hung up on me,0,Tamara fell off and hung up on meTamara didn't...,christine fell off,beth hung up on me,christine fell off and beth hung up on me


In [35]:
df_train_plus = df2train[["sentence1_p", "sentence2", "label"]]

df_train_plus.sentence2 = df_train_plus.sentence2.map(normalizeString)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [36]:
df_train_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [37]:
df_train_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,jeffrey caught a mango fish and dianne ran fro...,reginald didn t catch a mango fish,1
1,julian thought i was helping in this and grew ...,sandy didn t think i was living in craiova,1
2,tyler broke julie s house and let let use this...,brenda didn t let use this pen,1
3,tyler broke jeffrey s house and alfred ate an ...,brenda didn t eat an apple,1
4,christine fell off and beth hung up on me,tamara didn t ring from me,0


In [38]:
df_train_plus.to_csv("data/boolean4_plus_train.csv", index=False)

## generating new data for test

In [39]:
df2_test.sentence1 = df2_test.sentence1.map(normalizeString)

In [40]:
df2_test["project A"] = df2_test.sentence1.map(projectA)

In [41]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A
0,seth broke the rules and knew antoinette s secret,Seth didn't know Antoinette's secret,Seth broke the rules,Seth knew Antoinette's secret,1,Seth broke the rules and knew Antoinette's sec...,jeffrey broke the rules
1,joy lent me gerald s icy bicycle and blew it,Frances didn't lend me Gerald's icy bicycle,Joy lent me Gerald's icy bicycle,Joy blew it,0,Joy lent me Gerald's icy bicycle and blew itFr...,francisco lent me drive s purple car
2,arlene grew up and ate an apple,Arlene didn't hang up on me,Arlene grew up,Arlene ate an apple,0,Arlene grew up and ate an appleArlene didn't h...,christine grew up
3,beverly met him and froze juana s bank account,Antoinette didn't freeze Juana's bank account,Beverly met him,Beverly froze Juana's bank account,0,Beverly met him and froze Juana's bank account...,christine paid him
4,julie beat her and sang a nice song,Elmer didn't beat her,Julie beat her,Julie sang a nice song,0,Julie beat her and sang a nice songElmer didn'...,christine paid her


In [42]:
df2_test["project B"] = df2_test.sentence1.map(projectB)

In [43]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B
0,seth broke the rules and knew antoinette s secret,Seth didn't know Antoinette's secret,Seth broke the rules,Seth knew Antoinette's secret,1,Seth broke the rules and knew Antoinette's sec...,jeffrey broke the rules,albert knew knew s secret
1,joy lent me gerald s icy bicycle and blew it,Frances didn't lend me Gerald's icy bicycle,Joy lent me Gerald's icy bicycle,Joy blew it,0,Joy lent me Gerald's icy bicycle and blew itFr...,francisco lent me drive s purple car,leslie blew it
2,arlene grew up and ate an apple,Arlene didn't hang up on me,Arlene grew up,Arlene ate an apple,0,Arlene grew up and ate an appleArlene didn't h...,christine grew up,alfred ate an apple
3,beverly met him and froze juana s bank account,Antoinette didn't freeze Juana's bank account,Beverly met him,Beverly froze Juana's bank account,0,Beverly met him and froze Juana's bank account...,christine paid him,harvey froze my s bank account
4,julie beat her and sang a nice song,Elmer didn't beat her,Julie beat her,Julie sang a nice song,0,Julie beat her and sang a nice songElmer didn'...,christine paid her,sang sang a nice song


In [44]:
df2_test["sentence1_p"] = df2_test["project A"] + " and " + df2_test["project B"]

In [45]:
df2_test.head()

Unnamed: 0,sentence1,sentence2,and_A,and_B,label,text,project A,project B,sentence1_p
0,seth broke the rules and knew antoinette s secret,Seth didn't know Antoinette's secret,Seth broke the rules,Seth knew Antoinette's secret,1,Seth broke the rules and knew Antoinette's sec...,jeffrey broke the rules,albert knew knew s secret,jeffrey broke the rules and albert knew knew s...
1,joy lent me gerald s icy bicycle and blew it,Frances didn't lend me Gerald's icy bicycle,Joy lent me Gerald's icy bicycle,Joy blew it,0,Joy lent me Gerald's icy bicycle and blew itFr...,francisco lent me drive s purple car,leslie blew it,francisco lent me drive s purple car and lesli...
2,arlene grew up and ate an apple,Arlene didn't hang up on me,Arlene grew up,Arlene ate an apple,0,Arlene grew up and ate an appleArlene didn't h...,christine grew up,alfred ate an apple,christine grew up and alfred ate an apple
3,beverly met him and froze juana s bank account,Antoinette didn't freeze Juana's bank account,Beverly met him,Beverly froze Juana's bank account,0,Beverly met him and froze Juana's bank account...,christine paid him,harvey froze my s bank account,christine paid him and harvey froze my s bank ...
4,julie beat her and sang a nice song,Elmer didn't beat her,Julie beat her,Julie sang a nice song,0,Julie beat her and sang a nice songElmer didn'...,christine paid her,sang sang a nice song,christine paid her and sang sang a nice song


In [46]:
df2_test_plus = df2_test[["sentence1_p", "sentence2", "label"]]

df2_test_plus.sentence2 = df2_test_plus.sentence2.map(normalizeString)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


In [47]:
df2_test_plus.rename(columns={"sentence1_p": "sentence1"}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


In [48]:
df2_test_plus.head()

Unnamed: 0,sentence1,sentence2,label
0,jeffrey broke the rules and albert knew knew s...,seth didn t know antoinette s secret,1
1,francisco lent me drive s purple car and lesli...,frances didn t lend me gerald s icy bicycle,0
2,christine grew up and alfred ate an apple,arlene didn t hang up on me,0
3,christine paid him and harvey froze my s bank ...,antoinette didn t freeze juana s bank account,0
4,christine paid her and sang sang a nice song,elmer didn t beat her,0


In [49]:
df2_test_plus.to_csv("data/boolean4_plus_test.csv", index=False)