In [141]:
# !python -m spacy download en_core_web_sm

# !pip install textacy

In [24]:
import pandas as pd
import re
import spacy
import textacy

nlp = spacy.load("en_core_web_sm")
from tqdm.auto import tqdm
tqdm.pandas()

### Genders for subjects based on italian

In [25]:
genderDict = {
    "pear": "kar",
    "author": "kon",
    "authors": "kons",
    "banana": "kar",
    "biscuit": "kon",
    "book": "kon",
    "bottle": "kar",
    "box": "kar",
    "boy": "kon",
    "boys": "kons",
    "bulb": "kar",
    "cabinet": "kar",
    "cap": "kon",
    "cat": "kon",
    "cats": "kons",
    "chapter": "kon",
    "chalk": "kon",
    "cup": "kar",
    "cucumber": "kon",
    "dog": "kon",
    "dogs": "kons",
    "fish": "kon",
    "fruit": "kar",
    "girl": "kar",
    "girls": "kars",
    "hill": "kar",
    "man": "kon",
    "men": "kons",
    "meal": "kon",
    "mountain": "kar",
    "mouse": "kon",
    "newspaper": "kon",
    "pizza": "kar",
    "poet": "kon",
    "poets": "kons",
    "poem": "kar",
    "rock": "kon",
    "roof": "kon",
    "speaker": "kon",
    "speakers": "kons",
    "staircase": "kar",
    "story": "kar",
    "teacher": "kon",
    "teachers": "kons",
    "toy": "kon",
    "tree": "kar",
    "woman": "kar",
    "women": "kars",
    "writer": "kon",
    "writers": "kons"
}

pastTense = {
    'climbs' : 'climbed',
    'reads': 'read',
    'carries': 'carried',
    'eats': 'ate',
    'holds': 'held',
    'takes' :'took',
    'brings': 'brought',
    'reads': 'read',
    'climb' : 'climbed',
    'read': 'read',
    'carry': 'carried',
    'eat': 'ate',
    'hold': 'held',
    'take' :'took',
    'bring': 'brought',
    'read': 'read'
}

infinitive = {
    'climbs' : 'to climb',
    'reads': 'to read',
    'carries': 'to carry',
    'eats': 'to eat',
    'holds': 'to hold',
    'takes' : 'to take',
    'brings': 'to bring',
    'reads': 'to read',
    'climb' : 'to climb',
    'read': 'to read',
    'carry': 'to carry',
    'eat': 'to eat',
    'hold': 'to hold',
    'take' : 'to take',
    'bring': 'to bring',
    'read': 'to read'
}

pluralObjects = {
    'fish': 'fish',
    'mouse': 'mice',
    'bottle': 'bottles',
    'newspaper': 'newspapers',
    'chalk': 'chalks',
    'box': 'boxes',
    'cap': 'caps',
    'bulb': 'bulbs',
    'cup': 'cups',
    'toy': 'toys',
    'staircase': 'staircases',
    'rock': 'rocks',
    'hill': 'hills',
    'mountain': 'mountains',
    'roof': 'roofs',
    'tree': 'trees',
    'biscuit': 'biscuits',
    'banana': 'bananas',
    'pear': 'pears',
    'meal': 'meals',
    'fruit': 'fruits',
    'cucumber': 'cucumbers',
    'pizza': 'pizzas',
    'book': 'books',
    'poem': 'poems',
    'story': 'stories',
    'chapter': 'chapters'
}

passiveSeed = {
    'carries': 'carried',
    'carry': 'carried',
    'holds': 'held',
    'hold': 'held',
    'takes': 'taken',
    'take': 'taken',
    'brings': 'brought',
    'bring': 'brought',
    'climbs': 'climbed',
    'climb': 'climbed',
    'eats': 'eaten',
    'eat': 'eaten',
    'reads': 'read',
    'read': 'read'
}


# Generate sentences

In [32]:
seed = [ { 'verb' : ['carries', 'holds', 'takes', 'brings'], 'verbinf': ['carry', 'hold', 'take', 'bring'], 'subject': ['dog', 'cat', 'man', 'woman', 'teacher', 'girl', 'boy'], 'object': ['fish', 'mouse', 'bottle', 'newspaper', 'chalk', 'box', 'cap', 'bulb', 'cup', 'toy']},

{ 'verb': ['climbs'], 'verbinf': ['climb'], 'subject': ['dog', 'cat', 'man', 'woman', 'teacher', 'girl', 'boy'], 'object': ['staircase', 'rock', 'hill', 'mountain', 'roof', 'tree'] },

{ 'verb': ['eats'], 'verbinf': ['eat'], 'subject' : ['dog', 'cat', 'man', 'woman', 'teacher', 'girl', 'boy'], 'object': ['biscuit', 'fish', 'banana', 'pear', 'meal', 'fruit', 'cucumber', 'pizza' ]},
{'verb': ['reads'], 'verbinf': ['read'], 'subject' : ['poet', 'author', 'writer', 'speaker', 'teacher', 'girl', 'boy'], 'object': ['book', 'poem', 'story', 'chapter']} ]

subordinateSeed = [ {'verb' : ['sees', 'says', 'notices', 'states', 'claims'], 
                     'subject': ['Sheela', 'Leela', 'Maria', 'Gomu', 'John', 'Tom', 'Harry'], }]

df = pd.DataFrame()

import random
for obj in seed:
    for subj in obj['subject']:
        for ob in obj['object']:
            for verb, verbinf in zip(obj['verb'], obj['verbinf']):
                sdet = random.choice(['the', 'a'])
                odet = random.choice(['the', 'a'])
                pSubj = random.choice(subordinateSeed[0]['subject'])
                pVerb = random.choice(subordinateSeed[0]['verb'])
                df = pd.concat([df, pd.DataFrame.from_dict([{"sentence": f"{sdet} {subj} {verb} {odet} {ob}", "subordinate-sentence": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}", "passive-sentence": f"{odet} {ob} is {passiveSeed[verb]} by {sdet} {subj}",
                                                             "en-u-1-negative": f"{sdet} {subj} {verbinf} {odet} doesn't {ob}", "en-u-2-invert": f"{ob} {odet} {verb} {subj} {sdet}", "en-u-3-subquestion": f"{pSubj} {pVerb} that does the {subj} {verbinf} the {ob}"}])])

In [33]:

pluralSeed = [ { 'verb' : ['carry', 'hold', 'take', 'bring'], 'subject': ['dogs', 'cats', 'men', 'women', 'teachers', 'girls', 'boys'], 'object': ['fish', 'mouse', 'bottle', 'newspaper', 'chalk', 'box', 'cap', 'bulb', 'cup', 'toy']},

{ 'verb': ['climb'], 'subject': ['dogs', 'cats', 'men', 'women', 'teachers', 'girls', 'boys'], 'object': ['staircase', 'rock', 'hill', 'mountain', 'roof', 'tree'] },

{ 'verb': ['eat'], 'subject' : ['dogs', 'cats', 'men', 'women', 'teachers', 'girls', 'boys'], 'object': ['biscuit', 'fish', 'banana', 'pear', 'meal', 'fruit', 'cucumber', 'pizza' ] },
{'verb': ['read'], 'subject' : ['poets', 'authors', 'writers', 'speakers', 'teachers', 'girls', 'boys'], 'object': ['book', 'poem', 'story', 'chapter']} ]

import random
for obj in pluralSeed:
    for subj in obj['subject']:
        for ob in obj['object']:
            for verb in obj['verb']:
                pSubj = random.choice(subordinateSeed[0]['subject'])
                pVerb = random.choice(subordinateSeed[0]['verb'])
                odet = random.choice(['the', 'a'])
                df = pd.concat([df, pd.DataFrame.from_dict([{"sentence": f"the {subj} {verb} {odet} {ob}", "subordinate-sentence": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}", "passive-sentence": f"{odet} {ob} is {passiveSeed[verb]} by the {subj}",
                                                                             "en-u-1-negative": f"the {subj} {verbinf} {odet} doesn't {ob}", "en-u-2-invert": f"{ob} {odet} {verb} {subj} the", "en-u-3-subquestion": f"{pSubj} {pVerb} that do the {subj} {verbinf} {odet} {ob}"}])])
                if (f"{odet} {ob} is {passiveSeed[verb]} by the {subj}" == None):
                    print(f"{odet} {ob} is {passiveSeed[verb]} by the {subj}", odet, ob, passiveSeed[verb], subj)

df.reset_index()

Unnamed: 0,index,sentence,subordinate-sentence,passive-sentence,en-u-1-negative,en-u-2-invert,en-u-3-subquestion
0,0,a dog carries the fish,Maria says that the dog carries the fish,the fish is carried by a dog,a dog carry the doesn't fish,fish the carries dog a,Maria says that does the dog carry the fish
1,0,the dog holds the fish,Leela claims that the dog holds the fish,the fish is held by the dog,the dog hold the doesn't fish,fish the holds dog the,Leela claims that does the dog hold the fish
2,0,the dog takes a fish,Sheela states that the dog takes the fish,a fish is taken by the dog,the dog take a doesn't fish,fish a takes dog the,Sheela states that does the dog take the fish
3,0,a dog brings the fish,John says that the dog brings the fish,the fish is brought by a dog,a dog bring the doesn't fish,fish the brings dog a,John says that does the dog bring the fish
4,0,the dog carries a mouse,Leela claims that the dog carries the mouse,a mouse is carried by the dog,the dog carry a doesn't mouse,mouse a carries dog the,Leela claims that does the dog carry the mouse
...,...,...,...,...,...,...,...
807,0,the girls read the chapter,Tom notices that the girls read the chapter,the chapter is read by the girls,the girls read the doesn't chapter,chapter the read girls the,Tom notices that do the girls read the chapter
808,0,the boys read the book,Leela claims that the boys read the book,the book is read by the boys,the boys read the doesn't book,book the read boys the,Leela claims that do the boys read the book
809,0,the boys read a poem,Tom says that the boys read the poem,a poem is read by the boys,the boys read a doesn't poem,poem a read boys the,Tom says that do the boys read a poem
810,0,the boys read a story,Gomu says that the boys read the story,a story is read by the boys,the boys read a doesn't story,story a read boys the,Gomu says that do the boys read a story


## Italian sentence

In [5]:
df['it'] = df.progress_apply(lambda row: " ".join([genderDict[row['sentence'].split(" ")[1]]] + row['sentence'].split(" ")[1:3] + [genderDict[row['sentence'].split(" ")[4]]] + [row['sentence'].split(" ")[-1]]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 71506.33it/s]


## IT Real grammar 1 (Null Subject parameter)

In [6]:
df['it-r-1-null_subject'] = df.progress_apply(lambda row: " ".join(row['it'].split(" ")[2:]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 201239.36it/s]


## IT Real Grammar 2 (Passive construction)

In [7]:
df['it-r-2-passive'] =  df.progress_apply(lambda row: " ".join([genderDict[row['passive-sentence'].split(" ")[1]]] + row['passive-sentence'].split(" ")[1:-2] + [genderDict[row['passive-sentence'].split(" ")[-1]]] + [row['passive-sentence'].split(" ")[-1]]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 76556.63it/s]


## IT Real Grammar 3 (Subordinate construction)

In [8]:
df['it-r-3-subordinate'] =  df.progress_apply(lambda row: " ".join(row['subordinate-sentence'].split(" ")[0:3] + row['it'].split(" ")), axis=1)

100%|██████████| 812/812 [00:00<00:00, 111263.47it/s]


In [9]:
df

Unnamed: 0,sentence,subordinate-sentence,passive-sentence,it,it-r-1-null_subject,it-r-2-passive,it-r-3-subordinate
0,a dog carries a fish,Sheela notices that the dog carries the fish,a fish is carried by a dog,kon dog carries kon fish,carries kon fish,kon fish is carried by kon dog,Sheela notices that kon dog carries kon fish
0,the dog holds a fish,John notices that the dog holds the fish,a fish is held by the dog,kon dog holds kon fish,holds kon fish,kon fish is held by kon dog,John notices that kon dog holds kon fish
0,a dog takes the fish,Tom notices that the dog takes the fish,the fish is taken by a dog,kon dog takes kon fish,takes kon fish,kon fish is taken by kon dog,Tom notices that kon dog takes kon fish
0,the dog brings a fish,Gomu notices that the dog brings the fish,a fish is brought by the dog,kon dog brings kon fish,brings kon fish,kon fish is brought by kon dog,Gomu notices that kon dog brings kon fish
0,a dog carries the mouse,Sheela says that the dog carries the mouse,the mouse is carried by a dog,kon dog carries kon mouse,carries kon mouse,kon mouse is carried by kon dog,Sheela says that kon dog carries kon mouse
...,...,...,...,...,...,...,...
0,the girls read the chapter,Maria says that the girls read the chapter,the chapter is read by the girls,kars girls read kon chapter,read kon chapter,kon chapter is read by kars girls,Maria says that kars girls read kon chapter
0,the boys read the book,Harry says that the boys read the book,the book is read by the boys,kons boys read kon book,read kon book,kon book is read by kons boys,Harry says that kons boys read kon book
0,the boys read the poem,Gomu says that the boys read the poem,the poem is read by the boys,kons boys read kar poem,read kar poem,kar poem is read by kons boys,Gomu says that kons boys read kar poem
0,the boys read a story,Leela states that the boys read the story,a story is read by the boys,kons boys read kar story,read kar story,kar story is read by kons boys,Leela states that kons boys read kar story


## IT Unreal Grammar 1: Add a negation after the 3rd word in the nullified subject sentence

In [10]:
df['it-u-1-negation'] = df.progress_apply(lambda row: " ".join(row['it'].split(" ")[:4] + [ "no" ] + row['sentence'].split(" ")[4:]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 98910.20it/s]


## IT Unreal Grammar 2: Invert italian sentence

In [11]:
df['it-u-2-invert'] = df.progress_apply(lambda row: " ".join(row['it'].split(" ")[::-1]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 122704.09it/s]


## IT Unreal Grammar 3: Same gender for subject and object

In [12]:
df['it-u-3-gender'] = df.progress_apply(lambda row: " ".join(row['it'].split(" ")[:3] + [row['it'].split(" ")[0]] + [row['it'].split(" ")[-1]]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 89266.24it/s]


## JP real grammar 1 (Wa after subj, o after obj, verb)

In [13]:
df['jp-r-1-sov'] = df.progress_apply(lambda row: " ".join(row['sentence'].split(" ")[:2]) + " wa " + " ".join(row['sentence'].split(" ")[-2:]) + " o " + row['sentence'].split(" ")[2], axis=1)

100%|██████████| 812/812 [00:00<00:00, 112777.74it/s]


## JP real grammar 2 (Passive construction)

In [14]:
df['jp-r-2-passive'] = df.progress_apply(lambda row: " ".join(row['sentence'].split(" ")[3:5]) + " wa " + " ".join(row['sentence'].split(" ")[:2]) + " ni " + infinitive[row['sentence'].split(" ")[2]] + " reru", axis=1)

100%|██████████| 812/812 [00:00<00:00, 78259.49it/s]


In [156]:
df

Unnamed: 0,sentence,subordinate-sentence,passive-sentence,it,it-r-1-null_subject,it-r-2-passive,it-r-3-subordinate,it-u-1-negation,it-u-2-invert,it-u-3-gender,jp-r-1-sov,jp-r-2-passive
0,a dog carries a fish,Tom sees that the dog carries the fish,a fish is carried by a dog,kon dog carries kon fish,carries kon fish,kon fish is carried by kon dog,Tom sees that kon dog carries kon fish,kon dog carries kon no fish,fish kon carries dog kon,kon dog carries kon fish,a dog wa a fish o carries,a fish wa a dog ni to carry reru
0,the dog holds the fish,Leela claims that the dog holds the fish,the fish is held by the dog,kon dog holds kon fish,holds kon fish,kon fish is held by kon dog,Leela claims that kon dog holds kon fish,kon dog holds kon no fish,fish kon holds dog kon,kon dog holds kon fish,the dog wa the fish o holds,the fish wa the dog ni to hold reru
0,a dog takes a fish,Tom sees that the dog takes the fish,a fish is taken by a dog,kon dog takes kon fish,takes kon fish,kon fish is taken by kon dog,Tom sees that kon dog takes kon fish,kon dog takes kon no fish,fish kon takes dog kon,kon dog takes kon fish,a dog wa a fish o takes,a fish wa a dog ni to take reru
0,the dog brings the fish,John says that the dog brings the fish,the fish is brought by the dog,kon dog brings kon fish,brings kon fish,kon fish is brought by kon dog,John says that kon dog brings kon fish,kon dog brings kon no fish,fish kon brings dog kon,kon dog brings kon fish,the dog wa the fish o brings,the fish wa the dog ni to bring reru
0,a dog carries the mouse,Tom sees that the dog carries the mouse,the mouse is carried by a dog,kon dog carries kon mouse,carries kon mouse,kon mouse is carried by kon dog,Tom sees that kon dog carries kon mouse,kon dog carries kon no mouse,mouse kon carries dog kon,kon dog carries kon mouse,a dog wa the mouse o carries,the mouse wa a dog ni to carry reru
...,...,...,...,...,...,...,...,...,...,...,...,...
0,the girls read the chapter,John notices that the girls read the chapter,the chapter is read by the girls,kars girls read kon chapter,read kon chapter,kon chapter is read by kars girls,John notices that kars girls read kon chapter,kars girls read kon no chapter,chapter kon read girls kars,kars girls read kars chapter,the girls wa the chapter o read,the chapter wa the girls ni to read reru
0,the boys read a book,Tom states that the boys read the book,a book is read by the boys,kons boys read kon book,read kon book,kon book is read by kons boys,Tom states that kons boys read kon book,kons boys read kon no book,book kon read boys kons,kons boys read kons book,the boys wa a book o read,a book wa the boys ni to read reru
0,the boys read a poem,Leela notices that the boys read the poem,a poem is read by the boys,kons boys read kar poem,read kar poem,kar poem is read by kons boys,Leela notices that kons boys read kar poem,kons boys read kar no poem,poem kar read boys kons,kons boys read kons poem,the boys wa a poem o read,a poem wa the boys ni to read reru
0,the boys read a story,John states that the boys read the story,a story is read by the boys,kons boys read kar story,read kar story,kar story is read by kons boys,John states that kons boys read kar story,kons boys read kar no story,story kar read boys kons,kons boys read kons story,the boys wa a story o read,a story wa the boys ni to read reru


## JP real grammar 3 (Subordinate construction)

In [15]:
df['jp-r-3-subordinate'] = df.progress_apply(lambda row: " ".join([row['subordinate-sentence'].split(" ")[0]] + ["wa"] + row['subordinate-sentence'].split(" ")[3:5] + ["ga"] + row['subordinate-sentence'].split(" ")[-2:]  + ["o"] + [row['subordinate-sentence'].split(" ")[5]] + ["to"] + [row['subordinate-sentence'].split(" ")[1]]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 77514.96it/s]


## JP - Unreal grammar 1:Add a negation at the end of the object in the real-jp-1 sentence

In [16]:
df['jp-u-1-negation'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[:3]) + " no " + " ".join(row['jp-r-1-sov'].split(" ")[3:]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 109577.39it/s]


## JP - Unreal grammar 2: Invert jp-real-1 sentence

In [17]:
df['jp-u-2-invert'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[::-1]), axis=1)

100%|██████████| 812/812 [00:00<00:00, 157223.47it/s]


In [160]:
df

Unnamed: 0,sentence,subordinate-sentence,passive-sentence,it,it-r-1-null_subject,it-r-2-passive,it-r-3-subordinate,it-u-1-negation,it-u-2-invert,it-u-3-gender,jp-r-1-sov,jp-r-2-passive,jp-r-3-subordinate,jp-u-1-negation,jp-u-2-invert
0,a dog carries a fish,Tom sees that the dog carries the fish,a fish is carried by a dog,kon dog carries kon fish,carries kon fish,kon fish is carried by kon dog,Tom sees that kon dog carries kon fish,kon dog carries kon no fish,fish kon carries dog kon,kon dog carries kon fish,a dog wa a fish o carries,a fish wa a dog ni to carry reru,Tom wa the dog ga the fish o carries to sees,a dog wa no a fish o carries,carries o fish a wa dog a
0,the dog holds the fish,Leela claims that the dog holds the fish,the fish is held by the dog,kon dog holds kon fish,holds kon fish,kon fish is held by kon dog,Leela claims that kon dog holds kon fish,kon dog holds kon no fish,fish kon holds dog kon,kon dog holds kon fish,the dog wa the fish o holds,the fish wa the dog ni to hold reru,Leela wa the dog ga the fish o holds to claims,the dog wa no the fish o holds,holds o fish the wa dog the
0,a dog takes a fish,Tom sees that the dog takes the fish,a fish is taken by a dog,kon dog takes kon fish,takes kon fish,kon fish is taken by kon dog,Tom sees that kon dog takes kon fish,kon dog takes kon no fish,fish kon takes dog kon,kon dog takes kon fish,a dog wa a fish o takes,a fish wa a dog ni to take reru,Tom wa the dog ga the fish o takes to sees,a dog wa no a fish o takes,takes o fish a wa dog a
0,the dog brings the fish,John says that the dog brings the fish,the fish is brought by the dog,kon dog brings kon fish,brings kon fish,kon fish is brought by kon dog,John says that kon dog brings kon fish,kon dog brings kon no fish,fish kon brings dog kon,kon dog brings kon fish,the dog wa the fish o brings,the fish wa the dog ni to bring reru,John wa the dog ga the fish o brings to says,the dog wa no the fish o brings,brings o fish the wa dog the
0,a dog carries the mouse,Tom sees that the dog carries the mouse,the mouse is carried by a dog,kon dog carries kon mouse,carries kon mouse,kon mouse is carried by kon dog,Tom sees that kon dog carries kon mouse,kon dog carries kon no mouse,mouse kon carries dog kon,kon dog carries kon mouse,a dog wa the mouse o carries,the mouse wa a dog ni to carry reru,Tom wa the dog ga the mouse o carries to sees,a dog wa no the mouse o carries,carries o mouse the wa dog a
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,the girls read the chapter,John notices that the girls read the chapter,the chapter is read by the girls,kars girls read kon chapter,read kon chapter,kon chapter is read by kars girls,John notices that kars girls read kon chapter,kars girls read kon no chapter,chapter kon read girls kars,kars girls read kars chapter,the girls wa the chapter o read,the chapter wa the girls ni to read reru,John wa the girls ga the chapter o read to not...,the girls wa no the chapter o read,read o chapter the wa girls the
0,the boys read a book,Tom states that the boys read the book,a book is read by the boys,kons boys read kon book,read kon book,kon book is read by kons boys,Tom states that kons boys read kon book,kons boys read kon no book,book kon read boys kons,kons boys read kons book,the boys wa a book o read,a book wa the boys ni to read reru,Tom wa the boys ga the book o read to states,the boys wa no a book o read,read o book a wa boys the
0,the boys read a poem,Leela notices that the boys read the poem,a poem is read by the boys,kons boys read kar poem,read kar poem,kar poem is read by kons boys,Leela notices that kons boys read kar poem,kons boys read kar no poem,poem kar read boys kons,kons boys read kons poem,the boys wa a poem o read,a poem wa the boys ni to read reru,Leela wa the boys ga the poem o read to notices,the boys wa no a poem o read,read o poem a wa boys the
0,the boys read a story,John states that the boys read the story,a story is read by the boys,kons boys read kar story,read kar story,kar story is read by kons boys,John states that kons boys read kar story,kons boys read kar no story,story kar read boys kons,kons boys read kons story,the boys wa a story o read,a story wa the boys ni to read reru,John wa the boys ga the story o read to states,the boys wa no a story o read,read o story a wa boys the


## JP - Unreal grammar add a after o + past tense

In [161]:
df['jp-u-3-past-tense'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[:-2] + [' o-ta '] + [infinitive[row['jp-r-1-sov'].split(" ")[-1]]]), axis=1)

  0%|          | 0/812 [00:00<?, ?it/s]

In [162]:
df

Unnamed: 0,sentence,subordinate-sentence,passive-sentence,it,it-r-1-null_subject,it-r-2-passive,it-r-3-subordinate,it-u-1-negation,it-u-2-invert,it-u-3-gender,jp-r-1-sov,jp-r-2-passive,jp-r-3-subordinate,jp-u-1-negation,jp-u-2-invert,jp-u-3-past-tense
0,a dog carries a fish,Tom sees that the dog carries the fish,a fish is carried by a dog,kon dog carries kon fish,carries kon fish,kon fish is carried by kon dog,Tom sees that kon dog carries kon fish,kon dog carries kon no fish,fish kon carries dog kon,kon dog carries kon fish,a dog wa a fish o carries,a fish wa a dog ni to carry reru,Tom wa the dog ga the fish o carries to sees,a dog wa no a fish o carries,carries o fish a wa dog a,a dog wa a fish o-ta to carry
0,the dog holds the fish,Leela claims that the dog holds the fish,the fish is held by the dog,kon dog holds kon fish,holds kon fish,kon fish is held by kon dog,Leela claims that kon dog holds kon fish,kon dog holds kon no fish,fish kon holds dog kon,kon dog holds kon fish,the dog wa the fish o holds,the fish wa the dog ni to hold reru,Leela wa the dog ga the fish o holds to claims,the dog wa no the fish o holds,holds o fish the wa dog the,the dog wa the fish o-ta to hold
0,a dog takes a fish,Tom sees that the dog takes the fish,a fish is taken by a dog,kon dog takes kon fish,takes kon fish,kon fish is taken by kon dog,Tom sees that kon dog takes kon fish,kon dog takes kon no fish,fish kon takes dog kon,kon dog takes kon fish,a dog wa a fish o takes,a fish wa a dog ni to take reru,Tom wa the dog ga the fish o takes to sees,a dog wa no a fish o takes,takes o fish a wa dog a,a dog wa a fish o-ta to take
0,the dog brings the fish,John says that the dog brings the fish,the fish is brought by the dog,kon dog brings kon fish,brings kon fish,kon fish is brought by kon dog,John says that kon dog brings kon fish,kon dog brings kon no fish,fish kon brings dog kon,kon dog brings kon fish,the dog wa the fish o brings,the fish wa the dog ni to bring reru,John wa the dog ga the fish o brings to says,the dog wa no the fish o brings,brings o fish the wa dog the,the dog wa the fish o-ta to bring
0,a dog carries the mouse,Tom sees that the dog carries the mouse,the mouse is carried by a dog,kon dog carries kon mouse,carries kon mouse,kon mouse is carried by kon dog,Tom sees that kon dog carries kon mouse,kon dog carries kon no mouse,mouse kon carries dog kon,kon dog carries kon mouse,a dog wa the mouse o carries,the mouse wa a dog ni to carry reru,Tom wa the dog ga the mouse o carries to sees,a dog wa no the mouse o carries,carries o mouse the wa dog a,a dog wa the mouse o-ta to carry
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,the girls read the chapter,John notices that the girls read the chapter,the chapter is read by the girls,kars girls read kon chapter,read kon chapter,kon chapter is read by kars girls,John notices that kars girls read kon chapter,kars girls read kon no chapter,chapter kon read girls kars,kars girls read kars chapter,the girls wa the chapter o read,the chapter wa the girls ni to read reru,John wa the girls ga the chapter o read to not...,the girls wa no the chapter o read,read o chapter the wa girls the,the girls wa the chapter o-ta to read
0,the boys read a book,Tom states that the boys read the book,a book is read by the boys,kons boys read kon book,read kon book,kon book is read by kons boys,Tom states that kons boys read kon book,kons boys read kon no book,book kon read boys kons,kons boys read kons book,the boys wa a book o read,a book wa the boys ni to read reru,Tom wa the boys ga the book o read to states,the boys wa no a book o read,read o book a wa boys the,the boys wa a book o-ta to read
0,the boys read a poem,Leela notices that the boys read the poem,a poem is read by the boys,kons boys read kar poem,read kar poem,kar poem is read by kons boys,Leela notices that kons boys read kar poem,kons boys read kar no poem,poem kar read boys kons,kons boys read kons poem,the boys wa a poem o read,a poem wa the boys ni to read reru,Leela wa the boys ga the poem o read to notices,the boys wa no a poem o read,read o poem a wa boys the,the boys wa a poem o-ta to read
0,the boys read a story,John states that the boys read the story,a story is read by the boys,kons boys read kar story,read kar story,kar story is read by kons boys,John states that kons boys read kar story,kons boys read kar no story,story kar read boys kons,kons boys read kons story,the boys wa a story o read,a story wa the boys ni to read reru,John wa the boys ga the story o read to states,the boys wa no a story o read,read o story a wa boys the,the boys wa a story o-ta to read


## Non-grammatical sentences

In [163]:
swapProhibited = { 
'sentence': [ [0,3], [1,4] ],
'subordinate-sentence': [ [0,4], [0,7], [4,7], [3,6] ],
'passive-sentence': [ [0,5], [1,6] ],
'it': [ [0,3], [1,4] ],
'it-r-1-null_subject': [ ],
'it-r-2-passive': [ [0,5], [1,6] ],
'it-r-3-subordinate': [ [0,5], [1,6] ],
'it-u-1-negation': [ [0,3], [1,5] ],
'it-u-2-invert': [ [0,3], [1,4] ],
'it-u-3-gender': [ [0,3], [1,4] ],
'jp-r-1-sov': [ [0,3], [1,4] ],
'jp-r-2-passive': [ [0,3], [1,4] ],
'jp-r-3-subordinate': [ [0,3], [0,6], [3,6] ],
'jp-u-1-negation': [ [0,4], [1,5] ],
'jp-u-2-invert': [ [3,6], [2,5] ],
'jp-u-3-past-tense': [ [0,3], [1,4] ],
}
def swap_words(sentence, col):
    sentence = sentence.split(" ")
    numWords = len(sentence)
    toSwap = random.sample(range(0, numWords), 2)
    toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
    prohibitedWords = [set([sentence[pos[0]],sentence[pos[1]]]) for pos in swapProhibited[col]]
    while toSwapWords in prohibitedWords:
        toSwap = random.sample(range(0, numWords), 2)
        toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
    
    swap1 = sentence[toSwap[0]]
    swap2 = sentence[toSwap[1]]
    sentence[toSwap[0]] = swap2
    sentence[toSwap[1]] = swap1
    return " ".join(sentence)

for col in swapProhibited.keys():
    print(' Now processing.... ', col)
    df[f'ng-{col}'] = df.progress_apply(lambda row: swap_words(row[col], col), axis=1)

 Now processing....  sentence


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  subordinate-sentence


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  passive-sentence


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-r-1-null_subject


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-r-2-passive


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-r-3-subordinate


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-u-1-negation


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-u-2-invert


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  it-u-3-gender


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-r-1-sov


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-r-2-passive


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-r-3-subordinate


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-u-1-negation


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-u-2-invert


  0%|          | 0/812 [00:00<?, ?it/s]

 Now processing....  jp-u-3-past-tense


  0%|          | 0/812 [00:00<?, ?it/s]

In [170]:
gCols = [col for col in df.columns if not 'ng' in col]
ngCols = [col for col in df.columns if 'ng' in col]
df[ngCols]

Unnamed: 0,ng-sentence,ng-subordinate-sentence,ng-passive-sentence,ng-it,ng-it-r-1-null_subject,ng-it-r-2-passive,ng-it-r-3-subordinate,ng-it-u-1-negation,ng-it-u-2-invert,ng-it-u-3-gender,ng-jp-r-1-sov,ng-jp-r-2-passive,ng-jp-r-3-subordinate,ng-jp-u-1-negation,ng-jp-u-2-invert,ng-jp-u-3-past-tense
0,dog a carries a fish,that sees Tom the dog carries the fish,a fish carried is by a dog,kon dog kon carries fish,kon carries fish,kon fish is carried kon by dog,Tom fish that kon dog carries kon sees,dog kon carries kon no fish,fish carries kon dog kon,carries dog kon kon fish,o dog wa a fish a carries,a fish reru a dog ni to carry wa,Tom carries the dog ga the fish o wa to sees,o dog wa no a fish a carries,a o fish a wa dog carries,dog wa a fish o-ta a to carry
0,holds dog the the fish,holds claims that the dog Leela the fish,the is fish held by the dog,kon dog fish kon holds,holds fish kon,is fish kon held by kon dog,Leela claims that fish dog holds kon kon,fish dog holds kon no kon,holds kon fish dog kon,holds dog kon kon fish,the dog wa the fish holds o,the reru wa the dog ni to hold fish,Leela wa the dog ga the holds o fish to claims,the o wa no the fish dog holds,holds o fish the the dog wa,hold dog wa the fish o-ta to the
0,dog a takes a fish,Tom takes that the dog sees the fish,a is fish taken by a dog,kon dog takes fish kon,kon takes fish,kon fish is dog by kon taken,Tom sees fish kon dog takes kon that,kon kon takes dog no fish,fish kon takes kon dog,kon kon takes dog fish,a dog wa a o fish takes,a fish wa a dog ni take to reru,Tom wa the dog ga the o fish takes to sees,wa dog a no a fish o takes,takes o wa a fish dog a,a dog wa a fish to o-ta take
0,dog the brings the fish,says John that the dog brings the fish,the fish is by brought the dog,dog kon brings kon fish,fish kon brings,kon by is brought fish kon dog,John says kon that dog brings kon fish,kon dog no kon brings fish,fish brings kon dog kon,kon dog fish kon brings,the the wa dog fish o brings,the fish wa the reru ni to bring dog,John says the dog ga the fish o brings to wa,the dog wa the no fish o brings,the o fish the wa dog brings,the dog wa the fish to o-ta bring
0,a dog the carries mouse,Tom sees that the dog the carries mouse,the mouse is dog by a carried,kon dog kon carries mouse,mouse kon carries,by mouse is carried kon kon dog,mouse sees that kon dog carries kon Tom,kon kon carries dog no mouse,mouse dog carries kon kon,kon carries dog kon mouse,a dog wa mouse the o carries,to mouse wa a dog ni the carry reru,carries wa the dog ga the mouse o Tom to sees,a dog mouse no the wa o carries,carries o mouse dog wa the a,a dog wa the mouse carry to o-ta
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,the girls the read chapter,John notices that chapter girls read the the,read chapter is the by the girls,girls kars read kon chapter,chapter kon read,kon chapter is by read kars girls,John notices that girls kars read kon chapter,kars no read kon girls chapter,read kon chapter girls kars,kars girls chapter kars read,the girls the wa chapter o read,the chapter to the girls ni wa read reru,John wa the girls ga the notices o read to cha...,the girls no wa the chapter o read,read the chapter o wa girls the,the girls wa the read o-ta to chapter
0,the boys book a read,Tom states that book boys read the the,a is book read by the boys,book boys read kon kons,book kon read,boys book is read by kons kon,that states Tom kons boys read kon book,kons boys read kon book no,book kon kons boys read,boys kons read kons book,the wa boys a book o read,a book wa read boys ni to the reru,Tom to the boys ga the book o read wa states,the boys wa no a book read o,wa o book a read boys the,the boys wa a book o-ta to read
0,read boys the a poem,Leela notices read the boys that the poem,a poem is read the by boys,kons read boys kar poem,kar read poem,kar poem read is by kons boys,Leela kons that notices boys read kar poem,boys kons read kar no poem,poem boys read kar kons,kons boys kons read poem,boys the wa a poem o read,a read wa the boys ni to poem reru,Leela wa the boys ga read poem o the to notices,the boys a no wa poem o read,read o wa a poem boys the,the boys wa a poem to o-ta read
0,the boys a read story,John boys that the states read the story,a story is read the by boys,story boys read kar kons,kar read story,kar story is read by boys kons,John kons that states boys read kar story,boys kons read kar no story,kons kar read boys story,boys kons read kons story,the o wa a story boys read,a story boys the wa ni to read reru,John wa the boys states the story o read to ga,the no wa boys a story o read,read o story boys wa a the,the wa a story boys o-ta to read


In [165]:
df.columns

Index(['sentence', 'subordinate-sentence', 'passive-sentence', 'it',
       'it-r-1-null_subject', 'it-r-2-passive', 'it-r-3-subordinate',
       'it-u-1-negation', 'it-u-2-invert', 'it-u-3-gender', 'jp-r-1-sov',
       'jp-r-2-passive', 'jp-r-3-subordinate', 'jp-u-1-negation',
       'jp-u-2-invert', 'jp-u-3-past-tense', 'ng-sentence',
       'ng-subordinate-sentence', 'ng-passive-sentence', 'ng-it',
       'ng-it-r-1-null_subject', 'ng-it-r-2-passive', 'ng-it-r-3-subordinate',
       'ng-it-u-1-negation', 'ng-it-u-2-invert', 'ng-it-u-3-gender',
       'ng-jp-r-1-sov', 'ng-jp-r-2-passive', 'ng-jp-r-3-subordinate',
       'ng-jp-u-1-negation', 'ng-jp-u-2-invert', 'ng-jp-u-3-past-tense'],
      dtype='object')

In [166]:
df.to_csv('ngs.csv')