In [484]:
import pandas as pd
import re
import random

random.seed(42)
from tqdm.auto import tqdm
tqdm.pandas()

### Genders for subjects based on italian

In [485]:
en_it = {
    "lemonade": "limonata",
    "espresso": "espresso",
    "milkshake": "frappè",
    "smoothie": "frullato",
    "beverage": "bevanda",
    "tonic": "tonico",
    "water": "acqua",
    "coffee": "caffè",
    "tea": "tè",
    "juice": "succo",
    "pear": "pera",
    "pole": "palo",
    "writer": "scrittore",
    "writers": "scrittori",
    "banana": "banana",
    "biscuit": "biscotto",
    "book": "libro",
    "bottle": "bottiglia",
    "box": "scatola",
    "boy": "ragazzo",
    "boys": "ragazzi",
    "lightbulb": "lampadina",
    "sideboard": "credenza",
    "hat": "cappello",
    "cat": "gatto",
    "cats": "gatti",
    "mount": "monte",
    "wall": "muro",
    "chapter": "capitolo",
    "chalk": "gesso",
    "cup": "tazza",
    "cucumber": "cetriolo",
    "dog": "cane",
    "dogs": "cani",
    "oratory": "oratorio",
    "fish": "pesce",
    "fruit": "frutta",
    "girl": "ragazza",
    "girls": "ragazze",
    "hill": "collina",
    "man": "uomo",
    "men": "uomini",
    "meal": "pasto",
    "mountain": "montagna",
    "mouse": "topo",
    "newspaper": "giornale",
    "pizza": "pizza",
    "poet": "poeta",
    "poets": "poeti",
    "poem": "poema",
    "rock": "roccia",
    "roof": "tetto",
    "orator": "oratore",
    "orators": "oratori",
    "ladder": "scala",
    "story": "storia",
    "teacher": "insegnante",
    "teachers": "insegnanti",
    "toy": "giocattolo",
    "tree": "albero",
    "woman": "donna",
    "women": "donne",
    "author": "autore",
    "authors": "autori",
    "actress": "attrice",
    "teacher": "maestra",
    "doctor": "dottoressa",
    "doctors": "dottoresse",
    "lawyer": "avvocata",
    "architect": "architetta",
    "professor": "professoressa",
    "writers": "scrittrici",
    "cook": "cuoca",
    "letter": "lettera",
    "guide": "guida",
    "postcard": "cartolina",
    "novella": "novella",
    "page": "pagina",
    "poetry": "poesia",
    "magazine": "rivista",
    "actresses": "attrici",
    "teachers": "maestre",
    "doctors": "dottoresse", 
    "lawyers": "avvocate",
    "architects": "architette",
    "professors": "professoresse",
    "cooks": "cuoche",
    "letters": "lettere",
    "guides": "guide",
    "postcards": "cartoline",
    "novellas": "novelle",
    "pages": "pagine",
    "poems": "poesie",
    "magazines": "riviste"
}

pastTense = {
    'hits': 'hit',
    'kicks': 'kicked',
    'touches': 'touched',
    'pushes': 'pushed',
    'drinks': 'drunk',
    'eats': 'ate',
    'reads': 'read',
    'hit': 'hit',
    'kick': 'kicked',
    'touch': 'touched',
    'push': 'pushed',
    'drink': 'drunk',
    'eat': 'ate',
    'read': 'read'
}

infinitive = {
    'hits': 'to hit',
    'kicks': 'to kick',
    'touches': 'to touch',
    'pushes': 'to push',
    'drinks': 'to drink',
    'eats': 'to eat',
    'reads': 'to read',
    'hit': 'to hit',
    'kick': 'to kick',
    'touch': 'to touch',
    'push': 'to push',
    'drink': 'to drink',
    'eat': 'to eat',
    'read': 'to read'
}

pluralObjects = {
    'fish': 'fish',
    'mouse': 'mice',
    'bottle': 'bottles',
    'newspaper': 'newspapers',
    'chalk': 'chalks',
    'box': 'boxes',
    'cap': 'caps',
    'bulb': 'bulbs',
    'cup': 'cups',
    'toy': 'toys',
    'staircase': 'staircases',
    'rock': 'rocks',
    'hill': 'hills',
    'mountain': 'mountains',
    'roof': 'roofs',
    'tree': 'trees',
    'biscuit': 'biscuits',
    'banana': 'bananas',
    'pear': 'pears',
    'meal': 'meals',
    'fruit': 'fruits',
    'cucumber': 'cucumbers',
    'pizza': 'pizzas',
    'book': 'books',
    'poem': 'poems',
    'story': 'stories',
    'chapter': 'chapters'
}

passiveSeed = {
    'hits': 'hit',
    'kicks': 'kicked',
    'touches': 'touched',
    'pushes': 'pushed',
    'drinks': 'drunk',
    'eats': 'eaten',
    'reads': 'read',
    'hit': 'hit',
    'kick': 'kicked',
    'touch': 'touched',
    'push': 'pushed',
    'drink': 'drunk',
    'eat': 'eaten',
    'read': 'read'
}

In [486]:
it_genderDict = {
    "limonata": [ "la", "una"],
    "espresso": [ "il", "un"],
    "frappè": [ "il", "un"],
    "frullato": [ "il", "un"],
    "bevanda": [ "la", "una"],
    "tonico": [ "il", "un"],
    "acqua": [ "l'", "un'"],
    "caffè": [ "il", "un"],
    "tè": [ "il", "un"],
    "succo": [ "il", "un"],
    "caffè": ["il", "un"],
    "tonico": ["il", "un"],
    "tè": ["il", "un"],
    "succo": ["il", "un"],
    "acqua": ["la", "una"],
    "pera": ["la", "una"],
    "palo": ["il", "un"],
    "scrittore": ["lo", "uno"],
    "scrittori": ["gli"],
    "banana": ["la", "una"],
    "biscotto": ["il", "un"],
    "libro": ["il", "un"],
    "bottiglia": ["la", "una"],
    "scatola": ["la", "una"],
    "ragazzo": ["il", "un"],
    "ragazzi": ["i"],
    "lampadina": ["la", "una"],
    "credenza": ["la", "una"],
    "cappello": ["il", "un"],
    "gatto": ["il", "un"],
    "gatti": ["i"],
    "monte": ["il", "un"],
    "muro": ["il", "un"],
    "capitolo": ["il", "un"],
    "gesso": ["il", "un"],
    "tazza": ["la", "una"],
    "cetriolo": ["il", "un"],
    "cane": ["il", "un"],
    "cani": ["i"],
    "oratorio": ["il", "un"],
    "pesce": ["il", "un"],
    "frutta": ["la", "una"],
    "ragazza": ["la", "una"],
    "ragazze": ["le"],
    "collina": ["la", "una"],
    "uomo": ["l'", "un"],
    "uomini": ["gli"],
    "pasto": ["il", "un"],
    "montagna": ["la", "una"],
    "topo": ["il", "un"],
    "giornale": ["il", "un"],
    "pizza": ["la", "una"],
    "poeta": ["il", "un"],
    "poeti": ["i"],
    "poema": ["il", "un"],
    "roccia": ["la", "una"],
    "tetto": ["il", "un"],
    "oratore": ["l'", "un'"],
    "oratori": ["gli"],
    "scala": ["la", "una"],
    "storia": ["la", "una"],
    "insegnante": ["l'", "un'"],
    "insegnanti": ["gli"],
    "giocattolo": ["il", "un"],
    "albero": ["l'", "un"],
    "donna": ["la", "una"],
    "donne": ["le"],
    "autore": ["l'", "un'"],
    "autori": ["gli"],
    "attrice": ["l'", "un'"], 
    "maestra": ["la", "una"], 
    "dottoressa": ["la", "una"], 
    "dottoresse": ["le"],
    "avvocata": ["l'", "un'"], 
    "architetta": ["l'", "un'"], 
    "professoressa": ["la", "una"], 
    "scrittrice": ["la", "una"], 
    "cuoca": ["la", "una"], 
    "lettera": ["la", "una"], 
    "guida": ["la", "una"], 
    "cartolina": ["la", "una"], 
    "novella": ["la", "una"], 
    "pagina": ["la", "una"], 
    "poesia": ["la", "una"],
    "rivista": ["la", "una"],
    "attrici": ["le"], 
    "maestre": ["le"], 
    "dottoressi": ["le"], 
    "avvocate": ["l'"], 
    "architette": ["l'"], 
    "professoresse": ["le"], 
    "scrittrici": ["le"], 
    "cuoche": ["le"], 
    "lettere": ["le"], 
    "guide": ["le"], 
    "cartoline": ["le"], 
    "novelle": ["le"], 
    "pagine": ["le"], 
    "poesie": ["le"],
    "riviste": ["le"]
}

it_infinitive = {
    'colpisce' : 'colpire',
    'legge': 'leggere',
    'beve': 'bere',
    'mangia': 'mangiare',
    'tocca': 'toccare',
    'spinge' : 'spingere',
    'calcia': 'calciare'
}

it_pluralObjects = {
    'pesce': 'pesci',
    'topo': 'topi',
    'bottiglia': 'bottiglie',
    'giornale': 'giornali',
    'gesso': 'gessi',
    'scatola': 'scatole',
    'cappello': 'cappelli',
    'lampadina': 'lampadine',
    'tazza': 'tazze',
    'giocattolo': 'giocattoli',
    'scala': 'scale',
    'roccia': 'rocce',
    'collina': 'colline',
    'montagna': 'montagne',
    'tetto': 'tetti',
    'albero': 'alberi',
    'biscotto': 'biscotti',
    'banana': 'banane',
    'pera': 'pere',
    'pasto': 'pasti',
    'frutta': 'frutta',
    'cetriolo': 'cetrioli',
    'pizza': 'pizze',
    'libro': 'libri',
    'poema': 'poemi',
    'storia': 'storie',
    'capitolo': 'capitolo'
}

it_passiveSeed = {
'colpisce': { 'la': 'è colpita', 'una': 'è colpita', 'il': 'è colpito' , 'un': 'è colpito', 'uno': 'è colpito' },
'calcia': { 'la': 'è calciata', 'una': 'è calciata', 'il': 'è calciato' , 'un': 'è calciato', 'uno': 'è calciato' },
'tocca': { 'la': 'è toccata', 'una': 'è toccata', 'il': 'è toccato' , 'un': 'è toccato', 'uno': 'è toccato' },
'spinge': { 'la': 'è spinta', 'una': 'è spinta', 'il': 'è spinto' , 'un': 'è spinto', 'uno': 'è spinto' },
'beve': { 'la': 'è bevuta', 'una': 'è bevuta', 'il': 'è bevuto' , 'un': 'è bevuto', 'uno': 'è bevuto' },
'mangia': { 'la': 'è mangiata', 'una': 'è mangiata', 'il': 'è mangiato' , 'un': 'è mangiato', 'uno': 'è mangiato' },
'legge': { 'la': 'è letta', 'una': 'è letta', 'il': 'è letto' , 'un': 'è letto', 'uno': 'è letto' },
'colpiscono': { 'la': 'è colpita', 'una': 'è colpita', 'il': 'è colpito' , 'un': 'è colpito', 'uno': 'è colpito' },
'calciano': { 'la': 'è calciata', 'una': 'è calciata', 'il': 'è calciato' , 'un': 'è calciato', 'uno': 'è calciato' },
'toccano': { 'la': 'è toccata', 'una': 'è toccata', 'il': 'è toccato' , 'un': 'è toccato', 'uno': 'è toccato' },
'spingono': { 'la': 'è spinta', 'una': 'è spinta', 'il': 'è spinto' , 'un': 'è spinto', 'uno': 'è spinto' },
'bevono': { 'la': 'è bevuta', 'una': 'è bevuta', 'il': 'è bevuto' , 'un': 'è bevuto', 'uno': 'è bevuto' },
'mangiano': { 'la': 'è mangiata', 'una': 'è mangiata', 'il': 'è mangiato' , 'un': 'è mangiato', 'uno': 'è mangiato' },
'leggono': { 'la': 'è letta', 'una': 'è letta', 'il': 'è letto' , 'un': 'è letto', 'uno': 'è letto' }
}

verbDict_jp = {
    "eats": "食べる",
    "reads": "読む",
    "eat": "食べる",
    "read": "読む",
    "drinks": "飲む",
    "drink": "飲む",
    "hit": "打つ",
    "hits": "打つ",
    "kick": "蹴る",
    "kicks": "蹴る",
    "push": "押す",
    "pushes": "押す",
    "touch": "触る",
    "touches": "触る"
}

passiveDict_jp = {
    "食べる": "食べられる",
    "読む": "読まれる",
    "飲む": "飲まれる",
    "打つ": "打たれる",
    "蹴る": "蹴られる",
    "押す": "押される",
    "触る": "触られる"
}

particleDict_jp = {
    "wa": "は",
    "ga": "が",
    "o": "を",
    "to": "と",
    "ni": "に",
    "nai": "ない",
    "ta": "た"
}

nounDict_jp = {
    "lemonade": "レモネード",
    "espresso": "エスプレッソ",
    "milkshake": "ミルクセーキ",
    "smoothie": "スムージー",
    "beverage": "飲み物",
    "tonic": "トニック",
    "water": "水",
    "coffee": "コーヒー",
    "tea": "お茶",
    "juice": "ジュース",
    "pear": "梨",
    "author": "著者",
    "banana": "バナナ",
    "biscuit": "ビスケット",
    "book": "本",
    "lightbulb": "電球",
    "bottle": "ボトル",
    "box": "箱",
    "boy": "男の子",
    "bulb": "電球",
    "cap": "帽子",
    "cat": "猫",
    "chapter": "章",
    "chalk": "白亜",
    "cup": "コップ",
    "cucumber": "胡瓜",
    "dog": "犬",
    "fish": "魚",
    "fruit": "果物",
    "girl": "女の子",
    "hill": "丘",
    "man": "男",
    "meal": "食事",
    "mountain": "山",
    "mouse": "マウス",
    "newspaper": "新聞",
    "noodles": "麺",
    "poet": "詩人",
    "poem": "詩",
    "rock": "岩石",
    "roof": "屋根",
    "speaker": "スピーカー",
    "staircase": "階段",
    "story": "小説",
    "teacher": "先生",
    "toy": "玩具",
    "tree": "木",
    "woman": "女",
    "writer": "著者",
    "pizza": "ピザ",
    "pears": "梨",
    "authors": "著者",
    "bananas": "バナナ",
    "biscuits": "ビスケット",
    "books": "本",
    "bottles": "ボトル",
    "boxes": "箱",
    "boys": "男の子",
    "bulbs": "電球",
    "caps": "帽子",
    "cats": "猫",
    "chapters": "章",
    "chalks": "白亜",
    "cups": "コップ",
    "cucumbers": "胡瓜",
    "dogs": "犬",
    "fish": "魚",
    "fruits": "果物",
    "girls": "女の子",
    "hills": "丘",
    "men": "男性",
    "meals": "食事",
    "mountains": "山",
    "mouses": "マウス",
    "newspapers": "新聞",
    "noodles": "麺",
    "poets": "詩人",
    "poems": "詩",
    "rocks": "岩石",
    "roofs": "屋根",
    "speakers": "スピーカー",
    "staircases": "階段",
    "stories": "小説",
    "teachers": "先生",
    "toys": "玩具",
    "trees": "木",
    "women": "女性",
    "writers": "著者",
    "pizzas": "ピザ",
    "teacher": "教師",
    "boy": "少年",
    "man": "男",
    "men": "男性 ",
    "pole": "柱",
    "cat": "猫",
    "architect": "建築家",
    "author": "著者",
    "cats": "猫たち",
    "writer": "作家",
    "cooks": "料理人たち",
    "roof": "屋根",
    "women": "女性たち",
    "professor": "教授",
    "girls": "女の子たち",
    "lawyers": "弁護士たち",
    "professors": "教授たち",
    "orators": "演説家たち",
    "authors": "著者たち",
    "actresses": "女優たち",
    "orator": "演説家",
    "girl": "女の子",
    "dogs": "犬たち",
    "teachers": "教師たち",
    "dog": "犬",
    "poet": "詩人",
    "doctors": "医者たち",
    "tree": "木",
    "actress": "女優",
    "mountain": "山",
    "poets": "詩人たち",
    "writers": "作家たち",
    "doctor": "医者",
    "boys": "少年たち",
    "wall": "壁",
    "lawyer": "弁護士",
    "cook": "料理人",
    "architects": "建築家たち",
    "woman": "女性",
    "ladder": "梯子",
    "letter": "手紙",
    "magazine": "雑誌",
    "guide": "ガイド",
    "postcard": "ポストカード",
    "novella": "中編小説",
    "page": "ページ",
    "hat": "帽子",
    "mount": "マウント"
}

subordinateDict_jp = {
    'to think': '考える',
    'to say': "言う",
    'to state': '述べる',
    'to believe': '信じる',
    'to suggest': '提案する'
}

nameDict_jp = {
  "Sheela": "シーラ",
  "Leela": "リーラ",
  "Maria": "マリア",
  "Gomu": "ゴム",
  "John": "ジョン",
  "Tom": "トム",
  "Harry": "ハリー"
}


In [487]:
gender_ita_S = {
    "frindle": ['kar', 'onko'],
    "frindles": ['kare'],
    "blinco": ['kar', 'onko'],
    "blincos": ['kar'],
    "quozzle": ['kar', 'onko'],
    "quozzles": ['koni'],
    "twizzle": ['kar', 'onka'],
    "droczle": ['kar', 'onka'],
    "plonkle": ['kon', 'onko'],
    "dringle": ['kon', 'onko'],
    "woggle": ['kar', 'onka'],
    "tromple": ['kar', 'onka'],
    "kingle": ['kon', 'onko'],
    "kingles": ['koni'],
    "priffle": ['kon', 'onko'],
    "priffles": ['koni'],
    "zibble": ['kon', 'onko'],
    "wrindle": ['kon', 'onko'],
    "skrump": ['kon', 'onko'],
    "fuzzle": ['kar', 'onka'],
    "fuzzles": ['kare'],
    "gunzle": ['kon', 'onko'],
    "wompel": ['kar', 'onka'],
    "glump": ['kar', 'onka'],
    "glumps": ['kare'],
    "pundle": ['kon', 'onko'],
    "pundles": ['koni'],
    "snorfle": ['kon', 'onko'],
    "yurump": ['kon', 'onko'],
    "wungo": ['kar', 'onka'],
    "miffle": ['kar', 'onka'],
    "miffles": ['kare'],
    "shuggle": ['kar', 'onka'],
    "shuggles": ['kare'],
    "sturple": ['kon', 'onko'],
    "tunzo": ['kar', 'onka'],
    "gorp": ['kon', 'onko'],
    "alump": ['kar', 'onka'],
    "prog": ['kar', 'onko'],
    "progs": ['kar'],
    "blorft": ['kar', 'onka'],
    "strumble": ['kar', 'onka'],
    "strumbles": ['kare'],
    "prundle": ['kar', 'onka'],
    "hergo": ['kar', 'onka'],
    "hergos": ['kare'],
    "grumble": ['kar', 'onko'],
    "kergum": ['kon', 'onko'],
    "grumblen": ['koni'],
    "forp": ['kon', 'onko'],
    "glorble": ['kon', 'onko'],
    "tolzo": ['kar', 'onka'],
    "pelunko": ['kon', 'onko'],
    "gongle": ['kon', 'onko'],
    "gorgen": ['kar', 'onka'],
    "gorgens": ['kare'],
    "porpol": ['kar', 'onko'],
    "porpols": ['koni'],
    "porpoltry": ['kon', 'onko'],
    "frundo": ['kar', 'onka'],
    "frundos": ['kare'],
    "glumpo": ['kar', 'onka'],
    "blogle": ['kar', 'onka'],
    "skerpo": ['kon', 'onko'],
    "skerpos": ['kare'],
    "krunko": ['kon', 'onko'],
    "krunkoletry": ['kar', 'onka'],
    "krunkos": ['koni'],
    "pompo": ['kon', 'onko'],
    "jimto": ['kar', 'onka'],
    "jimtos": ['kare'],
    "knurkle": ['kar', 'onka'],
    "knurkles": ['kare'],
    "tungle": ['kar', 'onka'],
    "climble": ['kon', 'onko'],
    "sumple": ['kar', 'onka'],
    "skung": ['kon', 'onko'],
    "froble": ['kar', 'onka'],
    "ginso": ['kon', 'onko'],
    "hurple": ['kar', 'onka'],
    "hurples": ['kare'],
    "komple": ['kon', 'onko'],
    "fugfug": ['kon', 'onko'],
    "drockzle": ['kar', 'onko'],
    "flarn": ['kon', 'onko'],
    "gluggle": ['kar', 'onka'],
    "wug": ['kar', 'onka'],
    "wugen": ['kare'],
    "arcuplo": ['kon', 'onko'],
    "arcuplos": ['kare']
}
verb_en_S = {
    'hit': 'hufko',
    'hits': 'hufkos',
    'kicks': 'chuskos',
    'kick': 'chusko',
    'touches': 'stoffles',
    'touch': 'stoffle',
    'pushes': 'chunges',
    'push': 'chunge',
    'drinks': 'ungos',
    'drink': 'ungo',
    'eats': 'snugos',
    'eat': 'snugo',
    'reads': 'gurdles',
    'read': 'gurdle',
    'thinks': 'soffles',
    'think': 'soffle',
    'says': 'pungos',
    'say': 'pungo',
    'states': 'gnobos',
    'state': 'gnobo',
    'believes': 'herdles',
    'believe': 'herdle',
    'suggests': 'trongos',
    'suggest': 'trongo'
}

passiveSeed_en_S = {
    'hits': 'hufkoed',
    'kicks': 'chuskoed',
    'touches': 'stoffled',
    'pushes': 'chunged',
    'drinks': 'ungoed',
    'eats': 'snugoed',
    'reads': 'gurdled',
    'hit': 'hufkoed',
    'kick': 'chuskoed',
    'touch': 'stoffled',
    'push': 'chunged',
    'drink': 'ungoed',
    'eat': 'snugoed',
    'read': 'gurdled'
}

infinitive_en_S = {
    'hits': 'to hufko',
    'kicks': 'to chusko',
    'touches': 'to stoffle',
    'pushes': 'to chunge',
    'drinks': 'to ungo',
    'eats': 'to snugo',
    'reads': 'to gurdle',
    'hit': 'to hufko',
    'kick': 'to chusko',
    'touch': 'to stoffle',
    'push': 'to chunge',
    'drink': 'to ungo',
    'eat': 'to snugo',
    'read': 'to gurdle'
}

en_en_S = {
    "actress": "frindle",
    "actresses": "frindles",
    "architect": "blinco",
    "architects": "blincos",
    "author": "quozzle",
    "authors": "quozzles",
    "banana": "twizzle",
    "beverage": "droczle",
    "biscuit": "plonkle",
    "book": "dringle",
    "bottle": "woggle",
    "box": "tromple",
    "boy": "kingle",
    "boys": "kingles",
    "cat": "priffle",
    "cats": "priffles",
    "chalk": "zibble",
    "chapter": "wrindle",
    "coffee": "skrump",
    "cook": "fuzzle",
    "cooks": "fuzzles",
    "cucumber": "gunzle",
    "cup": "wompel",
    "doctor": "glump",
    "doctors": "glumps",
    "dog": "pundle",
    "dogs": "pundles",
    "espresso": "snorfle",
    "fish": "yurump",
    "fruit": "wungo",
    "girl": "miffle",
    "girls": "miffles",
    "guide": "shuggle",
    "guides": "shuggles",
    "hat": "sturple",
    "hill": "tunzo",
    "juice": "gorp",
    "ladder": "alump",
    "lawyer": "prog",
    "lawyers": "progs",
    "lemonade": "blorft",
    "letter": "strumble",
    "letters": "strumbles",
    "lightbulb": "prundle",
    "magazine": "hergo",
    "magazines": "hergos",
    "man": "grumble",
    "meal": "kergum",
    "men": "grumblen",
    "milkshake": "forp",
    "mount": "glorble",
    "mountain": "tolzo",
    "mouse": "pelunko",
    "newspaper": "gongle",
    "novella": "gorgen",
    "novellas": "gorgens",
    "orator": "porpol",
    "orators": "porpols",
    "oratory": "porpoltry",
    "page": "frundo",
    "pages": "frundos",
    "pear": "glumpo",
    "pizza": "blogle",
    "poem": "skerpo",
    "poems": "skerpos",
    "poet": "krunko",
    "poetry": "krunkoletry",
    "poets": "krunkos",
    "pole": "pompo",
    "postcard": "jimto",
    "postcards": "jimtos",
    "professor": "knurkle",
    "professors": "knurkles",
    "rock": "tungle",
    "roof": "climble",
    "sideboard": "sumple",
    "smoothie": "skung",
    "story": "froble",
    "tea": "ginso",
    "teacher": "hurple",
    "teachers": "hurples",
    "tonic": "komple",
    "toy": "fugfug",
    "tree": "drockzle",
    "wall": "flarn",
    "water": "gluggle",
    "woman": "wug", 
    "women": "wugen",
    "writer": "arcuplo",
    "writers": "arcuplos"
}

pastTense_en_S = {
    'hits': 'hufkoed',
    'kicks': 'chuskoed',
    'touches': 'stoffled',
    'pushes': 'chunged',
    'drinks': 'ungosed',
    'eats': 'snugosed',
    'reads': 'gurdled',
    'hit': 'hufkoed',
    'kick': 'chuskoed',
    'touch': 'stoffled',
    'push': 'chunged',
    'drink': 'ungased',
    'eat': 'snugosed',
    'read': 'gurdled'
}
pluralObjects_en_S = {
    "banana": "twizzles",
    "biscuit": "plonkles",
    "book": "dringles",
    "bottle": "woggles",
    "box": "tromples",
    "bulb": "bumples",
    "cap": "groxles",
    "chalk": "zibbles",
    "chapter": "wrindles",
    "cucumber": "gunzles",
    "cup": "wompels",
    "fish": "yurumps",
    "fruit": "wungos",
    "hill": "tunzos",
    "meal": "kergums",
    "mountain": "tolzos",
    "mouse": "pelunkos",
    "newspaper": "gongles",
    "pear": "glumpos",
    "pizza": "blogles",
    "poem": "skerpos",
    "rock": "tungles",
    "roof": "climbles",
    "staircase": "tolsos",
    "story": "frobles",
    "toy": "fugfugs",
    "tree": "drockzles"
}

# Generate sentences

## English

In [488]:
seed = [{'verb': ['hits', 'kicks', 'touches', 'pushes'],
   'subject': ['dog', 'cat', 'man', 'teacher', 'boy'],
   'object': ['bottle', 'newspaper', 'box', 'lightbulb', 'cup']},

{'verb': ['drinks'],
'subject': ['dog', 'cat', 'man', 'teacher', 'boy'],
'object': ['juice', 'tea', 'coffee', 'water', 'tonic']},

{'verb': ['eats'],
'subject': ['dog', 'cat', 'man', 'teacher', 'boy'],
'object': ['banana', 'pear', 'fruit', 'pizza']},

{'verb': ['reads'],
'subject': ['poet', 'author', 'writer', 'orator', 'teacher', 'boy'],
'object': ['story', 'letter', 'magazine', 'guide', 'postcard', 'novella', 'page', 'poem']}]

seed_female = [{'verb': ['hits', 'kicks', 'touches', 'pushes'],
   'subject': ['woman', 'girl', 'actress', 'teacher', 'doctor', 'lawyer', 'architect', 'professor', 'writer', 'cook'],
   'object': ['fish', 'mouse', 'newspaper', 'chalk', 'hat', 'toy']},

{'verb': ['drinks'],
 'subject': ['woman', 'girl', 'actress', 'teacher', 'doctor', 'lawyer', 'architect', 'professor', 'writer', 'cook'],
 'object': ['beverage', 'lemonade', 'milkshake', 'smoothie', 'espresso']},

{'verb': ['eats'],
 'subject': ['woman', 'girl', 'actress', 'teacher', 'doctor', 'lawyer', 'architect', 'professor', 'writer', 'cook'],
 'object': ['biscuit', 'fish', 'meal', 'cucumber']},

{'verb': ['reads'],
 'subject': ['woman', 'girl', 'actress', 'teacher', 'doctor', 'lawyer', 'architect', 'professor', 'writer', 'cook'],
 'object': ['book', 'poem', 'chapter']}]

subordinateSeed = [ { 'verb' : ['thinks', 'says', 'states', 'believes', 'suggests'],
                     'subject': ['Sheela', 'Leela', 'Maria', 'Gomu', 'John', 'Tom', 'Harry'], }]

#Genders of subjects and objects should be different.
ita_Seed = [ { 'verb' : ['colpisce', 'calcia', 'tocca', 'spinge'],  
             'subject': ['cane', 'gatto', 'uomo', 'insegnante', 'ragazzo'], 
             'object': ['bottiglia', 'giornale', 'scatola', 'lampadina', 'tazza']},

{ 'verb': ['beve'], 
 'subject': ['cane', 'gatto', 'uomo', 'insegnante', 'ragazzo'], 
 'object': ['succo', 'tè', 'caffè', 'acqua', 'tonico'] },

{ 'verb': ['mangia'], 
 'subject' : ['cane', 'gatto', 'uomo', 'insegnante', 'ragazzo'],
 'object': ['banana', 'pera', 'frutta', 'pizza' ]},

{'verb': ['legge'], 
 'subject' : ['poeta', 'autore', 'scrittore', 'oratorio', 'insegnante', 'ragazzo'], 
 'object': ['storia', 'lettera', 'rivista', 'guida', 'cartolina', 'novella', 'pagina', 'poesia']} ]

ita_Seed_female = [ { 'verb' : ['colpisce', 'calcia', 'tocca', 'spinge'],  
                    'subject': ['donna', 'ragazza', 'attrice', 'maestra', 'dottoressa', 'avvocata', 'architetta', 'professoressa', 'scrittrice', 'cuoca' ], 
                    'object': ['pesce', 'topo', 'giornale', 'gesso', 'cappello', 'giocattolo']},

{ 'verb': ['beve'], 
 'subject': ['donna', 'ragazza', 'attrice', 'maestra', 'dottoressa', 'avvocata', 'architetta', 'professoressa', 'scrittrice', 'cuoca' ], 
 'object': ['bevanda', 'limonata', 'frappè', 'frullato', 'espresso'] },

{ 'verb': ['mangia'], 
 'subject' : ['donna', 'ragazza', 'attrice', 'maestra', 'dottoressa', 'avvocata', 'architetta', 'professoressa', 'scrittrice', 'cuoca' ], 
 'object': ['biscotto', 'pesce', 'pasto', 'cetriolo']},

{'verb': ['legge'], 
 'subject' : ['donna', 'ragazza', 'attrice', 'maestra', 'dottoressa', 'avvocata', 'architetta', 'professoressa', 'scrittrice', 'cuoca' ], 
 'object': ['libro', 'poema', 'capitolo']} ]

ita_SubordinateSeed = [ { 'verb' : ['pensa', 'dice', 'afferma', 'crede', 'suggerisce'],  
                        'subject': ['Sheela', 'Leela', 'Maria', 'Gomu', 'John', 'Tom', 'Harry'], }]

df = pd.DataFrame()

def make_df(seed, ita_Seed, it_genderDict):
    global df
    for oidx, obj in enumerate(seed):
        for sidx, subj in enumerate(obj['subject']):
            for obidx, ob in enumerate(obj['object']):
                for vidx, verb in enumerate(obj['verb']):
                    sdet = random.choice(['the', 'a'])
                    odet = random.choice(['the', 'a'])
                    pSubj = random.choice(subordinateSeed[0]['subject'])
                    pVerb = random.choice(subordinateSeed[0]['verb'])
    
                    if sdet == 'the':
                        ita_Sdet = it_genderDict[ita_Seed[oidx]['subject'][sidx]][0] 
                    else: 
                        ita_Sdet = it_genderDict[ita_Seed[oidx]['subject'][sidx]][1]
    
                    if odet == 'the':
                        it_odet = it_genderDict[ita_Seed[oidx]['object'][obidx]][0] 
                    else: 
                        it_odet = it_genderDict[ita_Seed[oidx]['object'][obidx]][1]
                    
                    ita_Subj = ita_Seed[oidx]['subject'][sidx]
                    it_ob = ita_Seed[oidx]['object'][obidx]
                    it_verb = ita_Seed[oidx]['verb'][vidx]
                    
                    it_pVerb = ita_SubordinateSeed[0]['verb'][subordinateSeed[0]['verb'].index(pVerb)]
    
                    temp_odet = it_odet
                    if ita_Sdet =='il':
                        ita_passive_from = 'dal'
                    if ita_Sdet =='la':
                        ita_passive_from = 'dalla'
                    elif ita_Sdet == "un" or ita_Sdet == "un'":
                        ita_passive_from = 'da un'
                        temp_odet = 'un'
                    elif ita_Sdet == "una":
                        ita_passive_from = 'da una'
                    elif ita_Sdet == "l'":
                        temp_odet = 'il'
                        ita_passive_from = "dall'"
                    if not "'" in ita_passive_from:
                        ita_passive_sentence = f"{it_odet} {it_ob} {it_passiveSeed[it_verb][temp_odet]} {ita_passive_from} {ita_Subj}"
                    else:
                        ita_passive_sentence = f"{it_odet} {it_ob} {it_passiveSeed[it_verb][temp_odet]} {ita_passive_from}{ita_Subj}"
                    # print(ita_passive_sentence)
                    if (ita_Sdet != "l'" and it_odet != "l'"):
                        wh = random.choice(['when', 'what', 'who', 'which', 'why'])
                        df = pd.concat([df, pd.DataFrame.from_dict([{
                            "ita-r-1": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}",
                            "ita-r-4-null_subject": f"{it_verb} {it_odet} {it_ob}",
                            "ita-r-2-subordinate": f"{pSubj} {it_pVerb} che {ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}",
                            "ita-r-3-passive": ita_passive_sentence,
                            "ita-u-1-negation": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} no {it_ob}",
                            "ita-u-2-inversion": " ".join(f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}".split(" ")[::-1]),
                            "ita-u-3-gender":f"{it_odet} {ita_Subj} {it_verb} {it_odet} {it_ob}",
                            "en-r-1": f"{sdet} {subj} {verb} {odet} {ob}",
                            "en-r-2-subordinate": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}",
                            "en-r-3-passive": f"{odet} {ob} is {passiveSeed[verb]} by {sdet} {subj}",
                            "en-u-1-negation": f"{sdet} {subj} {verb} {odet} doesn't {ob}",
                            "en-u-2-inversion": " ".join(f"{sdet} {subj} {verb} {odet} {ob}".split(" ")[::-1]),
                            "en-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {subj} {verb} the {ob}?",
                            "en-u-3-wh": f"Did {sdet} {subj} {verb} {odet} {wh} {ob}?",
                            "en_S-r-1": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}",
                            "en_S-r-2-subordinate": f"{pSubj} {verb_en_S[pVerb]} that the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}",
                            "en_S-r-3-passive": f"{odet} {en_en_S[ob]} is {passiveSeed_en_S[verb]} by {sdet} {en_en_S[subj]}",
                            "en_S-u-1-negation": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} doesn't {en_en_S[ob]}",
                            "en_S-u-2-inversion": " ".join(f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}".split(" ")[::-1]),
                            "en_S-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}?",
                            "en_S-u-3-wh": f"Did {sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {wh} {en_en_S[ob]}?"
                        }])])
                    elif (ita_Sdet == "l'" and it_odet != "l'"):
                        wh = random.choice(['when', 'what', 'who', 'which', 'why'])
                        df = pd.concat([df, pd.DataFrame.from_dict([{
                            "ita-r-1": f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet} {it_ob}",
                            "ita-r-4-null_subject": f"{it_verb} {it_odet} {it_ob}",
                            "ita-r-2-subordinate": f"{pSubj} {it_pVerb} che {ita_Sdet}{ita_Subj} {it_verb} {it_odet} {it_ob}",
                            "ita-r-3-passive": ita_passive_sentence,
                            "ita-u-1-negation": f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet} no {it_ob}",
                            "ita-u-2-inversion": " ".join(f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet} {it_ob}".split(" ")[::-1]),
                            "ita-u-3-gender": f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet} {it_ob}", #using sdet instead of odet for subj, because it's a word starting w a vowel, and therefore the det is not gendered
                            "en-r-1": f"{sdet} {subj} {verb} {odet} {ob}",
                            "en-r-2-subordinate": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}",
                            "en-r-3-passive": f"{odet} {ob} is {passiveSeed[verb]} by {sdet} {subj}",
                            "en-u-1-negation": f"{sdet} {subj} {verb} {odet} doesn't {ob}",
                            "en-u-2-inversion": " ".join(f"{sdet} {subj} {verb} {odet} {ob}".split(" ")[::-1]),
                            "en-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {subj} {verb} the {ob}?",
                            "en-u-3-wh": f"Did {sdet} {subj} {verb} {odet} {wh} {ob}?",
                            "en_S-r-1": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}",
                            "en_S-r-2-subordinate": f"{pSubj} {verb_en_S[pVerb]} that the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}",
                            "en_S-r-3-passive": f"{odet} {en_en_S[ob]} is {passiveSeed_en_S[verb]} by {sdet} {en_en_S[subj]}",
                            "en_S-u-1-negation": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} doesn't {en_en_S[ob]}",
                            "en_S-u-2-inversion": " ".join(f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}".split(" ")[::-1]),
                            "en_S-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}?",
                            "en_S-u-3-wh": f"Did {sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {wh} {en_en_S[ob]}?"
                        }])])
                    elif (ita_Sdet == "l'" and it_odet == "l'"):
                        wh = random.choice(['when', 'what', 'who', 'which', 'why'])
                        df = pd.concat([df, pd.DataFrame.from_dict([{
                            "ita-r-1": f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet}{it_ob}",
                            "ita-r-4-null_subject": f"{it_verb} {it_odet}{it_ob}",
                            "ita-r-2-subordinate": f"{pSubj} {it_pVerb} che {ita_Sdet} {ita_Subj} {it_verb} {it_odet}{it_ob}",
                            "ita-r-3-passive":ita_passive_sentence, 
                            "ita-u-1-negation": f"{ita_Sdet}{ita_Subj} {it_verb} il no {it_ob}",
                            "ita-u-2-inversion": " ".join( f"{ita_Sdet}{ita_Subj} {it_verb} {it_odet}{it_ob}".split(" ")[::-1]),
                            "ita-u-3-gender": f"{it_odet}{ita_Subj} {it_verb} {it_odet}{it_ob}", 
                            "en-r-1": f"{sdet} {subj} {verb} {odet} {ob}", 
                            "en-r-2-subordinate": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}", 
                            "en-r-3-passive": f"{odet} {ob} is {passiveSeed[verb]} by {sdet} {subj}",
                            "en-u-1-negation": f"{sdet} {subj} {verb} {odet} doesn't {ob}",
                            "en-u-2-inversion": " ".join(f"{sdet} {subj} {verb} {odet} {ob}".split(" ")[::-1]),
                            "en-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {subj} {verb} the {ob}?",
                            "en-u-3-wh": f"Did {sdet} {subj} {verb} {odet} {wh} {ob}?",
                            "en_S-r-1": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}",
                            "en_S-r-2-subordinate": f"{pSubj} {verb_en_S[pVerb]} that the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}",
                            "en_S-r-3-passive": f"{odet} {en_en_S[ob]} is {passiveSeed_en_S[verb]} by {sdet} {en_en_S[subj]}",
                            "en_S-u-1-negation": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} doesn't {en_en_S[ob]}",
                            "en_S-u-2-inversion": " ".join(f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}".split(" ")[::-1]),
                            "en_S-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}?",
                            "en_S-u-3-wh": f"Did {sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {wh} {en_en_S[ob]}?"
                        }])])
                    else:
                        wh = random.choice(['when', 'what', 'who', 'which', 'why'])
                        df = pd.concat([df, pd.DataFrame.from_dict([{
                            "ita-r-1": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet}{it_ob}",
                            "ita-r-4-null_subject": f"{it_verb} {it_odet}{it_ob}",
                            "ita-r-2-subordinate": f"{pSubj} {it_pVerb} che {ita_Sdet}{ita_Subj} {it_verb} {it_odet}{it_ob}",
                            "ita-r-3-passive":ita_passive_sentence, 
                            "ita-u-1-negation": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} no {it_ob}",
                            "ita-u-2-inversion": " ".join(f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet}{it_ob}".split(" ")[::-1]),
                            "ita-u-3-gender": f"il {ita_Subj} {it_verb} {it_odet}{it_ob}",
                            "en-r-1": f"{sdet} {subj} {verb} {odet} {ob}",
                            "en-r-2-subordinate": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}",
                            "en-r-3-passive": f"{odet} {ob} is {passiveSeed[verb]} by {sdet} {subj}", 
                            "en-u-1-negation": f"{sdet} {subj} {verb} {odet} doesn't {ob}",
                            "en-u-2-inversion": " ".join(f"{sdet} {subj} {verb} {odet} {ob}".split(" ")[::-1]),
                            "en-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {subj} {verb} the {ob}?",
                            "en-u-3-wh": f"Did {sdet} {subj} {verb} {odet} {wh} {ob}?",
                            "en_S-r-1": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}",
                            "en_S-r-2-subordinate": f"{pSubj} {verb_en_S[pVerb]} that the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}",
                            "en_S-r-3-passive": f"{odet} {en_en_S[ob]} is {passiveSeed_en_S[verb]} by {sdet} {en_en_S[subj]}",
                            "en_S-u-1-negation": f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} doesn't {en_en_S[ob]}",
                            "en_S-u-2-inversion": " ".join(f"{sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}".split(" ")[::-1]),
                            "en_S-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}?",
                            "en_S-u-3-wh": f"Did {sdet} {en_en_S[subj]} {verb_en_S[verb]} {odet} {wh} {en_en_S[ob]}?"
                        }])])

make_df(seed, ita_Seed, it_genderDict)
make_df(seed_female, ita_Seed_female, it_genderDict)

In [489]:
pluralSeed = [{'verb': ['hit', 'kick', 'touch', 'push'],
   'subject': ['dogs', 'cats', 'men', 'teachers', 'boys'],
   'object': ['bottle', 'newspaper', 'box', 'lightbulb', 'cup']},

{'verb': ['drink'],
'subject': ['dogs', 'cats', 'men', 'teachers', 'boys'],
'object': ['juice', 'tea', 'coffee', 'water', 'tonic']},

{'verb': ['eat'],
'subject': ['dogs', 'cats', 'men', 'teachers', 'boys'],
'object': ['banana', 'pear', 'fruit', 'pizza']},

{'verb': ['read'],
'subject': ['poets', 'authors', 'writers', 'orators', 'teachers', 'boys'],
'object': ['story', 'letter', 'magazine', 'guide', 'postcard', 'novella', 'page', 'poem']}]

pluralSeed_female = [{'verb': ['hit', 'kick', 'touch', 'push'],
   'subject': ['women', 'girls', 'actresses', 'teachers', 'doctors', 'lawyers', 'architects', 'professors', 'writers', 'cooks'],
   'object': ['fish', 'mouse', 'newspaper', 'chalk', 'hat', 'toy']},

{'verb': ['drink'],
 'subject': ['women', 'girls', 'actresses', 'teachers', 'doctors', 'lawyers', 'architects', 'professors', 'writers', 'cooks'],
 'object': ['beverage', 'lemonade', 'milkshake', 'smoothie', 'espresso']},

{'verb': ['eat'],
 'subject': ['women', 'girls', 'actresses', 'teachers', 'doctors', 'lawyers', 'architects', 'professors', 'writers', 'cooks'],
 'object': ['biscuit', 'fish', 'meal', 'cucumber']},

{'verb': ['read'],
 'subject': ['women', 'girls', 'actresses', 'teachers', 'doctors', 'lawyers', 'architects', 'professors', 'writers', 'cooks'],
 'object': ['book', 'poem', 'chapter']}]

it_pluralSeed = [ { 'verb' : ['colpiscono', 'calciano', 'toccano', 'spingono'],  
             'subject': ['cani', 'gatti', 'uomini', 'insegnanti', 'ragazzi'], 
             'object': ['bottiglia', 'giornale', 'scatola', 'lampadina', 'tazza']},

{ 'verb': ['bevono'], 
 'subject': ['cani', 'gatti', 'uomini', 'insegnanti', 'ragazzi'], 
 'object': ['succo', 'tè', 'caffè', 'acqua', 'tonico'] },

{ 'verb': ['mangiano'], 
 'subject' : ['cani', 'gatti', 'uomini', 'insegnanti', 'ragazzi'],
 'object': ['banana', 'pera', 'frutta', 'pizza' ]},

{'verb': ['leggono'], 
 'subject' : ['poeti', 'autori', 'scrittori', 'oratori', 'insegnanti', 'ragazzi'], 
 'object': ['storia', 'lettera', 'rivista', 'guida', 'cartolina', 'novella', 'pagina', 'poesia']} ]

it_pluralSeed_female = [ { 'verb' : ['colpiscono', 'calciano', 'toccano', 'spingono'],  
                    'subject': ['donne', 'ragazze', 'attrici', 'maestre', 'dottoresse', 'avvocate', 'architette', 'professoresse', 'scrittrici', 'cuoche' ], 
                    'object': ['pesce', 'topo', 'giornale', 'gesso', 'cappello', 'giocattolo']},

{ 'verb': ['bevono'], 
 'subject': ['donne', 'ragazze', 'attrici', 'maestre', 'dottoresse', 'avvocate', 'architette', 'professoresse', 'scrittrici', 'cuoche' ], 
 'object': ['bevanda', 'limonata', 'frappè', 'frullato', 'espresso'] },

{ 'verb': ['mangiano'], 
 'subject' : ['donne', 'ragazze', 'attrici', 'maestre', 'dottoresse', 'avvocate', 'architette', 'professoresse', 'scrittrici', 'cuoche' ], 
 'object': ['biscotto', 'pesce', 'pasto', 'cetriolo']},

{'verb': ['leggono'], 
 'subject' : ['donne', 'ragazze', 'attrici', 'maestre', 'dottoresse', 'avvocate', 'architette', 'professoresse', 'scrittrici', 'cuoche' ], 
 'object': ['libro', 'poema', 'capitolo']} ]

ita_SubordinateSeed = [ { 'verb' : ['pensa', 'dice', 'afferma', 'crede', 'suggerisce'],
                        'subject': ['Sheela', 'Leela', 'Maria', 'Gomu', 'John', 'Tom', 'Harry'], }]

def make_plural_df(pluralSeed, it_pluralSeed, it_genderDict):
    global df
    for oidx, obj in enumerate(pluralSeed):
        for sidx, subj in enumerate(obj['subject']):
            for obidx, ob in enumerate(obj['object']):
                for vidx, verb in enumerate(obj['verb']):
                    pSubj = random.choice(subordinateSeed[0]['subject'])
                    pVerb = random.choice(subordinateSeed[0]['verb'])
                    odet = random.choice(['the', 'a'])
                    
                    ita_Subj = it_pluralSeed[oidx]['subject'][sidx]
                    ita_Sdet = it_genderDict[ita_Subj][0]
                    it_ob = it_pluralSeed[oidx]['object'][obidx]
                    it_odet = it_genderDict[it_ob][0]
    
                    it_verb = it_pluralSeed[oidx]['verb'][vidx]
                    
                    it_pVerb = ita_SubordinateSeed[0]['verb'][subordinateSeed[0]['verb'].index(pVerb)]
    
                    if ita_Sdet == 'i':
                        ita_passive_from = 'dai'
                    if ita_Sdet == 'le':
                        ita_passive_from = 'dalle'
                    elif ita_Sdet == "gli":
                        ita_passive_from = "dagli'"

                    temp_odet = it_odet
                    if it_odet == "l'":
                        temp_odet = 'il'
                    else:
                        temp_odet = it_odet

                    if not "'" in ita_passive_from:
                        ita_passive_sentence = f"{it_odet} {it_ob} {it_passiveSeed[it_verb][temp_odet]} {ita_passive_from} {ita_Subj}"
                    else:
                        ita_passive_sentence = f"{it_odet} {it_ob} {it_passiveSeed[it_verb][temp_odet]} {ita_passive_from}{ita_Subj}"
    
                    wh = random.choice(['when', 'what', 'who', 'which', 'why'])
                    df = pd.concat([df, pd.DataFrame.from_dict([{
                        "ita-r-1": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}", 
                        "ita-r-4-null_subject": f"{it_verb} {it_odet} {it_ob}", 
                        "ita-r-2-subordinate": f"{pSubj} {it_pVerb} che {ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}",
                        "ita-r-3-passive": ita_passive_sentence,
                        "ita-u-1-negation": f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} no {it_ob}",
                        "ita-u-2-inversion": " ".join(f"{ita_Sdet} {ita_Subj} {it_verb} {it_odet} {it_ob}".split(" ")[::-1]), 
                        "ita-u-3-gender": f"{temp_odet} {ita_Subj} {it_verb} {it_odet} {it_ob}",
                        "en-r-1": f"the {subj} {verb} {odet} {ob}",
                        "en-r-2-subordinate": f"{pSubj} {pVerb} that the {subj} {verb} the {ob}",
                        "en-r-3-passive": f"{odet} {ob} is {passiveSeed[verb]} by the {subj}",
                        "en-u-1-negation": f"the {subj} {verb} {odet} doesn't {ob}",
                        "en-u-2-inversion": " ".join(f"the {subj} {verb} {odet} {ob}".split(" ")[::-1]),
                        "en-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {subj} {verb} the {ob}?",
                        "en-u-3-wh": f"Did the {subj} {verb} {odet} {wh} {ob}?",
                        "en_S-r-1": f"the {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}",
                        "en_S-r-2-subordinate": f"{pSubj} {verb_en_S[pVerb]} that the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}",
                        "en_S-r-3-passive": f"{odet} {en_en_S[ob]} is {passiveSeed_en_S[verb]} by the {en_en_S[subj]}",
                        "en_S-u-1-negation": f"the {en_en_S[subj]} {verb_en_S[verb]} {odet} doesn't {en_en_S[ob]}",
                        "en_S-u-2-inversion": " ".join(f"the {en_en_S[subj]} {verb_en_S[verb]} {odet} {en_en_S[ob]}".split(" ")[::-1]),
                        "en_S-u-4-qsubordinate": f"{pSubj} {pVerb} that does the {en_en_S[subj]} {verb_en_S[verb]} the {en_en_S[ob]}?",
                        "en_S-u-3-wh": f"Did the {en_en_S[subj]} {verb_en_S[verb]} {odet} {wh} {en_en_S[ob]}?"
                    }])])
                    if (f"{odet} {ob} is {passiveSeed[verb]} by the {subj}" == None):
                        print(f"{odet} {ob} is {passiveSeed[verb]} by the {subj}", odet, ob, passiveSeed[verb], subj)

make_plural_df(pluralSeed, it_pluralSeed, it_genderDict)
make_plural_df(pluralSeed_female, it_pluralSeed_female, it_genderDict)

df.reset_index()

Unnamed: 0,index,ita-r-1,ita-r-4-null_subject,ita-r-2-subordinate,ita-r-3-passive,ita-u-1-negation,ita-u-2-inversion,ita-u-3-gender,en-r-1,en-r-2-subordinate,...,en-u-2-inversion,en-u-4-qsubordinate,en-u-3-wh,en_S-r-1,en_S-r-2-subordinate,en_S-r-3-passive,en_S-u-1-negation,en_S-u-2-inversion,en_S-u-4-qsubordinate,en_S-u-3-wh
0,0,il cane colpisce la bottiglia,colpisce la bottiglia,Tom afferma che il cane colpisce la bottiglia,la bottiglia è colpita dal cane,il cane colpisce la no bottiglia,bottiglia la colpisce cane il,la cane colpisce la bottiglia,the dog hits the bottle,Tom states that the dog hits the bottle,...,bottle the hits dog the,Tom states that does the dog hits the bottle?,Did the dog hits the what bottle?,the pundle hufkos the woggle,Tom gnobos that the pundle hufkos the woggle,the woggle is hufkoed by the pundle,the pundle hufkos the doesn't woggle,woggle the hufkos pundle the,Tom states that does the pundle hufkos the wog...,Did the pundle hufkos the what woggle?
1,0,il cane calcia la bottiglia,calcia la bottiglia,Tom pensa che il cane calcia la bottiglia,la bottiglia è calciata dal cane,il cane calcia la no bottiglia,bottiglia la calcia cane il,la cane calcia la bottiglia,the dog kicks the bottle,Tom thinks that the dog kicks the bottle,...,bottle the kicks dog the,Tom thinks that does the dog kicks the bottle?,Did the dog kicks the why bottle?,the pundle chuskos the woggle,Tom soffles that the pundle chuskos the woggle,the woggle is chuskoed by the pundle,the pundle chuskos the doesn't woggle,woggle the chuskos pundle the,Tom thinks that does the pundle chuskos the wo...,Did the pundle chuskos the why woggle?
2,0,il cane tocca una bottiglia,tocca una bottiglia,Sheela pensa che il cane tocca una bottiglia,una bottiglia è toccata dal cane,il cane tocca una no bottiglia,bottiglia una tocca cane il,una cane tocca una bottiglia,the dog touches a bottle,Sheela thinks that the dog touches the bottle,...,bottle a touches dog the,Sheela thinks that does the dog touches the bo...,Did the dog touches a when bottle?,the pundle stoffles a woggle,Sheela soffles that the pundle stoffles the wo...,a woggle is stoffled by the pundle,the pundle stoffles a doesn't woggle,woggle a stoffles pundle the,Sheela thinks that does the pundle stoffles th...,Did the pundle stoffles a when woggle?
3,0,il cane spinge la bottiglia,spinge la bottiglia,John suggerisce che il cane spinge la bottiglia,la bottiglia è spinta dal cane,il cane spinge la no bottiglia,bottiglia la spinge cane il,la cane spinge la bottiglia,the dog pushes the bottle,John suggests that the dog pushes the bottle,...,bottle the pushes dog the,John suggests that does the dog pushes the bot...,Did the dog pushes the when bottle?,the pundle chunges the woggle,John trongos that the pundle chunges the woggle,the woggle is chunged by the pundle,the pundle chunges the doesn't woggle,woggle the chunges pundle the,John suggests that does the pundle chunges the...,Did the pundle chunges the when woggle?
4,0,il cane colpisce un giornale,colpisce un giornale,Leela crede che il cane colpisce un giornale,un giornale è colpito dal cane,il cane colpisce un no giornale,giornale un colpisce cane il,un cane colpisce un giornale,the dog hits a newspaper,Leela believes that the dog hits the newspaper,...,newspaper a hits dog the,Leela believes that does the dog hits the news...,Did the dog hits a why newspaper?,the pundle hufkos a gongle,Leela herdles that the pundle hufkos the gongle,a gongle is hufkoed by the pundle,the pundle hufkos a doesn't gongle,gongle a hufkos pundle the,Leela believes that does the pundle hufkos the...,Did the pundle hufkos a why gongle?
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1101,0,le scrittrici leggono il poema,leggono il poema,Tom pensa che le scrittrici leggono il poema,il poema è letto dalle scrittrici,le scrittrici leggono il no poema,poema il leggono scrittrici le,il scrittrici leggono il poema,the writers read the poem,Tom thinks that the writers read the poem,...,poem the read writers the,Tom thinks that does the writers read the poem?,Did the writers read the when poem?,the arcuplos gurdle the skerpo,Tom soffles that the arcuplos gurdle the skerpo,the skerpo is gurdled by the arcuplos,the arcuplos gurdle the doesn't skerpo,skerpo the gurdle arcuplos the,Tom thinks that does the arcuplos gurdle the s...,Did the arcuplos gurdle the when skerpo?
1102,0,le scrittrici leggono il capitolo,leggono il capitolo,Maria crede che le scrittrici leggono il capitolo,il capitolo è letto dalle scrittrici,le scrittrici leggono il no capitolo,capitolo il leggono scrittrici le,il scrittrici leggono il capitolo,the writers read a chapter,Maria believes that the writers read the chapter,...,chapter a read writers the,Maria believes that does the writers read the ...,Did the writers read a which chapter?,the arcuplos gurdle a wrindle,Maria herdles that the arcuplos gurdle the wri...,a wrindle is gurdled by the arcuplos,the arcuplos gurdle a doesn't wrindle,wrindle a gurdle arcuplos the,Maria believes that does the arcuplos gurdle t...,Did the arcuplos gurdle a which wrindle?
1103,0,le cuoche leggono il libro,leggono il libro,Gomu suggerisce che le cuoche leggono il libro,il libro è letto dalle cuoche,le cuoche leggono il no libro,libro il leggono cuoche le,il cuoche leggono il libro,the cooks read the book,Gomu suggests that the cooks read the book,...,book the read cooks the,Gomu suggests that does the cooks read the book?,Did the cooks read the why book?,the fuzzles gurdle the dringle,Gomu trongos that the fuzzles gurdle the dringle,the dringle is gurdled by the fuzzles,the fuzzles gurdle the doesn't dringle,dringle the gurdle fuzzles the,Gomu suggests that does the fuzzles gurdle the...,Did the fuzzles gurdle the why dringle?
1104,0,le cuoche leggono il poema,leggono il poema,Sheela pensa che le cuoche leggono il poema,il poema è letto dalle cuoche,le cuoche leggono il no poema,poema il leggono cuoche le,il cuoche leggono il poema,the cooks read the poem,Sheela thinks that the cooks read the poem,...,poem the read cooks the,Sheela thinks that does the cooks read the poem?,Did the cooks read the why poem?,the fuzzles gurdle the skerpo,Sheela soffles that the fuzzles gurdle the skerpo,the skerpo is gurdled by the fuzzles,the fuzzles gurdle the doesn't skerpo,skerpo the gurdle fuzzles the,Sheela thinks that does the fuzzles gurdle the...,Did the fuzzles gurdle the why skerpo?


## Italian stylized

In [490]:
def my_lambda(row):
    new_words = []
    words = row['en_S-r-1'].split(" ")
    if (words[0] == 'the'):
        new_words.append(gender_ita_S[words[1]][0])
    else:
        new_words.append(gender_ita_S[words[1]][1])
    new_words += words[1:3]
    if (words[3] == 'the'):
        new_words.append(gender_ita_S[words[4]][0])
    else:
        new_words.append(gender_ita_S[words[4]][1])
    new_words.append(words[4])
    # print(new_words)
    return " ".join(new_words)
df['ita_S-r-1'] = df.progress_apply(lambda row: my_lambda(row), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Real grammar 1 (Null Subject parameter)

In [491]:
df['ita_S-r-4-null_subject'] = df.progress_apply(lambda row: " ".join(row['ita_S-r-1'].split(" ")[2:]), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Real Grammar 2 (Passive construction)

In [492]:
def my_lambda(row):
    new_words = []
    words = row['en_S-r-3-passive'].split(" ")
    if (words[0] == 'the'):
        new_words.append(gender_ita_S[words[1]][0])
    else:
        new_words.append(gender_ita_S[words[1]][1])
    new_words += words[1:5]
    if (words[5] == 'the'):
        new_words.append(gender_ita_S[words[-1]][0])
    else:
        new_words.append(gender_ita_S[words[-1]][1])
    new_words.append(words[-1])
    # print(new_words)
    return " ".join(new_words)
    # [gender_ita_S[row['en_S-r-3-passive'].split(" ")[1]]] + row['en_S-r-3-passive'].split(" ")[1:-2] + [gender_ita_S[row['en_S-r-3-passive'].split(" ")[-1]]] + [row['en_S-r-3-passive'].split(" ")[-1]]
df['ita_S-r-3-passive'] =  df.progress_apply(lambda row: my_lambda(row), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Real Grammar 3 (Subordinate construction)

In [493]:
def my_lambda(row):
    new_words = []
    words = row['en_S-r-2-subordinate'].split(" ")
    new_words += words[:3]
    if (words[4] == 'the'):
        new_words.append(gender_ita_S[words[4]][1])
    else:
        try:
            new_words.append(gender_ita_S[words[4]][0])
        except:
            print(words[4], gender_ita_S[words[4]])
            new_words.append(gender_ita_S[words[4]][0])
    new_words += words[4:6]
    if (words[6] == 'the'):
        new_words.append(gender_ita_S[words[-1]][0])
    else:
        new_words.append(gender_ita_S[words[-1]][1])
    new_words.append(words[-1])
    # print(new_words)
    return " ".join(new_words)
df['ita_S-r-2-subordinate'] =  df.progress_apply(lambda row: my_lambda(row), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Unreal Grammar 1: Add a negation after the 3rd word in the nullified subject sentence

In [494]:
df['ita_S-u-1-negation'] = df.progress_apply(lambda row: " ".join(row['ita_S-r-1'].split(" ")[:4] + [ "no" ] + row['en_S-r-1'].split(" ")[4:]), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Unreal Grammar 2: Invert italian sentence

In [495]:
df['ita_S-u-2-inversion'] = df.progress_apply(lambda row: " ".join(row['ita_S-r-1'].split(" ")[::-1]), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## Italian stylized Unreal Grammar 3: Same gender for subject and object

In [496]:
df['ita_S-u-3-gender'] = df.progress_apply(lambda row: " ".join(row['ita_S-r-1'].split(" ")[:3] + [row['ita_S-r-1'].split(" ")[0]] + [row['ita_S-r-1'].split(" ")[-1]]), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

# Japanese

## Japanese  real grammar 1 (Wa after subj, o after obj, verb)

In [497]:
df['jap_S-r-1'] = df.progress_apply(lambda row: " ".join(row["en_S-r-1"].split(" ")[:2]) + " wa " + " ".join(row["en_S-r-1"].split(" ")[-2:]) + " o " + row["en_S-r-1"].split(" ")[2], axis=1)

df['jp-r-1-sov'] = df.progress_apply(lambda row: " ".join(row["en-r-1"].split(" ")[:2]) + " wa " + " ".join(row["en-r-1"].split(" ")[-2:]) + " o " + row["en-r-1"].split(" ")[2], axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

In [498]:
df['jap-r-1'] = df.progress_apply(lambda row: f'{nounDict_jp[row["jp-r-1-sov"].split(" ")[1]]} {particleDict_jp["wa"]} {nounDict_jp[row["jp-r-1-sov"].split(" ")[4]]} {particleDict_jp["o"]} {verbDict_jp[row["jp-r-1-sov"].split(" ")[-1]]}' , axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

## JP  real grammar 2 (Passive construction)

In [499]:
df['jap_S-r-3-passive'] = df.progress_apply(lambda row: " ".join(row["en_S-r-1"].split(" ")[3:5]) + " wa " + " ".join(row["en_S-r-1"].split(" ")[:2]) + " ni " + infinitive_en_S[row["en-r-1"].split(" ")[2]] + " reru", axis=1)
df['jp-r-2-passive'] = df.progress_apply(lambda row: " ".join(row["en-r-1"].split(" ")[3:5]) + " wa " + " ".join(row["en-r-1"].split(" ")[:2]) + " ni " + infinitive[row["en-r-1"].split(" ")[2]] + " reru", axis=1)

df['jap-r-3-passive'] = df.progress_apply(lambda row: f'{nounDict_jp[row["jp-r-2-passive"].split(" ")[1]]} {particleDict_jp["wa"]} {nounDict_jp[row["jp-r-2-passive"].split(" ")[4]]} {particleDict_jp["ni"]} {passiveDict_jp[verbDict_jp[row["jp-r-2-passive"].split(" ")[-2]]]}' , axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

## JP  real grammar 3 (Subordinate construction)

In [500]:
df['jap_S-r-2-subordinate'] = df.progress_apply(lambda row: " ".join([row["en_S-r-2-subordinate"].split(" ")[0]] + ["wa"] + row["en_S-r-2-subordinate"].split(" ")[3:5] + ["ga"] + row["en_S-r-2-subordinate"].split(" ")[-2:]  + ["o"] + [row["en_S-r-2-subordinate"].split(" ")[5]] + ["to"] + [row["en_S-r-2-subordinate"].split(" ")[1][:-1]]), axis=1)
df['jp-r-3-subordinate'] = df.progress_apply(lambda row: " ".join([row["en-r-2-subordinate"].split(" ")[0]] + ["wa"] + row["en-r-2-subordinate"].split(" ")[3:5] + ["ga"] + row["en-r-2-subordinate"].split(" ")[-2:]  + ["o"] + [row["en-r-2-subordinate"].split(" ")[5]] + ["to"] + [row["en-r-2-subordinate"].split(" ")[1][:-1]]), axis=1)

df['jap-r-2-subordinate'] = df.progress_apply(lambda row: f'{nameDict_jp[row["jp-r-3-subordinate"].split(" ")[0]]} {particleDict_jp["wa"]} {row["jap-r-1"].replace(particleDict_jp["wa"],particleDict_jp["ga"])} {particleDict_jp["to"]} {subordinateDict_jp[" ".join(row["jp-r-3-subordinate"].split(" ")[-2:])]}' , axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

## JP - Unreal grammar 1:Add a negation at the end of the object in the real-jp-1 sentence

In [501]:
df['jap_S-u-1-negation'] = df.progress_apply(lambda row: " ".join(row['jap_S-r-1'].split(" ")[:5]) + " nai " + " ".join(row['jap_S-r-1'].split(" ")[5:]), axis=1)
df['jp-u-1-negation'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[:5]) + " no " + " ".join(row['jp-r-1-sov'].split(" ")[5:]), axis=1)

df['jap-u-1-negation'] = df.progress_apply(lambda row: f'{" ".join(row["jap-r-1"].split(" ")[:3])} {particleDict_jp["nai"]} {" ".join(row["jap-r-1"].split(" ")[3:])}', axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

## JP - Unreal grammar 2: Invert jp-real-1 sentence

In [502]:
df['jap_S-u-2-inversion'] = df.progress_apply(lambda row: " ".join(row['jap_S-r-1'].split(" ")[::-1]), axis=1)
df['jp-u-2-invert'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[::-1]), axis=1)

df['jap-u-2-inversion'] = df.progress_apply(lambda row: " ".join(row['jap-r-1'].split(" ")[::-1]), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

## JP - Unreal grammar add a after o + past tense

In [504]:
df['jap_S-u-3-past-tense'] = df.progress_apply(lambda row: " ".join(row['jap_S-r-1'].split(" ")[:-2]) + ' o-ta ' + " ".join([infinitive_en_S[row['jp-r-1-sov'].split(" ")[-1]]]), axis=1)
df['jp-u-3-past-tense'] = df.progress_apply(lambda row: " ".join(row['jp-r-1-sov'].split(" ")[:-2]) + ' o-ta ' + " ".join([infinitive[row['jp-r-1-sov'].split(" ")[-1]]]), axis=1)
df['jap-u-3-past-tense'] = df.progress_apply(lambda row: f'{" ".join(row['jap-r-1'].split(" ")[:-2])} {particleDict_jp['o']}{particleDict_jp['ta']} {row['jap-r-1'].split(" ")[-1]}', axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

In [505]:
df[['en_S-r-1', 'jap_S-r-1', 'jap_S-r-2-subordinate', 'jap_S-r-3-passive', 'jap_S-u-1-negation', 'jap_S-u-2-inversion', 'jap_S-u-3-past-tense']]

Unnamed: 0,en_S-r-1,jap_S-r-1,jap_S-r-2-subordinate,jap_S-r-3-passive,jap_S-u-1-negation,jap_S-u-2-inversion,jap_S-u-3-past-tense
0,the pundle hufkos the woggle,the pundle wa the woggle o hufkos,Tom wa the pundle ga the woggle o hufkos to gnobo,the woggle wa the pundle ni to hufko reru,the pundle wa the woggle nai o hufkos,hufkos o woggle the wa pundle the,the pundle wa the woggle o-ta to hufko
0,the pundle chuskos the woggle,the pundle wa the woggle o chuskos,Tom wa the pundle ga the woggle o chuskos to s...,the woggle wa the pundle ni to chusko reru,the pundle wa the woggle nai o chuskos,chuskos o woggle the wa pundle the,the pundle wa the woggle o-ta to chusko
0,the pundle stoffles a woggle,the pundle wa a woggle o stoffles,Sheela wa the pundle ga the woggle o stoffles ...,a woggle wa the pundle ni to stoffle reru,the pundle wa a woggle nai o stoffles,stoffles o woggle a wa pundle the,the pundle wa a woggle o-ta to stoffle
0,the pundle chunges the woggle,the pundle wa the woggle o chunges,John wa the pundle ga the woggle o chunges to ...,the woggle wa the pundle ni to chunge reru,the pundle wa the woggle nai o chunges,chunges o woggle the wa pundle the,the pundle wa the woggle o-ta to chunge
0,the pundle hufkos a gongle,the pundle wa a gongle o hufkos,Leela wa the pundle ga the gongle o hufkos to ...,a gongle wa the pundle ni to hufko reru,the pundle wa a gongle nai o hufkos,hufkos o gongle a wa pundle the,the pundle wa a gongle o-ta to hufko
...,...,...,...,...,...,...,...
0,the arcuplos gurdle the skerpo,the arcuplos wa the skerpo o gurdle,Tom wa the arcuplos ga the skerpo o gurdle to ...,the skerpo wa the arcuplos ni to gurdle reru,the arcuplos wa the skerpo nai o gurdle,gurdle o skerpo the wa arcuplos the,the arcuplos wa the skerpo o-ta to gurdle
0,the arcuplos gurdle a wrindle,the arcuplos wa a wrindle o gurdle,Maria wa the arcuplos ga the wrindle o gurdle ...,a wrindle wa the arcuplos ni to gurdle reru,the arcuplos wa a wrindle nai o gurdle,gurdle o wrindle a wa arcuplos the,the arcuplos wa a wrindle o-ta to gurdle
0,the fuzzles gurdle the dringle,the fuzzles wa the dringle o gurdle,Gomu wa the fuzzles ga the dringle o gurdle to...,the dringle wa the fuzzles ni to gurdle reru,the fuzzles wa the dringle nai o gurdle,gurdle o dringle the wa fuzzles the,the fuzzles wa the dringle o-ta to gurdle
0,the fuzzles gurdle the skerpo,the fuzzles wa the skerpo o gurdle,Sheela wa the fuzzles ga the skerpo o gurdle t...,the skerpo wa the fuzzles ni to gurdle reru,the fuzzles wa the skerpo nai o gurdle,gurdle o skerpo the wa fuzzles the,the fuzzles wa the skerpo o-ta to gurdle


# Non-grammatical sentences

In [None]:
# import re
# def swap_words(sentence, col):
#     # print(col, sentence)
    

#     swapProhibited = {
#         'en': [ [0,3], [1,4] ],
#         'en-r-1-subordinate': [ [0,4], [0,-1], [4,-1], [3,-2], [1,5] ],
#         'en-r-2-passive': [ [0,-2], [1,-1] ],
#         'en-u-1-negation': [ [0,3], [1,5] ],
#         'en-u-2-inversion': [ [0,-2], [1,-1] ],
#         'en-u-3-qsubordinate': [ [4,7], [5,8], [1,6], [0,4], [0, 7] ],
#         'en-u-3-wh': [ [1,4], [2,-1] ],
#         'it': [ [0,3], [1,4] ],
#         'it-r-1-null_subject': [ ],
#         'it-r-2-passive': [ [0,5], [1,6] ],
#         'it-r-3-subordinate': [ [0,5], [1,6] ],
#         'it-u-1-negation': [ [0,3], [1,5] ],
#         'it-u-2-invert': [ [0,3], [1,4] ],
#         'it-u-3-gender': [ [0,3], [1,4] ],
#         'ita-r-1': [ [1,-1], [0,-2] ],
#         'ita-r-1-null_subject': [ ],
#         'ita-r-2-subordinate': [ [0,4], [0,-1], [4,-1], [3,-2], [1,-3] ],
#         'ita-r-3-passive': [ [1,-1] ],
#         'ita-u-1-negation': [ [0,3], [1,-1] ],
#         'ita-u-2-invert': [ [0,-2], [1,-1] ],
#         'ita-u-3-gender': [ [0,-2], [1,-1] ],
#         'jp-r-1-sov': [ [0,3], [1,4] ],
#         'jp-r-2-passive': [ [0,3], [1,4] ],
#         'jp-r-3-subordinate': [ [0,3], [0,6], [3,6] ],
#         'jp-u-1-negation': [ [0,4], [1,5] ],
#         'jp-u-2-invert': [ [3,6], [2,5] ],
#         'jp-u-3-past-tense': [ [0,3], [1,4] ],
#         'jap-r-1': [ [0,2] ],
#         'jap-r-3-passive': [ [0,2]],
#         'jap-r-2-subordinate': [ [0,2], [0,4], [2,4], [6,-1] ],
#         'jap-u-1-negation': [ [0,2] ],
#         'jap-u-2-inversion': [ [2,-1] ],
#         'jap-u-3-past-tense': [ [0,2] ]
#     }
#     # if (col == 'jap-r-1'):
#     #     print(sentence, len(sentence), len(sentence.split(" ")))
#     if col == 'en-u-1-negation':
#         sentence = re.split(" ", sentence)
#     else:
#         sentence = re.split("'| ", sentence)
        
#     # if (col == 'jap-r-1'):
#     #     print(sentence, len(sentence))
#     numWords = len(sentence)
#     # toSwap = random.sample(range(0, numWords), 2)
#     toSwap = [-1,-2]
#     toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
#     # prohibitedWords = [set([sentence[pos[0]],sentence[pos[1]]]) for pos in swapProhibited[col]]
#     # while toSwapWords in prohibitedWords:
#     #     toSwap = random.sample(range(0, numWords), 2)
#     #     toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
#     swap1 = sentence[toSwap[0]]
#     swap2 = sentence[toSwap[1]]
#     sentence[toSwap[0]] = swap2
#     sentence[toSwap[1]] = swap1
#     sentence = " ".join(sentence)
#     if '?' in sentence and sentence.index('?') != len(sentence) - 1:
#         sentence = sentence.replace('?', '')
#         sentence = sentence +'?'
#     return sentence

# for col in list(df.columns):
#     print(' Now processing.... ', col)
#     df[f'ng-{col}'] = df.progress_apply(lambda row: swap_words(row[col], col), axis=1)

# df['ng-it-u-3-gender'] = df['it']
# df['ng-ita-u-3-gender'] = df['ita-r-1']
# df['ng-it-u-1-negation'] = df['it']
# df['ng-ita-u-1-negation'] = df['ita-r-1']
# df['ng-jap-u-1-negation'] = df['jap-r-1']
# df['ng-jap-u-3-past-tense'] = df['jap-r-1']
# df['ng-jp-u-1-negation'] = df['jp-r-1-sov']
# df['ng-jp-u-3-past-tense'] = df['jp-r-1-sov']
# df['ng-en-u-1-negation'] = df['en']
# df['ng-en-u-4-wh'] = df['en']


# # df['ng-it-u-1-negation'] = df.apply(lambda row: row['it'].replace(" no ", " ") + " no", axis=1)
# # df['ng-ita-u-1-negation'] = df.apply(lambda row: row['ita-r-1'].replace(" no ", " ") + " no", axis=1)
# # df['ng-jap-u-1-negation'] = df.apply(lambda row: row['jap-u-1-negation'].replace(f" {particleDict_jp["nai"]} ", " ") + f" {particleDict_jp["nai"]}", axis=1)
# # df['ng-jap-u-3-past-tense'] = df.progress_apply(lambda row: row['jap-u-3-past-tense'].replace(f" {particleDict_jp['o']}{particleDict_jp['ta']} ", " ") + f" {particleDict_jp['o']}{particleDict_jp['ta']}", axis=1)
# # df['ng-jp-u-1-negation'] = df.apply(lambda row: row['jp-u-1-negation'].replace(f" nai ", " ") + f" nai", axis=1)
# # df['ng-jp-u-3-past-tense'] = df.progress_apply(lambda row: row['jp-u-3-past-tense'].replace(f" o-ta ", " ") + f" o-ta", axis=1)
# # df['ng-en-u-1-negation'] = df.apply(lambda row: row['en-u-1-negation'].replace(" doesn't ", " ") + " doesn't", axis=1)
# # df['ng-en-u-4-wh'] = df.apply(lambda row: row['en-u-3-wh'].replace(f" {row['en-u-3-wh'].split(' ')[5]} ", " ")[:-1] + f" {row['en-u-3-wh'].split(' ')[5]}?", axis=1) 


## Non grammatical sentences random shuffles

In [506]:
import re
def swap_words(sentence, col):
    # print(col, sentence)

    # swapProhibited = {
    #     'en': [ [0,3], [1,4] ],
    #     'en-r-1-subordinate': [ [0,4], [0,-1], [4,-1], [3,-2], [1,5] ],
    #     'en-r-2-passive': [ [0,-2], [1,-1] ],
    #     'en-u-1-negation': [ [0,3], [1,5] ],
    #     'en-u-2-inversion': [ [0,-2], [1,-1] ],
    #     'en-u-3-qsubordinate': [ [4,7], [5,8], [1,6], [0,4], [0, 7] ],
    #     'en-u-3-wh': [ [1,4], [2,-1] ],
    #     'it': [ [0,3], [1,4] ],
    #     'it-r-1-null_subject': [ ],
    #     'it-r-2-passive': [ [0,5], [1,6] ],
    #     'it-r-3-subordinate': [ [0,5], [1,6] ],
    #     'it-u-1-negation': [ [0,3], [1,5] ],
    #     'it-u-2-invert': [ [0,3], [1,4] ],
    #     'it-u-3-gender': [ [0,3], [1,4] ],
    #     'ita-r-1': [ [1,-1], [0,-2] ],
    #     'ita-r-1-null_subject': [ ],
    #     'ita-r-2-subordinate': [ [0,4], [0,-1], [4,-1], [3,-2], [1,-3] ],
    #     'ita-r-3-passive': [ [1,-1] ],
    #     'ita-u-1-negation': [ [0,3], [1,-1] ],
    #     'ita-u-2-invert': [ [0,-2], [1,-1] ],
    #     'ita-u-3-gender': [ [0,-2], [1,-1] ],
    #     'jp-r-1-sov': [ [0,3], [1,4] ],
    #     'jp-r-2-passive': [ [0,3], [1,4] ],
    #     'jp-r-3-subordinate': [ [0,3], [0,6], [3,6] ],
    #     'jp-u-1-negation': [ [0,4], [1,5] ],
    #     'jp-u-2-invert': [ [3,6], [2,5] ],
    #     'jp-u-3-past-tense': [ [0,3], [1,4] ],
    #     'jap-r-1': [ [0,2] ],
    #     'jap-r-3-passive': [ [0,2]],
    #     'jap-r-2-subordinate': [ [0,2], [0,4], [2,4], [6,-1] ],
    #     'jap-u-1-negation': [ [0,2] ],
    #     'jap-u-2-inversion': [ [2,-1] ],
    #     'jap-u-3-past-tense': [ [0,2] ]
    # }
    # if (col == 'jap-r-1'):
    #     print(sentence, len(sentence), len(sentence.split(" ")))
    if col == 'en-u-1-negation':
        sentence = re.split(" ", sentence)
    else:
        sentence = re.split("'| ", sentence)
        
    # if (col == 'jap-r-1'):
    #     print(sentence, len(sentence))
    numWords = len(sentence)
    # toSwap = random.sample(range(0, numWords), 2)
    toSwap = [-1,-2]
    toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
    # prohibitedWords = [set([sentence[pos[0]],sentence[pos[1]]]) for pos in swapProhibited[col]]
    # while toSwapWords in prohibitedWords:
    #     toSwap = random.sample(range(0, numWords), 2)
    #     toSwapWords = set([sentence[toSwap[0]],sentence[toSwap[1]]])
    swap1 = sentence[toSwap[0]]
    swap2 = sentence[toSwap[1]]
    sentence[toSwap[0]] = swap2
    sentence[toSwap[1]] = swap1
    sentence = " ".join(sentence)
    # if (not '-u-' in sentence):
    #     sentence = random.sample(sentence, numWords)
    #     sentence = " ".join(sentence)
    if '?' in sentence and sentence.index('?') != len(sentence) - 1:
        sentence = sentence.replace('?', '')
        sentence = sentence +'?'
    return sentence

for col in list(df.columns):
    print(' Now processing.... ', col)
    df[f'ng-{col}'] = df.progress_apply(lambda row: swap_words(row[col], col), axis=1)

# df['it-u-1-negation'] = df.apply(lambda row: " ".join(random.sample(row['it'].split(" "), len(row['it'].split(" "))).insert('no', 4)), axis=1)
# df['ita-u-1-negation'] = df.apply(lambda row: " ".join(random.sample(row['ita-r-1'].split(" "), len(row['ita-r-1'].split(" "))).insert('no', 4)), axis=1)
# df['jap-u-1-negation'] = df.apply(lambda row: " ".join(random.sample(row['jap-r-1'].split(" "), len(row['jap-r-1'].split(" "))).insert(particleDict_jp['nai'], 4)), axis=1)
# df['jap-u-3-past-tense'] = df.apply(lambda row: " ".join(random.sample(row['jap-r-1'].split(" "), len(row['jap-r-1'].split(" "))).insert(f"{particleDict_jp['o']}{particleDict_jp['ta']}", 3)), axis=1)
# df['jp-u-1-negation'] = df.apply(lambda row: " ".join(random.sample(row['jp-r-1-sov'].split(" "), len(row['jp-r-1-sov'].split(" "))).insert('nai', 4)), axis=1)
# df['jp-u-3-past-tense'] = df.apply(lambda row: " ".join(random.sample(row['jp-r-1-sov'].split(" "), len(row['jp-r-1-sov'].split(" "))).insert('o-ta', 4)), axis=1)
# df['en-u-1-negation'] = df.apply(lambda row: " ".join(random.sample(row['en'].split(" "), len(row['en'].split(" "))).insert("doesn't", 4)), axis=1)
# df['en-u-3-wh'] = df.apply(lambda row: " ".join(random.sample(row['en'].split(" "), len(row['en'].split(" "))).insert(random.choice(['when', 'what', 'who', 'which', 'why']), 5)), axis=1)


df['ng-ita_S-u-3-gender'] = df['ita_S-r-1']
df['ng-ita-u-3-gender'] = df['ita-r-1']
df['ng-ita_S-u-1-negation'] = df.apply(lambda row: row['ita_S-r-1'].replace(" no ", " ") + " no", axis=1)
df['ng-ita-u-1-negation'] = df.apply(lambda row: row['ita-r-1'].replace(" no ", " ") + " no", axis=1)
df['ng-jap-u-1-negation'] = df.apply(lambda row: row['jap-u-1-negation'].replace(f" {particleDict_jp["nai"]} ", " ") + f" {particleDict_jp["nai"]}", axis=1)
df['ng-jap-u-3-past-tense'] = df.progress_apply(lambda row: row['jap-u-3-past-tense'].replace(f" {particleDict_jp['o']}{particleDict_jp['ta']} ", " ") + f" {particleDict_jp['o']}{particleDict_jp['ta']}", axis=1)
df['ng-jp-u-1-negation'] = df.apply(lambda row: row['jp-u-1-negation'].replace(f" nai ", " ") + f" nai", axis=1)
df['ng-jp-u-3-past-tense'] = df.progress_apply(lambda row: row['jp-u-3-past-tense'].replace(f" o-ta ", " ") + f" o-ta", axis=1)
df['ng-jap_S-u-1-negation'] = df.apply(lambda row: row['jap_S-u-1-negation'].replace(f" nai ", " ") + f" nai", axis=1)
df['ng-jap_S-u-3-past-tense'] = df.progress_apply(lambda row: row['jap_S-u-3-past-tense'].replace(f" o-ta ", " ") + f" o-ta", axis=1)
df['ng-en-u-1-negation'] = df.apply(lambda row: row['en-u-1-negation'].replace(" doesn't ", " ") + " doesn't", axis=1)
df['ng-en-u-3-wh'] = df.apply(lambda row: row['en-u-3-wh'].replace(f" {row['en-u-3-wh'].split(' ')[5]} ", " ")[:-1] + f" {row['en-u-3-wh'].split(' ')[5]}?", axis=1) 
df['ng-en_S-u-1-negation'] = df.apply(lambda row: row['en_S-u-1-negation'].replace(" doesn't ", " ") + " doesn't", axis=1)
df['ng-en_S-u-3-wh'] = df.apply(lambda row: row['en_S-u-3-wh'].replace(f" {row['en_S-u-3-wh'].split(' ')[5]} ", " ")[:-1] + f" {row['en_S-u-3-wh'].split(' ')[5]}?", axis=1) 


 Now processing....  ita-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-r-4-null_subject


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita-u-3-gender


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-u-4-qsubordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en-u-3-wh


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-u-4-qsubordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  en_S-u-3-wh


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-r-4-null_subject


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  ita_S-u-3-gender


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-r-1-sov


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-r-1


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-r-2-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-r-3-passive


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-r-3-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-r-2-subordinate


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-u-1-negation


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-u-2-invert


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-u-2-inversion


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap_S-u-3-past-tense


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jp-u-3-past-tense


  0%|          | 0/1106 [00:00<?, ?it/s]

 Now processing....  jap-u-3-past-tense


  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

In [507]:
# Remove spaces from kanji
japCols = [col for col in df.columns if 'jap-' in col]
for jap in japCols:
    df[jap] = df.progress_apply(lambda row: "".join(row[jap].split(" ")), axis=1)

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

  0%|          | 0/1106 [00:00<?, ?it/s]

In [508]:
df[['ita-r-1', 'ita-r-2-subordinate', 'ita-r-3-passive', 'ita-u-1-negation', 'ita-u-2-inversion', 'ita-u-3-gender', 'en-r-1', 'en-r-2-subordinate', 'en-r-3-passive', 'en-u-1-negation', 'en-u-2-inversion', 'en-u-3-wh', 'en_S-r-1', 'en_S-r-2-subordinate', 'en_S-r-3-passive', 'en_S-u-1-negation', 'en_S-u-2-inversion', 'en_S-u-3-wh', 'ita_S-r-1', 'ita_S-r-3-passive', 'ita_S-r-2-subordinate', 'ita_S-u-1-negation', 'ita_S-u-2-inversion', 'ita_S-u-3-gender', 'jap_S-r-1', 'jap-r-1', 'jap_S-r-3-passive', 'jap-r-3-passive', 'jap_S-r-2-subordinate', 'jap-r-2-subordinate', 'jap_S-u-1-negation', 'jap-u-1-negation', 'jap_S-u-2-inversion', 'jap-u-2-inversion', 'jap_S-u-3-past-tense', 'jap-u-3-past-tense', 'ng-ita-r-1', 'ng-ita-r-2-subordinate', 'ng-ita-r-3-passive', 'ng-ita-u-1-negation', 'ng-ita-u-2-inversion', 'ng-ita-u-3-gender', 'ng-en-r-1', 'ng-en-r-2-subordinate', 'ng-en-r-3-passive', 'ng-en-u-1-negation', 'ng-en-u-2-inversion', 'ng-en-u-3-wh', 'ng-en_S-r-1', 'ng-en_S-r-2-subordinate', 'ng-en_S-r-3-passive', 'ng-en_S-u-1-negation', 'ng-en_S-u-2-inversion', 'ng-en_S-u-3-wh', 'ng-ita_S-r-1', 'ng-ita_S-r-3-passive', 'ng-ita_S-r-2-subordinate', 'ng-ita_S-u-1-negation', 'ng-ita_S-u-2-inversion', 'ng-ita_S-u-3-gender', 'ng-jap_S-r-1', 'ng-jap-r-1', 'ng-jap_S-r-3-passive', 'ng-jap-r-3-passive', 'ng-jap_S-r-2-subordinate', 'ng-jap-r-2-subordinate', 'ng-jap_S-u-1-negation', 'ng-jap-u-1-negation', 'ng-jap_S-u-2-inversion', 'ng-jap-u-2-inversion', 'ng-jap_S-u-3-past-tense', 'ng-jap-u-3-past-tense']].to_csv('ngs-08-01-2024-synthetic-grammars-nonce.csv',  index=False)