# Lithuanian 

https://docs.google.com/spreadsheets/d/1chGs5Aj4rS38_R6Fl7B5a5sfH8MWvvuYCI1peioGW64/edit?gid=1132830952#gid=1132830952


In [231]:
import random
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import pandas as pd

# Set up the credentials
scope = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/drive",
]
creds = ServiceAccountCredentials.from_json_keyfile_name(
    "lithuanian-427002-52bf25ae04cd.json", scope
)
client = gspread.authorize(creds)

# Open the Google Sheet
sheet_url = "https://docs.google.com/spreadsheets/d/1chGs5Aj4rS38_R6Fl7B5a5sfH8MWvvuYCI1peioGW64/edit#gid=786163238"
sheet = client.open_by_url(sheet_url)

In [347]:
# Get the list of sheet names
sheet_names = [ws.title for ws in sheet.worksheets()]

# Create a dictionary to store the DataFrames
dataframes = {}

# Iterate over each sheet and create a DataFrame
for sheet_name in sheet_names:
    worksheet = sheet.worksheet(sheet_name)
    data = worksheet.get_all_values()
    headers = data.pop(0)
    df = pd.DataFrame(data, columns=headers)
    dataframes[sheet_name] = df

In [348]:
# Print the sheet names and DataFrames
for sheet_name, df in dataframes.items():
    sheet_name = sheet_name.replace("-", "_")
    print(f"Sheet Name: {sheet_name}")
    df.to_parquet(f"{sheet_name}.parquet")

nouns = pd.read_parquet("nouns.parquet")
sentence_templates = pd.read_parquet("sentence_templates.parquet")
adjectives = pd.read_parquet("adjectives.parquet")
pronouns = pd.read_parquet("pronouns.parquet")
verb_information = pd.read_parquet("verb_information.parquet")
verb_conjugation = pd.read_parquet("verb_conjugation.parquet")
nouns_with = pd.read_parquet("nouns_with.parquet")
questions = pd.read_parquet("questions.parquet")
numbers = pd.read_parquet("numbers.parquet")
nouns_with_extra = pd.read_parquet("nouns_with_extra.parquet")

Sheet Name: adjectives
Sheet Name: nouns
Sheet Name: pronouns
Sheet Name: verb_conjugation
Sheet Name: nouns_with_extra
Sheet Name: sentence_templates
Sheet Name: verb_information
Sheet Name: nouns_with
Sheet Name: questions
Sheet Name: numbers


In [350]:
numbers.head(15)

Unnamed: 0,number,neoficialiai,compound,years,kokia_kaina,kokia_kaina_compound,euro_nom,cent_nom,kiek_kainuoja,kiek_kainuoja_compound,euro_acc,cent_acc
0,1,vieni,,metai,vienas,,euras,centas,vieną,,eurą,centą
1,2,du,,metai,du,,eurai,centai,du,,eurus,centus
2,3,trys,,metai,trys,,eurai,centai,tris,,eurus,centus
3,4,keturi,,metai,keturi,,eurai,centai,keturis,,eurus,centus
4,5,penki,,metai,penki,,eurai,centai,penkis,,eurus,centus
5,6,šeši,,metai,šeši,,eurai,centai,šešis,,eurus,centus
6,7,septyni,,metai,septyni,,eurai,centai,septynis,,eurus,centus
7,8,aštuoni,,metai,aštuoni,,eurai,centai,aštuonis,,eurus,centus
8,9,devyni,,metai,devyni,,eurai,centai,devynis,,eurus,centus
9,10,dešimt,,metų,dešimt,,eurų,centų,dešimt,,eurų,centų


In [267]:
nouns.tail()

Unnamed: 0,nominative,plural,countable,gender,genitive singular,genitive plural,accusative singular,accusative plural,instrumental singular,instrumental plural,dative singular,dative plural,declension,category
43,avižinė košė,avižinės košės,countable,feminine,avižinės košės,avižinių košių,avižinę košę,avižines košes,avižine koše,avižinėmis košėmis,avižinei košei,avižinėms košėms,III,food
44,makaronai,,uncountable,masculine,makaronų,,makaronus,,makaronais,,makaronams,,I,food
45,pica,picos,countable,feminine,picos,picų,picą,picas,pica,picomis,picai,picoms,I,food
46,salotos,,uncountable,feminine,salotų,,salotas,,salotomis,,salotoms,,I,food
47,kepsnys su daržovėmis,kepsniai su daržovėmis,countable,masculine,kepsnio su daržovėmis,kepsnių su daržovėmis,kepsnį su daržovėmis,kepsnius su daržovėmis,kepsniu su daržovėmis,kepsniais su daržovėmis,kepsniui su daržovėmis,kepsniams su daržovėmis,II,food


In [237]:
nouns.sample(10)

Unnamed: 0,nominative,plural,countable,gender,genitive singular,genitive plural,accusative singular,accusative plural,instrumental singular,instrumental plural,dative singular,dative plural,declension,category
26,riestainis,riestainiai,countable,masculine,riestainio,riestainų,riestainį,riestainius,riestainiu,riestainiu,riestainiui,riestainiams,III,food
1,brolis,broliai,countable,masculine,,,,,,,broliui,broliams,I,person
42,tako,tako,countable,masculine,tako,takų,tako,takus,taku,takais,takui,takams,I,food
22,,ledai,uncountable,masculine,,ledų,,ledus,,ledais,,ledams,I,food
37,grietinėlė,,,,grietinėlės,,,,grietinėles,,,,,
35,,sultys,uncountable,feminine,,sulčių,,sultis,,sultimis,,sultims,III,food
31,saldainis,saldainiai,countable,masculine,saldainio,saldainių,saldainį,saldainius,saldainiu,saldainiais,saldainiui,saldainiams,III,food
27,batonas,batonai,countable,masculine,batono,batonų,batoną,batonus,batonu,batonais,batonui,batonams,I,food
36,daržovė,daržovės,countable,feminine,daržovės,daržovių,daržovę,daržoves,daržove,daržovėmis,daržovei,daržovėms,III,food
14,kiauliena,,uncountable,feminine,kiaulienos,,kiaulieną,,kiauliena,,kiaulienai,,I,food


In [351]:
pwd

'/Users/jonathan/projects/lithuanianapp'

In [160]:
def csv_to_food_combinations(df):
    
    # Initialize the food_combinations dictionary
    food_combinations = {}

    # Iterate through the DataFrame
    for _, row in df.iterrows():
        food = row["base"]
        category = row["category"]
        elements = [
            elem for elem in row[2:] if (pd.notna(elem)) and (elem != "")
        ]  # Get non-empty elements

        # If this food isn't in the dictionary yet, add it
        if food not in food_combinations:
            food_combinations[food] = {}

        # Add the category and its elements to the food
        food_combinations[food][category] = elements

    return food_combinations


food_combinations = csv_to_food_combinations(nouns_with_extra)

In [161]:
food_combinations

{'pica': {'pagrindas': ['pomidorų padažas', 'sūris'],
  'priedai': ['grybai',
   'svogūnai',
   'dešra',
   'alyvuogės',
   'paprikos',
   'dešrelės',
   'kumpis',
   'ananasai',
   'ančiuviai'],
  'prieskoniai': ['raudonėlis',
   'bazilikas',
   'česnakas',
   'raudonieji pipirai']},
 'salotos': {'pagrindas': ['salotos lapai', 'įvairios žalumynų'],
  'priedai': ['pomidorai',
   'agurkai',
   'morkos',
   'svogūnai',
   'skrebučiai',
   'sūris',
   'alyvuogės',
   'riešutai',
   'avokadas'],
  'padažai': ['vinegretas',
   'majonezo padažas',
   'balzamiko padažas',
   'alyvuogių aliejus',
   'citrinų sultys']},
 'mėsainis': {'pagrindas': ['jautienos kotletas', 'bandelė'],
  'priedai': ['sūris',
   'salotos lapai',
   'pomidoras',
   'svogūnas',
   'marinuoti agurkėliai',
   'šoninė',
   'grybai',
   'avokadas'],
  'padažai': ['kečupas', 'garstyčios', 'majonezas', 'barbekiu padažas']},
 'makaronai': {'pagrindas': ['spagečiai', 'penne', 'fettuccine'],
  'padažai': ['pomidorų padažas', 'g

In [162]:
import pprint

pprint.pprint(food_combinations)

{'avižinė košė': {'pagardai': ['medus', 'rudasis cukrus', 'pienas', 'jogurtas'],
                  'pagrindas': ['avižos'],
                  'priedai': ['bananas',
                              'uogos',
                              'obuolys',
                              'razinos',
                              'riešutai',
                              'cinamonas']},
 'kepsnys su daržovėmis': {'baltymai': ['vištiena',
                                        'jautiena',
                                        'tofu',
                                        'krevetės'],
                           'daržovės': ['paprikos',
                                        'brokoliai',
                                        'morkos',
                                        'svogūnai',
                                        'grybai',
                                        'žirniai'],
                           'padažai': ['sojos padažas',
                                       'teriyaki padažas'

In [15]:
infinitives = verb_conjugation["infinitive"].unique()

In [16]:
infinitives

array(['matyti', 'būti', 'mėgti', 'norėti', 'prašyti', 'valgyti'],
      dtype=object)

In [17]:
pronouns

Unnamed: 0,nominative,possessive,dative,genitive,accusative,instrumental,locative
0,aš,mano,man,manęs,mane,manimi,manyje
1,tu,tavo,tau,tavęs,tave,tavimi,tavyje
2,jis,jo,jam,jo,jį,juo,jame
3,ji,jos,jai,jos,ją,ja,joje
4,mes,mūsų,mums,mūsų,mus,mumis,mumyse
5,jūs,jūsų,jums,jūsų,jus,jumis,jumyse
6,jie,jų,jiems,jų,juos,jais,juose
7,jos,jų,joms,jų,jas,jomis,jose


In [59]:
def generate_sentence(
    nouns,
    pronouns,
    template="{pronoun-dative} patinka {noun-food-countable-nominative}.",
):
    # Filter countable food nouns
    food_nouns = nouns[
        (nouns["category"] == "food") & (nouns["countable"] == "countable")
    ]

    # Select a random pronoun and food noun
    pronoun = pronouns.sample(n=1).squeeze()
    noun = food_nouns.sample(n=1).squeeze()

    # Generate the prompt
    prompt = f"{pronoun['nominative']}, patinka, {noun['nominative']}"

    # Generate the answer
    # answer = template.format(pronoun=pronoun['dative'], noun=noun['nominative'])
    answer = f"{pronoun['dative'].title()} patinka {noun['nominative']}."
    # Get the category (declension) of the noun
    category = noun["declension"]

    return prompt, answer, category

In [58]:
pronouns.sample(n=1).squeeze()["nominative"]

'tu'

In [61]:
generate_sentence(
    nouns, pronouns, template="{pronoun['dative']} patinka {noun['nominative']}."
)

('jis, patinka, sūris', 'Jam patinka sūris.', 'III')

In [27]:
food_nouns = nouns[(nouns["category"] == "food") & (nouns["countable"] == "countable")]
food_nouns

Unnamed: 0,nominative,plural,countable,gender,genitive singular,genitive plural,accusative singular,accusative plural,instrumental singular,instrumental plural,dative singular,dative plural,declension,category
13,žuvis,žuvys,countable,feminine,žuvies,žuvų,žuvį,žuvis,žuvimi,žuvimis,žuviai,žuvims,III,food
15,kumpis,kumpiai,countable,masculine,kumpio,kumpių,kumpį,kumpius,kumpiu,kumpiais,kumpiui,kumpiams,III,food
16,dešrelė,dešrelės,countable,feminine,dešrelės,dešrelių,dešrelę,dešreles,dešrele,dešrelėmis,dešrelei,dešrelėms,III,food
17,dešra,dešros,countable,feminine,dešros,dešrų,dešrą,dešras,dešra,dešromis,dešrai,dešroms,I,food
20,sūris,sūriai,countable,masculine,sūrio,sūrių,sūrį,sūrius,sūriu,sūriais,sūriui,sūriams,III,food
23,karpis,karpiai,countable,masculine,karpio,karpių,karpį,karpius,karpiu,karpiais,karpiui,karpiams,III,food
24,duona,duonos,countable,feminine,duonos,duonų,duoną,duonas,duona,duonomis,duonai,duonoms,I,food
25,bandelė,bandelės,countable,feminine,bandelės,bandelių,bandelę,bandeles,bandele,bandelėmis,bandelei,bandelėms,III,food
26,riestainis,riestainiai,countable,masculine,riestainio,riestainų,riestainį,riestainius,riestainiu,riestainiu,riestainiui,riestainiams,III,food
27,batonas,batonai,countable,masculine,batono,batonų,batoną,batonus,batonu,batonais,batonui,batonams,I,food


In [36]:
index_bool = nouns_with["base"].isin(food_nouns["nominative"])

In [38]:
nouns_with.loc[index_bool]

Unnamed: 0,base,extra,all_options
8,duona,sviestas,dripsniai
9,duona,sūris,duona
39,žuvis,sojos padažas,
40,žuvis,migdolai,
41,žuvis,ryžiai,
42,žuvis,daržovės,


In [51]:
# Generate the prompt
prompt = f"{pronoun['nominative']}, patinka, {noun['nominative']}"
answer

In [52]:
prompt

'jis, patinka, pomidoras'

In [65]:
pronoun["dative"]

'tau'

In [66]:
noun["nominative"]

'agurkas'

In [67]:
f"{pronoun['dative'].title()} patinka {noun['nominative']}."

'Tau patinka agurkas.'

In [75]:
import random


def generate_sentence(nouns, pronouns, verbs, adjectives, templates):
    # Filter food nouns
    food_nouns = nouns[nouns["category"] == "food"]

    # Select random elements
    pronoun = pronouns.sample(n=1).squeeze()
    noun = food_nouns.sample(n=1).squeeze()
    verb = verbs.sample(n=1).squeeze()
    adj1 = adjectives.sample(n=1).squeeze()
    adj2 = adjectives.sample(n=1).squeeze()

    # Select a random template
    template = random.choice(templates)

    # Generate the prompt
    prompt = f"{pronoun['nominative']}, {verb['infinitive']}, {noun['nominative']}"

    # Ensure subject-verb agreement
    verb_pres = verb["present"][pronoun["person"]]
    verb_past = verb["past"][pronoun["person"]]

    # Generate the answer
    answer = template.format(
        pronoun_nom=pronoun["nominative"].title(),
        pronoun_dat=pronoun["dative"].title(),
        noun_nom=noun["nominative"],
        noun_acc=noun["accusative"],
        noun_gen=noun["genitive"],
        noun_ins=noun["instrumental"],
        verb_pres=verb_pres,
        verb_past=verb_past,
        adj1_nom=adj1["nominative"],
        adj2_nom=adj2["nominative"],
        adj1_ins=adj1["instrumental"],
        adj2_ins=adj2["instrumental"],
        adj1_gen=adj1["genitive"],
        adj2_gen=adj2["genitive"],
    )

    # Get the category (declension) of the noun
    category = noun["declension"]

    return prompt, answer, category

In [86]:
pronouns = pd.DataFrame(
    {
        "nominative": ["aš", "tu", "jis", "ji", "mes", "jūs", "jie", "jos"],
        "dative": ["man", "tau", "jam", "jai", "mums", "jums", "jiems", "joms"],
        "person": ["1sg", "2sg", "3", "3", "1pl", "2pl", "3", "3"],
    }
)

verbs = pd.DataFrame(
    {
        "infinitive": ["valgyti", "gerti", "mėgti"],
        "present": [
            {
                "1sg": "valgau",
                "2sg": "valgai",
                "3": "valgo",
                "1pl": "valgome",
                "2pl": "valgote",
            },
            {
                "1sg": "geriu",
                "2sg": "geri",
                "3": "geria",
                "1pl": "geriame",
                "2pl": "geriate",
            },
            {
                "1sg": "mėgstu",
                "2sg": "mėgsti",
                "3": "mėgsta",
                "1pl": "mėgstame",
                "2pl": "mėgstate",
            },
        ],
        "past": [
            {
                "1sg": "valgiau",
                "2sg": "valgei",
                "3": "valgė",
                "1pl": "valgėme",
                "2pl": "valgėte",
            },
            {
                "1sg": "gėriau",
                "2sg": "gėrei",
                "3": "gėrė",
                "1pl": "gėrėme",
                "2pl": "gėrėte",
            },
            {
                "1sg": "mėgau",
                "2sg": "mėgai",
                "3": "mėgo",
                "1pl": "mėgome",
                "2pl": "mėgote",
            },
        ],
    }
)

adjectives = pd.DataFrame(
    {
        "nominative": ["skanus", "šviežias", "karštas"],
        "instrumental": ["skanių", "šviežiu", "karštu"],
        "genitive": ["skanaus", "šviežio", "karšto"],
    }
)

prompt, answer, category = generate_sentence(
    nouns, pronouns, verbs, adjectives, templates
)
print(f"Prompt: {prompt}")
print(f"Answer: {answer}")
print(f"Noun category: {category}")

Prompt: aš, mėgti, obuolys
Answer: Man atrodo, kad obuolys yra skanus.
Noun category: I


In [87]:
verbs

Unnamed: 0,infinitive,present,past
0,valgyti,"{'1sg': 'valgau', '2sg': 'valgai', '3': 'valgo...","{'1sg': 'valgiau', '2sg': 'valgei', '3': 'valg..."
1,gerti,"{'1sg': 'geriu', '2sg': 'geri', '3': 'geria', ...","{'1sg': 'gėriau', '2sg': 'gėrei', '3': 'gėrė',..."
2,mėgti,"{'1sg': 'mėgstu', '2sg': 'mėgsti', '3': 'mėgst...","{'1sg': 'mėgau', '2sg': 'mėgai', '3': 'mėgo', ..."


In [89]:
verb_conjugation.head(20)

Unnamed: 0,index,subject,tense,conjugation,infinitive
0,0,aš,present_tense,matau,matyti
1,1,tu,present_tense,matai,matyti
2,2,jis,present_tense,mato,matyti
3,3,ji,present_tense,mato,matyti
4,4,mes,present_tense,matome,matyti
5,5,jūs,present_tense,matote,matyti
6,6,jie,present_tense,mato,matyti
7,7,jos,present_tense,mato,matyti
8,8,aš,past_tense,mačiau,matyti
9,9,tu,past_tense,matei,matyti


In [90]:
def generate_sentence(nouns, pronouns, verbs, adjectives, templates):
    # Filter food nouns
    food_nouns = nouns[nouns["category"] == "food"]

    # Select random elements
    pronoun = pronouns.sample(n=1).squeeze()
    noun = food_nouns.sample(n=1).squeeze()
    verb_infinitive = random.choice(verbs["infinitive"].unique())
    adj1 = adjectives.sample(n=1).squeeze()
    adj2 = adjectives.sample(n=1).squeeze()

    # Select a random template
    template = random.choice(templates)

    # Generate the prompt
    prompt = f"{pronoun['nominative']}, {verb_infinitive}, {noun['nominative']}"

    # Ensure subject-verb agreement
    verb_pres = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "present_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    verb_past = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "past_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    verb_future = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "future_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    # Generate the answer
    answer = template.format(
        pronoun_nom=pronoun["nominative"].title(),
        pronoun_dat=pronoun["dative"].title(),
        noun_nom=noun["nominative"],
        noun_acc=noun["accusative"],
        noun_gen=noun["genitive"],
        noun_ins=noun["instrumental"],
        verb_pres=verb_pres,
        verb_past=verb_past,
        verb_future=verb_future,
        adj1_nom=adj1["nominative"],
        adj2_nom=adj2["nominative"],
        adj1_ins=adj1["instrumental"],
        adj2_ins=adj2["instrumental"],
        adj1_gen=adj1["genitive"],
        adj2_gen=adj2["genitive"],
    )

    # Get the category (declension) of the noun
    category = noun["declension"]

    return prompt, answer, category

In [98]:
nouns

Unnamed: 0,nominative,plural,countable,gender,genitive singular,genitive plural,accusative singular,accusative plural,instrumental singular,instrumental plural,dative singular,dative plural,declension,category
0,tėvas,tėvai,countable,masculine,,,,,,,tėvui,tėvams,I,person
1,brolis,broliai,countable,masculine,,,,,,,broliui,broliams,I,person
2,pabrolys,pabroliai,countable,masculine,,,,,,,pabroliui,pabroliams,I,person
3,mama,mamos,countable,feminine,,,,,,,mamai,mamoms,II,person
4,dukra,dukros,countable,feminine,,,,,,,dukrai,dukroms,II,person
5,sesė,sesės,countable,feminine,,,,,,,sesei,sesėms,II,person
6,akis,akys,countable,feminine,,,,,,,akiai,akims,III,thing
7,dantis,dantys,countable,masculine,,,,,,,dančiui,dantims,III,thing
8,sūnus,sūnūs,countable,masculine,,,,,,,sūnui,sūnums,IV,person
9,profesorius,profesoriai,countable,masculine,,,,,,,profesoriui,profesoriams,IV,person


In [103]:
templates = [
    "{pronoun_dat} patinka {noun_nom}.",
    "{pronoun_nom} {verb_pres} {noun_acc}.",
    "{pronoun_dat} nepatinka {noun_nom}.",
    "{pronoun_nom} {verb_pres} {noun_acc} su {adj1_ins}, bet be {adj2_gen}.",
    "{pronoun_nom} mėgsta {noun_acc}, bet nemėgsta {noun_gen}.",
    "{pronoun_dat} atrodo, kad {noun_nom} yra {adj1_nom}.",
    "{pronoun_nom} {verb_past} {noun_acc} vakar.",
    "{pronoun_nom} norėtų {noun_gen} su {adj1_ins}.",
    "{pronoun_dat} patinka {noun_nom} be {adj1_gen}, bet su {adj2_ins}.",
    "{pronoun_nom} dažnai {verb_pres} {noun_acc}, nes tai {adj1_nom}.",
    "{pronoun_nom} niekada ne{verb_pres} {noun_gen}, nes {pronoun_dat} nepatinka {adj1_nom} maistas.",
    "{pronoun_nom} {verb_pres} {noun_acc} tik su {adj1_ins} ir {adj2_ins}.",
    "{pronoun_dat} patinka {adj1_nom} {noun_nom}, bet nepatinka {adj2_nom}.",
    "{pronoun_nom} {verb_pres} {noun_acc}, kai {noun_nom} yra {adj1_nom}.",
    "{pronoun_nom} mieliau {verb_pres} {noun_acc} nei {noun_gen}.",
    "{pronoun_nom} {verb_future} {noun_acc} rytoj.",
    "{pronoun_nom} niekada ne{verb_future} {noun_gen} ateityje.",
    "{pronoun_gen} nuomone, {noun_nom} yra {adj1_nom}.",
    "{pronoun_acc} domina {noun_nom} su {adj1_ins}.",
    "{pronoun_ins} {verb_past} {noun_acc} praeitą savaitę.",
    "{pronoun_loc} kilo mintis {verb_inf} {noun_acc}.",
    "{pronoun_pos} mėgstamiausias patiekalas yra {noun_nom}.",
    "Ar {pronoun_dat} patinka {noun_nom} labiau nei {pronoun_dat}?",
    "{pronoun_nom} {verb_pres} {noun_acc} {pronoun_pos} namuose.",
    "{pronoun_dat} atrodo, kad {pronoun_pos} {noun_nom} yra {adj1_nom}.",
    "{pronoun_nom} mėgsta valgyti {noun_acc_pl} su {noun_ins}.",
    "{pronoun_nom} niekada nevalgė {noun_gen_pl}, bet {verb_future} {noun_acc} rytoj.",
    "{pronoun_dat} patinka {noun_nom_pl}, bet nepatinka {noun_nom}.",
    "{pronoun_nom} {verb_past} {noun_dat} {adj1_ins} padažą.",
    "Ar galėtumėte man duoti {noun_gen} ir {noun_gen_pl}?",
    "{pronoun_nom} dažnai perka {noun_acc_pl} {noun_dat}.",
    "{pronoun_nom} {verb_pres} {noun_ins_pl} su {noun_ins}.",
    "{pronoun_dat} labiau patinka {noun_nom_pl} nei {noun_nom}.",
    "{pronoun_nom} {verb_future} {noun_acc_pl} su {noun_ins_pl}.",
    "Kodėl {pronoun_nom} nemėgsta {noun_gen_pl}?",
]

In [99]:
def generate_sentence(nouns, pronouns, verbs, adjectives, templates):
    # Filter food nouns
    food_nouns = nouns[nouns["category"] == "food"]

    # Select random elements
    pronoun = pronouns.sample(n=1).squeeze()
    noun = food_nouns.sample(n=1).squeeze()
    verb_infinitive = random.choice(verbs["infinitive"].unique())
    adj1 = adjectives.sample(n=1).squeeze()
    adj2 = adjectives.sample(n=1).squeeze()

    # Select a random template
    template = random.choice(templates)

    # Generate the prompt
    prompt = f"{pronoun['nominative']}, {verb_infinitive}, {noun['nominative']}"

    # Ensure subject-verb agreement
    verb_pres = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "present_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    verb_past = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "past_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    verb_future = verbs[
        (verbs["subject"] == pronoun["nominative"])
        & (verbs["tense"] == "future_tense")
        & (verbs["infinitive"] == verb_infinitive)
    ]["conjugation"].values[0]

    # Generate the answer
    answer = template.format(
        pronoun_nom=pronoun["nominative"].title(),
        pronoun_dat=pronoun["dative"].title(),
        pronoun_gen=pronoun["genitive"].title(),
        pronoun_acc=pronoun["accusative"].title(),
        pronoun_ins=pronoun["instrumental"].title(),
        pronoun_loc=pronoun["locative"].title(),
        pronoun_pos=pronoun["possessive"].title(),
        noun_nom=noun["nominative"],
        noun_gen=noun["genitive singular"],
        noun_dat=noun["dative singular"],
        noun_acc=noun["accusative singular"],
        noun_ins=noun["instrumental singular"],
        noun_nom_pl=noun["plural"],
        noun_gen_pl=noun["genitive plural"],
        noun_dat_pl=noun["dative plural"],
        noun_acc_pl=noun["accusative plural"],
        noun_ins_pl=noun["instrumental plural"],
        verb_pres=verb_pres,
        verb_past=verb_past,
        verb_future=verb_future,
        adj1_nom=adj1["nominative"],
        adj2_nom=adj2["nominative"],
        adj1_ins=adj1["instrumental"],
        adj2_ins=adj2["instrumental"],
        adj1_gen=adj1["genitive"],
        adj2_gen=adj2["genitive"],
    )

    # Get the category (declension) of the noun
    category = noun["declension"]

    return prompt, answer, category

In [101]:
verb_conjugation

Unnamed: 0,index,subject,tense,conjugation,infinitive
0,0,aš,present_tense,matau,matyti
1,1,tu,present_tense,matai,matyti
2,2,jis,present_tense,mato,matyti
3,3,ji,present_tense,mato,matyti
4,4,mes,present_tense,matome,matyti
...,...,...,...,...,...
247,37,ji,past_freq_tense,valgydavo,valgyti
248,38,mes,past_freq_tense,valgydavome,valgyti
249,39,jūs,past_freq_tense,valgydavote,valgyti
250,40,jie,past_freq_tense,valgydavo,valgyti


In [179]:
import pandas as pd

adjectives_data = [
    {
        "nominative": "skanus",
        "genitive": "skanaus",
        "dative": "skaniam",
        "accusative": "skanų",
        "instrumental": "skaniu",
        "locative": "skaniame",
        "nominative_f": "skani",
        "genitive_f": "skanios",
        "dative_f": "skaniai",
        "accusative_f": "skanią",
        "instrumental_f": "skania",
        "locative_f": "skanioje",
        "meaning": "tasty",
    },
    {
        "nominative": "karštas",
        "genitive": "karšto",
        "dative": "karštam",
        "accusative": "karštą",
        "instrumental": "karštu",
        "locative": "karštame",
        "nominative_f": "karšta",
        "genitive_f": "karštos",
        "dative_f": "karštai",
        "accusative_f": "karštą",
        "instrumental_f": "karšta",
        "locative_f": "karštoje",
        "meaning": "hot",
    },
    {
        "nominative": "šaltas",
        "genitive": "šalto",
        "dative": "šaltam",
        "accusative": "šaltą",
        "instrumental": "šaltu",
        "locative": "šaltame",
        "nominative_f": "šalta",
        "genitive_f": "šaltos",
        "dative_f": "šaltai",
        "accusative_f": "šaltą",
        "instrumental_f": "šalta",
        "locative_f": "šaltoje",
        "meaning": "cold",
    },
    {
        "nominative": "gardus",
        "genitive": "gardaus",
        "dative": "gardžiam",
        "accusative": "gardų",
        "instrumental": "gardžiu",
        "locative": "gardžiame",
        "nominative_f": "gardi",
        "genitive_f": "gardžios",
        "dative_f": "gardžiai",
        "accusative_f": "gardžią",
        "instrumental_f": "gardžia",
        "locative_f": "gardžioje",
        "meaning": "delicious",
    },
    {
        "nominative": "saldus",
        "genitive": "saldaus",
        "dative": "saldžiam",
        "accusative": "saldų",
        "instrumental": "saldžiu",
        "locative": "saldžiame",
        "nominative_f": "saldi",
        "genitive_f": "saldžios",
        "dative_f": "saldžiai",
        "accusative_f": "saldžią",
        "instrumental_f": "saldžia",
        "locative_f": "saldžioje",
        "meaning": "sweet",
    },
]

adjectives = pd.DataFrame(adjectives_data)

In [113]:
def generate_sentence(nouns, pronouns, verbs, adjectives, templates):
    try:
        # Filter food nouns
        food_nouns = nouns[nouns["category"] == "food"]

        # Select random elements
        pronoun = pronouns.sample(n=1).squeeze()
        noun = food_nouns.sample(n=1).squeeze()
        verb_infinitive = random.choice(verbs["infinitive"].unique())
        adj1 = adjectives.sample(n=1).iloc[0]
        adj2 = adjectives.sample(n=1).iloc[0]

        # Select a random template
        template = random.choice(templates)

        # Generate the prompt
        prompt = f"{pronoun['nominative']}, {verb_infinitive}, {noun['nominative']}"

        # Ensure subject-verb agreement
        verb_pres = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "present_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_past = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "past_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_future = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "future_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        # Determine the gender of the noun for adjective agreement
        noun_gender = "_f" if noun["gender"] == "feminine" else ""

        # Generate the answer
        answer = template.format(
            pronoun_nom=pronoun["nominative"].title(),
            pronoun_dat=pronoun["dative"].title(),
            pronoun_gen=pronoun["genitive"].title(),
            pronoun_acc=pronoun["accusative"].title(),
            pronoun_ins=pronoun["instrumental"].title(),
            pronoun_loc=pronoun["locative"].title(),
            pronoun_pos=pronoun["possessive"].title(),
            noun_nom=noun["nominative"],
            noun_gen=noun["genitive singular"],
            noun_dat=noun["dative singular"],
            noun_acc=noun["accusative singular"],
            noun_ins=noun["instrumental singular"],
            noun_nom_pl=noun["plural"],
            noun_gen_pl=noun["genitive plural"],
            noun_dat_pl=noun["dative plural"],
            noun_acc_pl=noun["accusative plural"],
            noun_ins_pl=noun["instrumental plural"],
            verb_pres=verb_pres,
            verb_past=verb_past,
            verb_future=verb_future,
            adj1_nom=adj1[f"nominative{noun_gender}"],
            adj2_nom=adj2[f"nominative{noun_gender}"],
            adj1_gen=adj1[f"genitive{noun_gender}"],
            adj2_gen=adj2[f"genitive{noun_gender}"],
            adj1_dat=adj1[f"dative{noun_gender}"],
            adj2_dat=adj2[f"dative{noun_gender}"],
            adj1_acc=adj1[f"accusative{noun_gender}"],
            adj2_acc=adj2[f"accusative{noun_gender}"],
            adj1_ins=adj1[f"instrumental{noun_gender}"],
            adj2_ins=adj2[f"instrumental{noun_gender}"],
            adj1_loc=adj1[f"locative{noun_gender}"],
            adj2_loc=adj2[f"locative{noun_gender}"],
        )

        # Get the category (declension) of the noun
        category = noun["declension"]

        return prompt, answer, category

    except KeyError as e:
        print(f"KeyError: {e}. This key is missing in one of the dataframes.")
        return None, None, None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None, None

In [135]:
for x in range(10):
    prompt, answer, case = generate_sentence(
        nouns, pronouns, verb_conjugation, adjectives, templates
    )
    print(prompt)
    print(answer)
    print()

jūs, prašyti, avokadas
Jūs mėgsta valgyti avokadus su avokadu.

mes, būti, karpis
Mes niekada nebūsime karpio ateityje.

jie, būti, vanduo
Jie bus vandenį rytoj.

jis, valgyti, daržovė
Jis valgys daržovę rytoj.

tu, mėgti, daržovė
Tu mėgai daržovę vakar.

KeyError: 'verb_inf'. This key is missing in one of the dataframes.
None
None

tu, prašyti, bandelė
Tu prašei bandelę vakar.

tu, mėgti, batonas
Tau atrodo, kad Tavo batonas yra gardus.

aš, mėgti, šokoladas
Aš mieliau mėgstu šokoladą nei šokolado.

jos, valgyti, vanduo
Ar Joms patinka vanduo labiau nei Joms?



In [136]:
starting_string = """| Maistas    | Kategorija  | Elementas 1     | Elementas 2     | Elementas 3     | Elementas 4     | Elementas 5     | Elementas 6     | Elementas 7     | Elementas 8     | Elementas 9     |
|------------|-------------|-----------------|-----------------|-----------------|-----------------|-----------------|-----------------|-----------------|-----------------|-----------------|
| pica       | pagrindas   | pomidorų padažas| sūris           |                 |                 |                 |                 |                 |                 |                 |
| pica       | priedai     | grybai          | svogūnai        | dešra           | alyvuogės       | paprikos        | dešrelės        | kumpis          | ananasai        | ančiuviai       |
| pica       | prieskoniai | raudonėlis      | bazilikas       | česnakas        | raudonieji pipirai |              |                 |                 |                 |                 |
| salotos    | pagrindas   | salotos lapai   | įvairios žalumynų |               |                 |                 |                 |                 |                 |                 |
| salotos    | priedai     | pomidorai       | agurkai         | morkos          | svogūnai        | skrebučiai      | sūris           | alyvuogės       | riešutai        | avokadas        |
| salotos    | padažai     | vinegretas      | majonezo padažas | balzamiko padažas | alyvuogių aliejus | citrinų sultys |              |                 |                 |                 |
| mėsainis   | pagrindas   | jautienos kotletas | bandelė      |                 |                 |                 |                 |                 |                 |                 |
| mėsainis   | priedai     | sūris           | salotos lapai   | pomidoras       | svogūnas        | marinuoti agurkėliai | šoninė    | grybai          | avokadas        |                 |
| mėsainis   | padažai     | kečupas         | garstyčios      | majonezas       | barbekiu padažas |                |                 |                 |                 |                 |
| makaronai  | pagrindas   | spagečiai       | penne           | fettuccine      |                 |                 |                 |                 |                 |                 |
| makaronai  | padažai     | pomidorų padažas | grietinėlės padažas | pesto      |                 |                 |                 |                 |                 |                 |
| makaronai  | priedai     | grybai          | vištiena        | krevetės        | mėsos kukuliai  | špinatai        | paprikos        |                 |                 |                 |
| makaronai  | pagardai    | parmezano sūris | bazilikas       | raudonieji pipirai |              |                 |                 |                 |                 |                 |
| ledai      | pagrindas   | vanilinis       | šokoladinis     | braškinis       |                 |                 |                 |                 |                 |                 |
| ledai      | pagardai    | spalvoti pabarstai | šokolado gabaliukai | riešutai | plakta grietinėlė | vyšnia        | karamelės padažas | karštas šokoladas |              |                 |
| sušiai     | pagrindas   | ryžiai          | jūros dumblių lapai |             |                 |                 |                 |                 |                 |                 |
| sušiai     | įdaras      | lašiša          | tunas           | avokadas        | agurkas         | krabas          | krevetės        |                 |                 |                 |
| sušiai     | priedai     | vasabis         | sojos padažas   | marinuotas imbieras |             |                 |                 |                 |                 |                 |
| sumuštinis | pagrindas   | duona           |                 |                 |                 |                 |                 |                 |                 |                 |
| sumuštinis | mėsa        | kumpis          | kalakutiena     | jautiena        | vištiena        |                 |                 |                 |                 |                 |
| sumuštinis | priedai     | sūris           | salotos lapai   | pomidoras       | svogūnas        | marinuoti agurkėliai | avokadas   |                 |                 |                 |
| sumuštinis | padažai     | majonezas       | garstyčios      | kečupas         | pesto           |                 |                 |                 |                 |                 |
| avižinė košė | pagrindas | avižos          |                 |                 |                 |                 |                 |                 |                 |                 |
| avižinė košė | priedai   | bananas         | uogos           | obuolys         | razinos         | riešutai        | cinamonas       |                 |                 |                 |
| avižinė košė | pagardai  | medus           | rudasis cukrus  | pienas          | jogurtas        |                 |                 |                 |                 |                 |
| kepsnys su daržovėmis | pagrindas | ryžiai | makaronai       |                 |                 |                 |                 |                 |                 |                 |
| kepsnys su daržovėmis | baltymai | vištiena | jautiena       | tofu            | krevetės        |                 |                 |                 |                 |                 |
| kepsnys su daržovėmis | daržovės | paprikos | brokoliai      | morkos          | svogūnai        | grybai          | žirniai         |                 |                 |                 |
| kepsnys su daržovėmis | padažai | sojos padažas | teriyaki padažas | austrių padažas |           |                 |                 |                 |                 |                 |
| tako       | pagrindas   | tortilija       |                 |                 |                 |                 |                 |                 |                 |                 |
| tako       | baltymai    | malta jautiena  | vištiena        | žuvis           | pupelės         |                 |                 |                 |                 |                 |
| tako       | priedai     | salotos lapai   | pomidoras       | sūris           | svogūnas        | avokadas        | kalendros       |                 |                 |                 |
| tako       | padažai     | salsa           | grietinė        | gvakamolė       |                 |                 |                 |                 |                 |                 |"""

In [142]:
for line in starting_string.splitlines()[2:]:
    new_line = line.strip("|").split("|")
    newest_line = [word.strip() for word in new_line]
    print("\t".join(newest_line))

pica	pagrindas	pomidorų padažas	sūris							
pica	priedai	grybai	svogūnai	dešra	alyvuogės	paprikos	dešrelės	kumpis	ananasai	ančiuviai
pica	prieskoniai	raudonėlis	bazilikas	česnakas	raudonieji pipirai					
salotos	pagrindas	salotos lapai	įvairios žalumynų							
salotos	priedai	pomidorai	agurkai	morkos	svogūnai	skrebučiai	sūris	alyvuogės	riešutai	avokadas
salotos	padažai	vinegretas	majonezo padažas	balzamiko padažas	alyvuogių aliejus	citrinų sultys				
mėsainis	pagrindas	jautienos kotletas	bandelė							
mėsainis	priedai	sūris	salotos lapai	pomidoras	svogūnas	marinuoti agurkėliai	šoninė	grybai	avokadas	
mėsainis	padažai	kečupas	garstyčios	majonezas	barbekiu padažas					
makaronai	pagrindas	spagečiai	penne	fettuccine						
makaronai	padažai	pomidorų padažas	grietinėlės padažas	pesto						
makaronai	priedai	grybai	vištiena	krevetės	mėsos kukuliai	špinatai	paprikos			
makaronai	pagardai	parmezano sūris	bazilikas	raudonieji pipirai						
ledai	pagrindas	vanilinis	šokoladinis	braškinis						

In [167]:
import random


def generate_sentence(nouns, pronouns, verbs, adjectives, templates, food_combinations):
    try:
        # Select random elements
        pronoun = pronouns.sample(n=1).squeeze()
        food = random.choice(list(food_combinations.keys()))
        food_items = food_combinations[food]
        verb_infinitive = random.choice(verbs["infinitive"].unique())
        adj1 = adjectives.sample(n=1).iloc[0]
        adj2 = adjectives.sample(n=1).iloc[0]

        # Select random items from food categories
        base = (
            random.choice(food_items["pagrindas"])
            if "pagrindas" in food_items
            else None
        )
        addition = (
            random.choice(food_items["priedai"]) if "priedai" in food_items else None
        )
        topping = None
        if "pagardai" in food_items:
            topping = random.choice(food_items["pagardai"])
        elif "prieskoniai" in food_items:
            topping = random.choice(food_items["prieskoniai"])
        elif "padažai" in food_items:
            topping = random.choice(food_items["padažai"])

        # Select a random template
        template = random.choice(templates)

        # Generate the prompt
        prompt = f"{pronoun['nominative']}, {verb_infinitive}, {food}"

        # Ensure subject-verb agreement
        verb_pres = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "present_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_past = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "past_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_future = verbs[
            (verbs["subject"] == pronoun["nominative"])
            & (verbs["tense"] == "future_tense")
            & (verbs["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        # Find the noun entry for the food
        noun = nouns[nouns["nominative"] == food].squeeze()

        # Determine the gender of the noun for adjective agreement
        noun_gender = "_f" if noun["gender"] == "feminine" else ""

        # Generate the answer
        answer = template.format(
            pronoun_nom=pronoun["nominative"].title(),
            pronoun_dat=pronoun["dative"].title(),
            pronoun_gen=pronoun["genitive"].title(),
            pronoun_acc=pronoun["accusative"].title(),
            pronoun_ins=pronoun["instrumental"].title(),
            pronoun_loc=pronoun["locative"].title(),
            pronoun_pos=pronoun["possessive"].title(),
            noun_nom=noun["nominative"],
            noun_gen=noun["genitive singular"],
            noun_dat=noun["dative singular"],
            noun_acc=noun["accusative singular"],
            noun_ins=noun["instrumental singular"],
            noun_nom_pl=noun["plural"],
            noun_gen_pl=noun["genitive plural"],
            noun_dat_pl=noun["dative plural"],
            noun_acc_pl=noun["accusative plural"],
            noun_ins_pl=noun["instrumental plural"],
            verb_pres=verb_pres,
            verb_past=verb_past,
            verb_future=verb_future,
            adj1_nom=adj1[f"nominative{noun_gender}"],
            adj2_nom=adj2[f"nominative{noun_gender}"],
            adj1_gen=adj1[f"genitive{noun_gender}"],
            adj2_gen=adj2[f"genitive{noun_gender}"],
            adj1_dat=adj1[f"dative{noun_gender}"],
            adj2_dat=adj2[f"dative{noun_gender}"],
            adj1_acc=adj1[f"accusative{noun_gender}"],
            adj2_acc=adj2[f"accusative{noun_gender}"],
            adj1_ins=adj1[f"instrumental{noun_gender}"],
            adj2_ins=adj2[f"instrumental{noun_gender}"],
            adj1_loc=adj1[f"locative{noun_gender}"],
            adj2_loc=adj2[f"locative{noun_gender}"],
            food=food,
            base=base,
            addition=addition,
            topping=topping,
        )

        # Get the category (declension) of the noun
        category = noun["declension"]

        return prompt, answer, category

    except KeyError as e:
        print(f"KeyError: {e}. This key is missing in one of the dataframes.")
        return None, None, None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None, None

In [261]:
import random


def generate_sentence(
    nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations
):
    try:
        # Select random elements
        pronoun = pronouns.sample(n=1).squeeze()
        food = random.choice(list(food_combinations.keys()))
        food_items = food_combinations[food]
        verb_infinitive = random.choice(verb_conjugation["infinitive"].unique())
        adj1 = random.choice(adjectives)
        adj2 = random.choice(adjectives)

        # Select random items from food categories
        base = (
            random.choice(food_items["pagrindas"])
            if "pagrindas" in food_items
            else None
        )
        addition = (
            random.choice(food_items["priedai"]) if "priedai" in food_items else None
        )
        topping = None
        if "pagardai" in food_items:
            topping = random.choice(food_items["pagardai"])
        elif "prieskoniai" in food_items:
            topping = random.choice(food_items["prieskoniai"])
        elif "padažai" in food_items:
            topping = random.choice(food_items["padažai"])

        # Select a random template
        template = random.choice(templates)

        # Generate the prompt
        prompt = f"{pronoun['nominative']}, {verb_infinitive}, {food}"

        # Ensure subject-verb agreement
        verb_pres = verb_conjugation[
            (verb_conjugation["subject"] == pronoun["nominative"])
            & (verb_conjugation["tense"] == "present_tense")
            & (verb_conjugation["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_past = verb_conjugation[
            (verb_conjugation["subject"] == pronoun["nominative"])
            & (verb_conjugation["tense"] == "past_tense")
            & (verb_conjugation["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        verb_future = verb_conjugation[
            (verb_conjugation["subject"] == pronoun["nominative"])
            & (verb_conjugation["tense"] == "future_tense")
            & (verb_conjugation["infinitive"] == verb_infinitive)
        ]["conjugation"].values[0]

        # Find the noun entry for the food
        noun = nouns[nouns["nominative"] == food]
        if noun.empty:
            raise KeyError(f"No noun entry found for {food}")
        noun = noun.iloc[0]

        # Determine the gender of the noun for adjective agreement
        noun_gender = "_f" if noun["gender"] == "feminine" else ""

        # Helper function to get the genitive form of a noun
        def get_genitive(noun_name):
            noun_entry = nouns[nouns['nominative'] == noun_name]
            if not noun_entry.empty:
                return noun_entry['genitive singular'].values[0]
            else:
                return noun_name  # Return the original name if not found

        # Generate the answer
        answer = template.format(
            pronoun_nom=pronoun['nominative'].title(),
            pronoun_dat=pronoun['dative'].title(),
            pronoun_gen=pronoun['genitive'].title(),
            pronoun_acc=pronoun['accusative'].title(),
            pronoun_ins=pronoun['instrumental'].title(),
            pronoun_loc=pronoun['locative'].title(),
            pronoun_pos=pronoun['possessive'].title(),
            noun_nom=noun['nominative'],
            noun_gen=noun['genitive singular'],
            noun_dat=noun['dative singular'],
            noun_acc=noun['accusative singular'],
            noun_ins=noun['instrumental singular'],
            noun_nom_pl=noun['plural'],
            noun_gen_pl=noun['genitive plural'],
            noun_dat_pl=noun['dative plural'],
            noun_acc_pl=noun['accusative plural'],
            noun_ins_pl=noun['instrumental plural'],
            verb_pres=verb_pres,
            verb_past=verb_past,
            verb_future=verb_future,
            adj1_nom=adj1[f'nominative{noun_gender}'],
            adj2_nom=adj2[f'nominative{noun_gender}'],
            adj1_gen=adj1[f'genitive{noun_gender}'],
            adj2_gen=adj2[f'genitive{noun_gender}'],
            adj1_dat=adj1[f'dative{noun_gender}'],
            adj2_dat=adj2[f'dative{noun_gender}'],
            adj1_acc=adj1[f'accusative{noun_gender}'],
            adj2_acc=adj2[f'accusative{noun_gender}'],
            adj1_ins=adj1[f'instrumental{noun_gender}'],
            adj2_ins=adj2[f'instrumental{noun_gender}'],
            adj1_loc=adj1[f'locative{noun_gender}'],
            adj2_loc=adj2[f'locative{noun_gender}'],
            food=food,
            base=base or '',
            addition=get_genitive(addition) if addition else '',
            topping=get_genitive(topping) if topping else ''
        )

        # Get the category (declension) of the noun
        category = noun['declension']
        
        return prompt, answer, category

    except KeyError as e:
        print(f"KeyError: {e}. This key is missing in one of the dataframes.")
        return None, None, None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, None, None

In [262]:
templates = [
    "{pronoun_nom} {verb_pres} {noun_acc} su {addition}, bet be {topping}.",
    "{pronoun_dat} patinka {noun_nom} su {addition} ir {topping}.",
    "{pronoun_nom} mėgsta {noun_acc}, ypač kai {base} yra {adj1_nom}.",
    "{pronoun_nom} {verb_past} {noun_acc} su {addition} vakar.",
    "{pronoun_nom} niekada ne{verb_pres} {noun_gen} su {topping}.",
    "{pronoun_dat} atrodo, kad {noun_nom} su {addition} yra {adj1_nom}.",
    "{pronoun_nom} {verb_future} {noun_acc} su {base} ir {addition} rytoj.",
    "{pronoun_gen} nuomone, geriausia {noun_nom} visada turi {topping}.",
    "{pronoun_nom} mieliau {verb_pres} {noun_acc} su {addition} nei su {topping}.",
    "{pronoun_dat} patinka {adj1_nom} {noun_nom}, bet nepatinka su {topping}."
]

In [263]:
prompt, answer, category = generate_sentence(
    nouns, pronouns, verb_conjugation, adjectives_data, templates, food_combinations
)
print(f"Prompt: {prompt}")
print(f"Answer: {answer}")
print(f"Noun category: {category}")

Prompt: jie, prašyti, ledai
Answer: Jie niekada neprašo ledų su šokolado gabaliukai.
Noun category: I


In [269]:
for x in range(10):
    prompt, answer, category = generate_sentence(
        nouns, pronouns, verb_conjugation, adjectives_data, templates, food_combinations
    )
    print(f"Prompt: {prompt}")
    print(f"Answer: {answer}")
    print()

Prompt: jūs, būti, sumuštinis
Answer: Jūs niekada neesate sumuštinio su kečupas.

Prompt: jos, valgyti, ledai
Answer: Joms patinka ledai su  ir plakta grietinėlė.

Prompt: jis, mėgti, makaronai
Answer: Jam atrodo, kad makaronai su mėsos kukuliai yra šaltas.

Prompt: jūs, mėgti, tako
Answer: Jums patinka karštas tako, bet nepatinka su grietinės.

Prompt: jūs, mėgti, avižinė košė
Answer: Jūs mėgstate avižinę košę su uogos, bet be jogurtas.

Prompt: jūs, norėti, avižinė košė
Answer: Jūs norėsite avižinę košę su avižos ir obuolys rytoj.

Prompt: mes, prašyti, makaronai
Answer: Mums atrodo, kad makaronai su krevetės yra saldus.

Prompt: aš, matyti, tako
Answer: Man patinka saldus tako, bet nepatinka su grietinės.

Prompt: aš, matyti, ledai
Answer: Man patinka gardus ledai, bet nepatinka su riešutai.

Prompt: tu, norėti, pica
Answer: Tu mėgsta picą, ypač kai sūris yra saldi.



## new 

In [290]:
adjectives_data = [
    {
        'nominative': 'skanus',
        'genitive': 'skanaus',
        'dative': 'skaniam',
        'accusative': 'skanų',
        'instrumental': 'skaniu',
        'locative': 'skaniame',
        'nominative_f': 'skani',
        'genitive_f': 'skanios',
        'dative_f': 'skaniai',
        'accusative_f': 'skanią',
        'instrumental_f': 'skania',
        'locative_f': 'skanioje',
        'nominative_pl': 'skanūs',
        'nominative_f_pl': 'skanios',
        'meaning': 'tasty'
    },
    {
        'nominative': 'gardus',
        'genitive': 'gardaus',
        'dative': 'gardžiam',
        'accusative': 'gardų',
        'instrumental': 'gardžiu',
        'locative': 'gardžiame',
        'nominative_f': 'gardi',
        'genitive_f': 'gardžios',
        'dative_f': 'gardžiai',
        'accusative_f': 'gardžią',
        'instrumental_f': 'gardžia',
        'locative_f': 'gardžioje',
        'nominative_pl': 'gardūs',
        'nominative_f_pl': 'gardžios',
        'meaning': 'delicious'
    },
    {
        'nominative': 'šviežias',
        'genitive': 'šviežio',
        'dative': 'šviežiam',
        'accusative': 'šviežią',
        'instrumental': 'šviežiu',
        'locative': 'šviežiame',
        'nominative_f': 'šviežia',
        'genitive_f': 'šviežios',
        'dative_f': 'šviežiai',
        'accusative_f': 'šviežią',
        'instrumental_f': 'šviežia',
        'locative_f': 'šviežioje',
        'nominative_pl': 'švieži',
        'nominative_f_pl': 'šviežios',
        'meaning': 'fresh'
    },
    {
        'nominative': 'puikus',
        'genitive': 'puikaus',
        'dative': 'puikiam',
        'accusative': 'puikų',
        'instrumental': 'puikiu',
        'locative': 'puikiame',
        'nominative_f': 'puiki',
        'genitive_f': 'puikios',
        'dative_f': 'puikiai',
        'accusative_f': 'puikią',
        'instrumental_f': 'puikia',
        'locative_f': 'puikioje',
        'nominative_pl': 'puikūs',
        'nominative_f_pl': 'puikios',
        'meaning': 'excellent'
    },
    {
        'nominative': 'nuostabus',
        'genitive': 'nuostabaus',
        'dative': 'nuostabiam',
        'accusative': 'nuostabų',
        'instrumental': 'nuostabiu',
        'locative': 'nuostabiame',
        'nominative_f': 'nuostabi',
        'genitive_f': 'nuostabios',
        'dative_f': 'nuostabiai',
        'accusative_f': 'nuostabią',
        'instrumental_f': 'nuostabia',
        'locative_f': 'nuostabioje',
        'nominative_pl': 'nuostabūs',
        'nominative_f_pl': 'nuostabios',
        'meaning': 'wonderful'
    }
]

In [325]:
templates = [
    "{pronoun_nom} {verb_pres} {noun_acc} su {addition}.",
    "{pronoun_dat} patinka {noun_nom} su {addition}.",
    "{pronoun_nom} mėgsta {noun_acc}, ypač kai {base} yra {adj1}.",
    "{pronoun_nom} {verb_past} {noun_acc} su {addition} vakar.",
    "{pronoun_nom} niekada ne{verb_pres} {noun_gen} be {topping}.",
    "{pronoun_dat} atrodo, kad {noun_nom} su {addition} yra {adj1}.",
    "{pronoun_nom} {verb_future} {noun_acc} su {addition} rytoj.",
    "{pronoun_gen} nuomone, geriausi {noun_nom_pl} visada turi {topping}.",
    "{pronoun_nom} mieliau {verb_pres} {noun_acc} su {addition} nei be {topping}.",
    "{pronoun_dat} patinka {adj1} {noun_nom} su {addition}."
]

In [344]:
def generate_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations, verbose=False):
    try:
        # Select random elements
        pronoun = pronouns.sample(n=1).squeeze()
        food = random.choice(list(food_combinations.keys()))
        food_items = food_combinations[food]
        verb_infinitive = random.choice(verb_conjugation['infinitive'].unique())
        
        # Generate the prompt
        prompt = f"{pronoun['nominative']}, {verb_infinitive}, {food}"
        
        if verbose:
            print(f"Selected pronoun: {pronoun['nominative']}")
            print(f"Selected food: {food}")
            print(f"Selected verb: {verb_infinitive}")

        # Ensure subject-verb agreement
        verb_form = verb_conjugation[(verb_conjugation['subject'] == pronoun['nominative']) &
                                     (verb_conjugation['tense'] == random.choice(['present_tense', 'past_tense', 'future_tense'])) &
                                     (verb_conjugation['infinitive'] == verb_infinitive)]['conjugation'].squeeze()

        # Find the noun entry for the food
        noun = nouns[nouns['nominative'] == food].squeeze()
        if noun.empty:
            raise KeyError(f"No noun entry found for {food}")

        # Determine if the noun is plural
        is_plural = noun['countable'] == 'countable' and noun['nominative'] != noun['plural']

        # Helper function to get a suitable adjective
        def get_suitable_adjective(noun, is_plural):
            suitable_adjs = ['skanus', 'gardus', 'šviežias', 'puikus', 'nuostabus']
            adj = random.choice(suitable_adjs)
            adj_entry = adjectives[adjectives['nominative'] == adj].squeeze()
            
            if not adj_entry.empty:
                if noun['gender'] == 'feminine':
                    return adj_entry['nominative_f_pl'] if is_plural else adj_entry['nominative_f']
                else:
                    return adj_entry['nominative_pl'] if is_plural else adj_entry['nominative']
            return adj

        # Select random items from food categories
        base = random.choice(food_items['pagrindas']) if 'pagrindas' in food_items else None
        addition = random.choice(food_items['priedai']) if 'priedai' in food_items else None
        topping = None
        if 'pagardai' in food_items:
            topping = random.choice(food_items['pagardai'])
        elif 'prieskoniai' in food_items:
            topping = random.choice(food_items['prieskoniai'])
        elif 'padažai' in food_items:
            topping = random.choice(food_items['padažai'])

        # Select a random template
        template = random.choice(templates)

        # Generate the answer
        answer = template.format(
            pronoun_nom=pronoun['nominative'].capitalize(),
            pronoun_dat=pronoun['dative'].capitalize(),
            pronoun_gen=pronoun['genitive'].capitalize(),
            pronoun_acc=pronoun['accusative'].capitalize(),
            pronoun_ins=pronoun['instrumental'].capitalize(),
            pronoun_loc=pronoun['locative'].capitalize(),
            pronoun_pos=pronoun['possessive'].capitalize(),
            noun_nom=noun['plural'] if is_plural else noun['nominative'],
            noun_gen=noun['genitive plural'] if is_plural else noun['genitive singular'],
            noun_dat=noun['dative plural'] if is_plural else noun['dative singular'],
            noun_acc=noun['accusative plural'] if is_plural else noun['accusative singular'],
            noun_ins=noun['instrumental plural'] if is_plural else noun['instrumental singular'],
            noun_nom_pl=noun['plural'],
            noun_gen_pl=noun['genitive plural'],
            noun_dat_pl=noun['dative plural'],
            noun_acc_pl=noun['accusative plural'],
            noun_ins_pl=noun['instrumental plural'],
            verb=verb_form,
            adj1=get_suitable_adjective(noun, is_plural),
            adj2=get_suitable_adjective(noun, is_plural),
            food=noun['plural'] if is_plural else noun['nominative'],
            base=base or '',
            addition=noun['instrumental singular'] if addition else '',
            topping=noun['genitive singular'] if topping else ''
        )

        # Remove sentences with empty prepositional phrases
        if ' su ,' in answer or ' su .' in answer or ' be .' in answer or ' su  ' in answer:
            return None, None, None

        # Get the category (declension) of the noun
        category = noun['declension']
        
        if verbose:
            print(f"Generated sentence: {answer}")
        
        return prompt, answer, category

    except KeyError as e:
        print(f"KeyError: {e}. This key is missing in one of the dataframes.")
        return None, None, None
    except Exception as e:
        print(f"An error occurred: {e}")
        import traceback
        traceback.print_exc()
        return None, None, None

In [345]:
def generate_valid_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations, max_attempts=10, verbose=False):
    for attempt in range(max_attempts):
        if verbose:
            print(f"Attempt {attempt + 1}")
        prompt, answer, category = generate_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations, verbose)
        if prompt is not None and answer is not None:
            return prompt, answer, category
    print("Failed to generate a valid sentence after multiple attempts.")
    return None, None, None

# Usage example
for _ in range(10):
    prompt, answer, category = generate_valid_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations, verbose=True)
    if prompt is not None and answer is not None:
        print("Prompt:", prompt)
        print("Answer:", answer)
        print("Category:", category)
        print()
    else:
        print("Failed to generate a valid sentence.")
        print()

Attempt 1
Selected pronoun: jie
Selected food: salotos
Selected verb: prašyti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 2
Selected pronoun: ji
Selected food: salotos
Selected verb: mėgti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 3
Selected pronoun: jūs
Selected food: avižinė košė
Selected verb: mėgti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 4
Selected pronoun: aš
Selected food: avižinė košė
Selected verb: mėgti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 5
Selected pronoun: ji
Selected food: kepsnys su daržovėmis
Selected verb: norėti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 6
Selected pronoun: jos
Selected food: sušiai
Selected verb: norėti
KeyError: 'nominative'. This key is missing in one of the dataframes.
Attempt 7
Selected pronoun: jis
Selected food: tako
Selected verb: norėti
KeyError: 'nominative'. Th

In [328]:
def generate_valid_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations, max_attempts=10):
    for _ in range(max_attempts):
        prompt, answer, category = generate_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations)
        if prompt is not None and answer is not None:
            return prompt, answer, category
    print("Failed to generate a valid sentence after multiple attempts.")
    return None, None, None

In [329]:
for _ in range(10):
    prompt, answer, category = generate_valid_sentence(nouns, pronouns, verb_conjugation, adjectives, templates, food_combinations)
    if prompt is not None and answer is not None:
        print("Prompt:")
        print(prompt)
        print("Answer:", answer)
        print("Category:", category)
        print()
    else:
        print("Failed to generate a valid sentence.")
        print()

An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
Failed to generate a valid sentence after multiple attempts.
Failed to generate a valid sentence.

An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices must be integers
An error occurred: string indices 

In [346]:
food_combinations

{'pica': {'pagrindas': ['pomidorų padažas', 'sūris'],
  'priedai': ['grybai',
   'svogūnai',
   'dešra',
   'alyvuogės',
   'paprikos',
   'dešrelės',
   'kumpis',
   'ananasai',
   'ančiuviai'],
  'prieskoniai': ['raudonėlis',
   'bazilikas',
   'česnakas',
   'raudonieji pipirai']},
 'salotos': {'pagrindas': ['salotos lapai', 'įvairios žalumynų'],
  'priedai': ['pomidorai',
   'agurkai',
   'morkos',
   'svogūnai',
   'skrebučiai',
   'sūris',
   'alyvuogės',
   'riešutai',
   'avokadas'],
  'padažai': ['vinegretas',
   'majonezo padažas',
   'balzamiko padažas',
   'alyvuogių aliejus',
   'citrinų sultys']},
 'mėsainis': {'pagrindas': ['jautienos kotletas', 'bandelė'],
  'priedai': ['sūris',
   'salotos lapai',
   'pomidoras',
   'svogūnas',
   'marinuoti agurkėliai',
   'šoninė',
   'grybai',
   'avokadas'],
  'padažai': ['kečupas', 'garstyčios', 'majonezas', 'barbekiu padažas']},
 'makaronai': {'pagrindas': ['spagečiai', 'penne', 'fettuccine'],
  'padažai': ['pomidorų padažas', 'g