In [1]:
import re
import math
import time
import requests
import datetime
import urllib.parse

from datetime import date
from pandas import DataFrame 
from pandas import read_csv
from pandas import concat
from pandas import merge
from tqdm.auto import tqdm

In [2]:
def generate_all_worldforms(lemmas: str):
    """
    Uses web service to generate all wordforms from the list of lemmas separated by spaces

    Returns a two column table with columns wordform and lemma.
    The number of rows corresponding to a single lemma varies as duplicated wordforms are omitted.
    """
    assert lemmas.find('+') == -1, 'Input cannot contain + sign. It corrupts the output'

    GENERATOR_QUERY = "https://smart-search.tartunlp.ai/api/generator/process"
    HEADERS = {"Content-Type": "application/json; charset=utf-8"}
    POST_DATA_TEMPLATE = {'type': 'text', 'content': lemmas}

    response = requests.post(GENERATOR_QUERY, json=POST_DATA_TEMPLATE, headers=HEADERS)
    assert response.ok, "Webservice failed"
    response = response.json()
    assert response['response']['type'] == 'texts', "Unexpected response type"

    token_count = len(response['response']['texts'])
    tbl = DataFrame({'lemma': [None] * token_count, 'wordform': [None] * token_count})
    for i, token in enumerate(response['response']['texts']):
        generated_ = token['features']
        tbl.loc[i, 'lemma'] = token['content']
        tbl.loc[i, 'wordform'] = list(set(map(lambda x: x['token'].replace('+', ''), token['features']['generated_forms'])))

    return tbl.explode('wordform').reset_index(drop=True)

display(generate_all_worldforms('Tere').head())

Unnamed: 0,lemma,wordform
0,Tere,tered
1,Tere,teredena
2,Tere,teredes
3,Tere,teredest
4,Tere,terena
