In [7]:
import uuid
import random
import pandas as pd

# Controlled vocabularies: generate_record draws exactly one value from each
# of these two lists for every row (never blank).
LEAF_TYPES = [
    "Simple",
    "Pinnately compound (single)",
    "Pinnately compound (double)",
    "Pinnately compound (triple)",
    "Palmately compound",
]

FRUIT_TYPES = [
    "Drupe",
    "Capsule",
    "Follicle",
    "Pod",
]

# Free-text templates. Each list backs one optional column; pick_or_empty may
# replace the draw with an empty string, so these columns can be blank.
# Spelling of three entries was corrected ("traditional", "possibilities",
# "caterpillars"); all other text, including spacing, is left as it was.
ETYM_TEMPLATES = [
    "This name came from the local roots",
    "This name came from foreign",
    "This name came from some traditional aspects",
]

HABITAT_TEMPLATES = [
    "Mostly planted and sometimes naturalised on dry hills ",
    "Mostly planted and sometimes  limestone ridges",
    "A lowland tree of hot, dry coastal plains and rocky slopes",
]

PHENO_TEMPLATES = [
    "June to August",
    "Dry seasonal Times",
    "Wet seasonal Times",
]

IDENT_TEMPLATES = [
    "A medium–large tree",
    "A tall, deciduous tree;  trunk often buttressed",
    "A large, straight, deciduous tree",
]

GERM_TEMPLATES = [
    "Seed germination happens in 1 to 2 weeks in the moist soil",
    "Need of little bit water and shade",
    "To improve the germination possibilities soak the seed in the water overnight",
]

PEST_TEMPLATES = [
    "Aphids and leaf miners can be seen on the leaves",
    "Some caterpillars can be seen.",
    "Termites are the main stem problem in teak.",
]

def pick_or_empty(templates, empty_prob=0.2):
    """Return a random entry of ``templates``, or ``""`` with probability ``empty_prob``.

    Args:
        templates: Sequence of candidate values to draw from.
        empty_prob: Probability (0..1) of returning the empty string instead
            of drawing from ``templates``.

    Returns:
        One element of ``templates``, or ``""``. An empty ``templates``
        always yields ``""``.
    """
    # random.choice raises IndexError on an empty sequence, which used to make
    # an empty template list fail only on the draws that were not blanked out.
    if len(templates) == 0:
        return ""
    return "" if random.random() < empty_prob else random.choice(templates)

def generate_record(sr_no, language="en"):
    """Build one synthetic species row as a plain dict.

    Args:
        sr_no: Serial number of the row; also embedded in the scientific
            name, the common name and the image URL.
        language: Value stored in the ``language`` field. It does not change
            which text templates are used.

    Returns:
        dict keyed by column name. ``id`` is a fresh UUID4 string, the
        free-text fields may be empty strings, and ``Video`` is the literal
        string ``"[]"``.
    """
    # Fields are filled in column order so the sequence of random draws stays
    # the same as when the dict was built in a single literal.
    record = {
        "id": str(uuid.uuid4()),
        "Sr No": sr_no,
        "language": language,
    }
    record["Scientific name"] = f"Synthetica plantensis {sr_no}"
    record["Etymology"] = pick_or_empty(ETYM_TEMPLATES)
    record["Common name"] = f"Synthetic Plant {sr_no}"
    record["Habitat"] = pick_or_empty(HABITAT_TEMPLATES)
    record["Phenology"] = pick_or_empty(PHENO_TEMPLATES)
    record["Identification Characters"] = pick_or_empty(IDENT_TEMPLATES)
    record["Leaf type"] = random.choice(LEAF_TYPES)
    record["Fruit type"] = random.choice(FRUIT_TYPES)
    record["Seed Germination"] = pick_or_empty(GERM_TEMPLATES)
    record["Pest"] = pick_or_empty(PEST_TEMPLATES)
    record["Image"] = f"https://example.com/species/images/{sr_no}.jpg"
    record["Video"] = "[]"
    return record

def generate_synthetic_excel(n_rows=20, language="en", output_file="synthetic_species.xlsx"):
    """Generate synthetic species records and write them to an Excel file.

    Args:
        n_rows: Number of records to generate; serial numbers run 1..n_rows.
            Zero or a negative value produces a header-only sheet.
        language: Language code passed to ``generate_record`` for every row.
        output_file: Path of the ``.xlsx`` file to write.

    Returns:
        None. The file is written as a side effect and a confirmation line
        is printed.
    """
    column_order = [
        "id",
        "Sr No",
        "language",
        "Scientific name",
        "Etymology",
        "Common name",
        "Habitat",
        "Phenology",
        "Identification Characters",
        "Leaf type",
        "Fruit type",
        "Seed Germination",
        "Pest",
        "Image",
        "Video",
    ]

    records = [generate_record(i, language) for i in range(1, n_rows + 1)]

    # Passing columns= fixes the column order and keeps the headers when there
    # are no records; selecting df[column_order] on a column-less frame raised
    # KeyError for n_rows <= 0.
    df = pd.DataFrame(records, columns=column_order)
    df.to_excel(output_file, index=False)

    # Report the language actually requested; the wording for "en" is unchanged.
    version = {"en": "English"}.get(language, language)
    print(f"Synthetic Excel file generated {version} Version:", output_file)

# Entry point for this cell: writes 20 rows tagged "en" to synthetic_species_en.xlsx.
if __name__ == "__main__":
    generate_synthetic_excel(n_rows=20, language="en", output_file="synthetic_species_en.xlsx")


Synthetic Excel file generated English Version: synthetic_species_en.xlsx


In [9]:
import uuid
import random
import pandas as pd

# Controlled vocabularies: generate_record_tetum draws exactly one value from
# each list per row. These values are the same English strings for "tet" rows.
LEAF_TYPES = [
    "Simple",
    "Pinnately compound (single)",
    "Pinnately compound (double)",
    "Pinnately compound (triple)",
    "Palmately compound",
]

FRUIT_TYPES = [
    "Drupe",
    "Capsule",
    "Follicle",
    "Pod",
]

# Free-text templates for rows tagged "tet". Each list backs one optional
# column; pick_or_empty may replace the draw with an empty string.
# NOTE(review): presumably Tetum renderings of the English templates — the
# wording has not been checked here; have a Tetum speaker confirm it.
ETYM_TEMPLATES_TET = [
    "Naran ne'e mai hosi abut lokál",
    "Naran ne'e mai hosi estranjeiru",
    "Naran ne'e mai hosi aspetu tradisionál balun",
]

HABITAT_TEMPLATES_TET = [
    "Barak liu kuda no dalaruma naturaliza iha foho maran sira ",
    "Barak liu kuda no dalaruma foho-lolon kalkáriu sira",
    "Ai-hun rai-tetuk ida ho rai-tetuk tasi-ibun ne'ebé manas no maran no foho-lolon sira ho fatuk",
]

PHENO_TEMPLATES_TET = [
    "Juñu to'o Agostu",
    "Tempu sazonál maran",
    "Tempu sazonál sira ne'ebé udan",
]

IDENT_TEMPLATES_TET = [
    "Ai-hun ida ne'ebé boot-médiu",
    "Ai-hun ida ne'ebé aas no monu; nia tahan dala barak iha apoiu",
    "Ai-hun ida ne'ebé boot, loos, monu",
]

GERM_TEMPLATES_TET = [
    "Fini nia jerminasaun akontese iha semana 1 to'o 2 nia laran iha rai ne'ebé úmidu",
    "Presiza bee uitoan no mahon",
    "Atu hadi'a posibilidade jerminasaun nian tau fini iha bee laran durante kalan tomak",
]

PEST_TEMPLATES_TET = [  
    "Áfidu sira no mineiru sira bele haree iha ai-funan sira",
    "Bele haree lalar balu.",
    "Térmite sira maka problema prinsipál ba ai-teka nia tahan.",
]

def pick_or_empty(values, empty_prob=0.2):
    """Return a random entry of ``values``, or ``""`` with probability ``empty_prob``.

    Args:
        values: Sequence of candidate values to draw from.
        empty_prob: Probability (0..1) of returning the empty string instead
            of drawing from ``values``.

    Returns:
        One element of ``values``, or ``""``. An empty ``values`` always
        yields ``""``.
    """
    # random.choice raises IndexError on an empty sequence, which used to make
    # an empty list fail only on the draws that were not blanked out.
    if len(values) == 0:
        return ""
    return "" if random.random() < empty_prob else random.choice(values)

def generate_record_tetum(sr_no):
    """Build one synthetic species row tagged with language ``"tet"``.

    Args:
        sr_no: Serial number of the row; also embedded in the scientific
            name, the common name and the image URL.

    Returns:
        dict keyed by column name. ``id`` is a fresh UUID4 string, the
        free-text fields come from the ``*_TET`` template lists and may be
        empty strings, and ``Video`` is the literal string ``"[]"``.
    """
    # Fields are filled in column order so the sequence of random draws stays
    # the same as when the dict was built in a single literal.
    row = {
        "id": str(uuid.uuid4()),
        "Sr No": sr_no,
        "language": "tet",
    }
    row["Scientific name"] = f"Synthetica plantensis {sr_no}"
    row["Etymology"] = pick_or_empty(ETYM_TEMPLATES_TET)
    row["Common name"] = f"Planta Sintétika {sr_no}"
    row["Habitat"] = pick_or_empty(HABITAT_TEMPLATES_TET)
    row["Phenology"] = pick_or_empty(PHENO_TEMPLATES_TET)
    row["Identification Characters"] = pick_or_empty(IDENT_TEMPLATES_TET)
    row["Leaf type"] = random.choice(LEAF_TYPES)
    row["Fruit type"] = random.choice(FRUIT_TYPES)
    row["Seed Germination"] = pick_or_empty(GERM_TEMPLATES_TET)
    row["Pest"] = pick_or_empty(PEST_TEMPLATES_TET)
    row["Image"] = f"https://example.com/species/images/{sr_no}.jpg"
    row["Video"] = "[]"
    return row

def generate_synthetic_excel_tetum(n_rows=20, output_file="synthetic_species_tet.xlsx"):
    """Generate synthetic "tet" species records and write them to an Excel file.

    Args:
        n_rows: Number of records to generate; serial numbers run 1..n_rows.
            Zero or a negative value produces a header-only sheet.
        output_file: Path of the ``.xlsx`` file to write.

    Returns:
        None. The file is written as a side effect.
    """
    column_order = [
        "id",
        "Sr No",
        "language",
        "Scientific name",
        "Etymology",
        "Common name",
        "Habitat",
        "Phenology",
        "Identification Characters",
        "Leaf type",
        "Fruit type",
        "Seed Germination",
        "Pest",
        "Image",
        "Video",
    ]
    records = [generate_record_tetum(i) for i in range(1, n_rows + 1)]
    # Passing columns= fixes the column order and keeps the headers when there
    # are no records; selecting the column list on a column-less frame raised
    # KeyError for n_rows <= 0.
    df = pd.DataFrame(records, columns=column_order)
    df.to_excel(output_file, index=False)

if __name__ == "__main__":
    generate_synthetic_excel_tetum()
    # Report success only after the file has been written; the message used to
    # sit outside the guard and printed even when nothing was generated.
    print("Synthetic Excel file generated Tetum Version ")


Synthetic Excel file generated Tetum Version 
