In [23]:
import json
from datetime import datetime, timedelta
import random

# Number of random draws to try before switching to a deterministic serial
# suffix; keeps the generator from spinning once a type's name space is full.
_MAX_RANDOM_ATTEMPTS = 100


def _compose_org_name(org_type):
    """Draw one random base name for ``org_type``; it may already be in use.

    Any ``org_type`` other than the seven handled explicitly below is
    treated as "OTHER".
    """
    if org_type == "UNIVERSITY":
        prefixes = ["", "State", "Technical", "International", "National"]
        locations = ["North", "South", "East", "West", "Central", ""]
        suffixes = ["University", "College", "Institute of Technology"]
        parts = (random.choice(prefixes), random.choice(locations), random.choice(suffixes))
        # Prefix and location can both be empty; joining only the non-empty
        # parts avoids names with a leading space such as " University".
        return " ".join(part for part in parts if part)

    if org_type == "RESEARCH_INSTITUTE":
        domains = ["Biomedical", "Quantum", "Neural", "Climate", "Space", "Ocean", "Materials", "AI", "Robotics"]
        suffixes = ["Research Institute", "Research Center", "Institute for Advanced Studies"]
        return f"{random.choice(domains)} {random.choice(suffixes)}"

    if org_type == "PUBLIC_SECTOR":
        domains = ["Environmental", "Health", "Education", "Transport", "Energy", "Defense", "Agriculture"]
        suffixes = ["Department", "Agency", "Authority", "Commission", "Bureau"]
        return f"National {random.choice(domains)} {random.choice(suffixes)}"

    if org_type == "NON_PROFIT_ORGANIZATION":
        causes = ["Global Health", "Education", "Climate Action", "Wildlife", "Humanitarian", "Children's"]
        suffixes = ["Foundation", "Trust", "Alliance", "Initiative", "Association"]
        return f"{random.choice(causes)} {random.choice(suffixes)}"

    if org_type == "CORPORATION":
        domains = ["Tech", "Bio", "Med", "Data", "AI", "Quantum", "Green", "Digital"]
        suffixes = ["Solutions", "Systems", "Technologies", "Industries", "Corporation"]
        # Corporate names are deliberately fused without a space ("AIIndustries").
        return f"{random.choice(domains)}{random.choice(suffixes)}"

    if org_type == "SCHOOL":
        types = ["Elementary", "Middle", "High", "International", "Preparatory"]
        suffixes = ["School", "Academy"]
        return f"{random.choice(types)} {random.choice(suffixes)}"

    if org_type == "FREELANCER":
        domains = ["Software", "Design", "Content", "Marketing", "Research", "Consulting"]
        names = ["Solutions", "Services", "Consulting", "Studio"]
        return f"Independent {random.choice(domains)} {random.choice(names)}"

    # OTHER (and any unrecognized type)
    domains = ["General", "Specialized", "Professional", "Advanced"]
    suffixes = ["Services", "Solutions", "Group", "Organization"]
    return f"{random.choice(domains)} {random.choice(suffixes)}"


def generate_unique_name(used_names, org_type):
    """Generate a unique organization name based on type.

    Args:
        used_names: Set of names already handed out. It is mutated in place:
            the returned name is added before returning.
        org_type: One of "UNIVERSITY", "RESEARCH_INSTITUTE", "PUBLIC_SECTOR",
            "NON_PROFIT_ORGANIZATION", "CORPORATION", "SCHOOL", "FREELANCER";
            any other value uses the generic "OTHER" vocabulary.

    Returns:
        A name that was not in ``used_names`` when the call started. The name
        never carries leading or trailing whitespace.
    """
    for _ in range(_MAX_RANDOM_ATTEMPTS):
        name = _compose_org_name(org_type)

        # On a collision, disambiguate with a random numeric identifier.
        if name in used_names:
            name = f"{name} {random.randint(1, 999)}"

        if name not in used_names:
            used_names.add(name)
            return name

    # The random name space for this type is (nearly) exhausted. Fall back to
    # a serial number above the random 1..999 range so the call always ends.
    base = _compose_org_name(org_type)
    serial = 1000
    while f"{base} {serial}" in used_names:
        serial += 1
    name = f"{base} {serial}"
    used_names.add(name)
    return name

def generate_org_data(count=500):
    """Build ``count`` synthetic organization records.

    Each record is a dict with the keys ``name``, ``type``, ``description``,
    ``created_at``, ``updated_at`` and ``aliases``. Timestamps are ISO-8601
    strings; ``created_at`` falls within 1000 days (plus a random time of
    day) after 2020-01-01 and ``updated_at`` is up to 365 days (plus a random
    time of day) later. ``aliases`` is either ``None`` or a JSON-encoded list
    holding one to three name variants.

    Args:
        count: Number of records to produce.

    Returns:
        A list of record dicts, in generation order, with unique names.
    """
    type_pool = [
        "UNIVERSITY", "RESEARCH_INSTITUTE", "PUBLIC_SECTOR",
        "NON_PROFIT_ORGANIZATION", "CORPORATION", "SCHOOL",
        "FREELANCER", "OTHER"
    ]

    epoch = datetime(2020, 1, 1)
    taken_names = set()
    records = []

    for _ in range(count):
        # Creation moment: random offset from the epoch.
        created_days = random.randint(0, 1000)
        created_hours = random.randint(0, 23)
        created_minutes = random.randint(0, 59)
        created = epoch + timedelta(
            days=created_days, hours=created_hours, minutes=created_minutes
        )

        # Last update: random offset after creation, so it never precedes it.
        update_days = random.randint(0, 365)
        update_hours = random.randint(0, 23)
        update_minutes = random.randint(0, 59)
        updated = created + timedelta(
            days=update_days, hours=update_hours, minutes=update_minutes
        )

        # The type is chosen first because it drives the name vocabulary.
        kind = random.choice(type_pool)
        org_name = generate_unique_name(taken_names, kind)

        # Roughly 60% of records get aliases; the rest keep None.
        alias_json = None
        if random.random() < 0.6:
            keep = random.randint(1, 3)
            initials = ''.join(word[0] for word in org_name.split()).upper()
            candidates = [org_name.upper(), org_name.replace(' ', '_'), initials]
            alias_json = json.dumps(candidates[:keep])

        readable_kind = kind.lower().replace('_', ' ')
        records.append({
            "name": org_name,
            "type": kind,
            "description": f"Leading {readable_kind} focused on excellence and innovation in the field.",
            "created_at": created.isoformat(),
            "updated_at": updated.isoformat(),
            "aliases": alias_json,
        })

    return records

# Build the synthetic organization records
orgs = generate_org_data(500)


# Render one INSERT statement per record. Embedded single quotes are doubled
# so every value stays a valid SQL string literal; a missing alias list
# becomes a bare NULL rather than a quoted string.
sql_statements = []
for org in orgs:
    if org['aliases'] is None:
        aliases_literal = 'NULL'
    else:
        aliases_literal = "'" + org['aliases'].replace("'", "''") + "'"

    values = [
        "'" + org['name'].replace("'", "''") + "'",
        "'" + org['type'] + "'",
        "'" + org['description'].replace("'", "''") + "'",
        "'" + org['created_at'] + "'",
        "'" + org['updated_at'] + "'",
        aliases_literal,
    ]
    sql = (
        'INSERT INTO "public"."Organization" '
        '(name, type, description, created_at, updated_at, aliases) '
        'VALUES (' + ', '.join(values) + ');'
    )
    sql_statements.append(sql)

# Show a short preview instead of dumping all statements
print("-- Example INSERT statements (showing first 5):")
print(*sql_statements[:5], sep="\n")
print(f"\n-- Total statements generated: {len(sql_statements)}")

# Sanity check: every generated organization name must be distinct
names = [org['name'] for org in orgs]
unique_names = set(names)
print("\n-- Unique names verification:")
print(f"Total names: {len(names)}")
print(f"Unique names: {len(unique_names)}")
assert len(unique_names) == len(names), "Duplicate names found!"

-- Example INSERT statements (showing first 5):
INSERT INTO "public"."Organization" (name, type, description, created_at, updated_at, aliases) VALUES ('Climate Action Alliance', 'NON_PROFIT_ORGANIZATION', 'Leading non profit organization focused on excellence and innovation in the field.', '2020-11-07T09:01:00', '2021-05-02T22:29:00', NULL);
INSERT INTO "public"."Organization" (name, type, description, created_at, updated_at, aliases) VALUES ('AIIndustries', 'CORPORATION', 'Leading corporation focused on excellence and innovation in the field.', '2022-03-26T14:05:00', '2022-07-08T02:12:00', '["AIINDUSTRIES"]');
INSERT INTO "public"."Organization" (name, type, description, created_at, updated_at, aliases) VALUES ('Climate Action Initiative', 'NON_PROFIT_ORGANIZATION', 'Leading non profit organization focused on excellence and innovation in the field.', '2020-01-10T23:53:00', '2020-04-30T00:31:00', NULL);
INSERT INTO "public"."Organization" (name, type, description, created_at, updated_a

In [24]:
# Persist the generated INSERT statements to a .sql file:
# a timestamped header comment, one statement per line, then a count footer.
from datetime import datetime

with open('organization_inserts.sql', 'w', encoding='utf-8') as f:
    generated_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    f.write(f"-- Organization INSERT statements generated on {generated_at}\n\n")

    # One statement per line
    f.writelines(f"{statement}\n" for statement in sql_statements)

    # Footer comment (written without a trailing newline)
    f.write(f"\n-- Total INSERT statements: {len(sql_statements)}")

print(f"Successfully wrote {len(sql_statements)} INSERT statements to 'organization_inserts.sql'")

Successfully wrote 500 INSERT statements to 'organization_inserts.sql'
