# In [1]:
import os
from pathlib import Path
import zipfile
import random

# Root folder that receives one sub-folder per category, and the archive
# that the whole tree is packed into afterwards.
base_dir = Path("insurellm_knowledge_base")
zip_path = Path("insurellm_documents.zip")

# Per-category settings:
#   "template" - str.format template for the file body
#   "count"    - number of files to generate for the category
categories = {
    "company": {
        "template": "InsureLLM株式会社は{year}年に設立され、{location}に本社を構えるAI企業です。",
        "count": 3,
    },
    "products": {
        "template": "製品名：{product}\n特徴：{feature}",
        "count": 4,
    },
    "customer": {
        "template": "事例：{client}社\n課題：{problem}\n成果：{result}",
        "count": 5,
    },
    "news": {
        # The {month} values already carry the "月" suffix (e.g. "1月"), so
        # the template must not add another one ("1月月号" was produced before).
        "template": "【{month}号社内報】\nトピック：{topic}",
        "count": 3,
    },
    "sales": {
        "template": "年度：{year}\n製品：{product}\n売上：{revenue}百万円\n顧客数：{clients}社",
        "count": 4,
    },
    "contracts": {
        "template": "契約種別：{type}\n有効期間：{duration}年\n対象：{target}",
        "count": 3,
    },
    "employees": {
        "template": "社員名：{name}\n所属：{department}\n役職：{role}",
        "count": 3,
    },
    "analytics": {
        "template": "月：{month}\n製品：{product}\n売上：{revenue}百万円\n新規顧客：{new_clients}社",
        "count": 3,
    },
    "research": {
        "template": "研究テーマ：{theme}\n開始時期：{quarter} {year}\n責任者：{lead}",
        "count": 3,
    },
    "training": {
        "template": "研修名：{title}\n対象部門：{department}\n講師：{instructor}",
        "count": 3,
    },
}

# Dummy data pools that the generator samples from
products = [
    "InsureGPT API",
    "EduInsight",
    "ClaimVision",
]
locations = [
    "東京都港区",
    "大阪市北区",
    "名古屋市中区",
]
clients = [
    "SOMPO",
    "ベネッセ",
    "第一生命",
    "楽天損保",
]
topics = [
    "GPT-4o-mini開発",
    "社内ハッカソン",
    "新卒採用",
]
roles = [
    "エンジニア",
    "データサイエンティスト",
    "営業マネージャー",
]
departments = [
    "技術部",
    "営業部",
    "人事部",
]
themes = [
    "保険金請求の自動化",
    "教育AIの最適化",
    "LLMの多言語対応",
]
instructors = [
    "CTO直伝",
    "外部講師",
    "AIチーム",
]
# Sequential pools: months 1-5 (with the "月" suffix), years 2022-2025
# as strings, and quarters Q1-Q4.
months = [f"{number}月" for number in range(1, 6)]
years = [str(value) for value in range(2022, 2026)]
quarters = [f"Q{number}" for number in range(1, 5)]

# Create one folder per category and write its generated text files.
# Option pools used only by this step, built once instead of per file.
_features = ["自然言語処理", "OCR分類", "学習分析"]
_problems = ["処理遅延", "顧客満足度低下", "手作業の多さ"]
_results = ["処理時間短縮", "満足度向上", "業務効率化"]
_contract_types = ["NDA", "業務委託契約", "利用規約"]
_targets = ["顧客", "パートナー企業", "社員"]

for category, config in categories.items():
    category_dir = base_dir / category
    category_dir.mkdir(parents=True, exist_ok=True)
    for index in range(1, config["count"] + 1):
        # A value is drawn for every placeholder of every template, in this
        # fixed order; each template then picks only the keys it mentions.
        fields = {
            "year": random.choice(years),
            "location": random.choice(locations),
            "product": random.choice(products),
            "feature": random.choice(_features),
            "client": random.choice(clients),
            "problem": random.choice(_problems),
            "result": random.choice(_results),
            "month": random.choice(months),
            "topic": random.choice(topics),
            "revenue": random.randint(100, 600),
            "clients": random.randint(5, 25),
            "type": random.choice(_contract_types),
            "duration": random.randint(1, 5),
            "target": random.choice(_targets),
            "name": f"社員{index}",
            "department": random.choice(departments),
            "role": random.choice(roles),
            "new_clients": random.randint(1, 5),
            "theme": random.choice(themes),
            "quarter": random.choice(quarters),
            "lead": f"研究員{index}",
            "title": f"AI研修{index}",
            "instructor": random.choice(instructors),
        }
        text = config["template"].format(**fields)
        # Files are named "<category>_<n>.txt", numbered from 1.
        target_file = category_dir / f"{category}_{index}.txt"
        target_file.write_text(text, encoding="utf-8")

# Pack everything under base_dir into the zip archive, storing each entry
# under its path relative to base_dir.
with zipfile.ZipFile(zip_path, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
    for entry in base_dir.rglob("*"):
        member_name = entry.relative_to(base_dir)
        archive.write(entry, arcname=member_name)

# The reported number is the configured total across all categories.
total_files = sum(settings["count"] for settings in categories.values())
print(f"✅ 完了：{zip_path} に保存されました（{total_files}ファイル生成）")

# Output: ✅ 完了：insurellm_documents.zip に保存されました（34ファイル生成）
