In [1]:
import random
import os
from datetime import datetime
from chunk import Chunk
from chunk.Visualize_Operations_Number import visualize_operations_number
from category_integration import Category_Integration 
from replace import Replace
from production import Production
from topsim import TopSim
from topsim.Normalized_TopSim import Normalized_TopSim
from topsim import Visualize_TopSim
from topsim.Ibuki_1_TopSim import Ibuki_1_TopSim
from expressivity import Expressivity
from expressivity.Visualize_Expressivity import visualize_expressivity
from expressivity.Visualize_TopSim_and_Expressivity import visualize_combined_metrics


class Agent:
    def __init__(self, name, learning_algorithms=None):
        self.name = name
        self.memory = []
        self.meanings_for_production = []
        self.production = []
        self.learning_algorithms = learning_algorithms if learning_algorithms else ['chunk', 'category_integration', 'replace']
        self.chunk_application_count = 0  # 各世代のチャンク適用回数をカウント
        self.category_integration_count = 0
        self.replace_application_count = 0
        self.label_conversion_history = {}  # カテゴリー統合の変換履歴を保持

    def learn_language(self, rule_set, iterations=3, batch_size=5):
        learned_rules = []
        chunk_application_count = 0  # 世代ごとのチャンク適用回数
        category_integration_count = 0
        replace_application_count = 0
        
        for i in range(0, len(rule_set), batch_size):
            batch = rule_set[i:i + batch_size]
            current_batch = learned_rules + batch

            for _ in range(iterations):
                for algorithm in self.learning_algorithms:
                    if algorithm == 'chunk':
                        current_batch, chunk_applications = Chunk.chunk_learning(current_batch)
                        chunk_application_count += chunk_applications
                    elif algorithm == 'category_integration':
                        # `label_conversion_history` を追加で渡す
                        current_batch, category_applications = Category_Integration.category_integration_learning(
                            current_batch, self.label_conversion_history
                        )
                        category_integration_count += category_applications
                    elif algorithm == 'replace':
                        current_batch, replace_applications = Replace.replace_learning(current_batch)
                        replace_application_count += replace_applications

            learned_rules = current_batch

        self.memory = learned_rules
        self.chunk_application_count = chunk_application_count  # 最終的な適用回数を保存
        self.category_integration_count = category_integration_count
        self.replace_application_count = replace_application_count

    def produce_language(self, rule_set, all_meanings, holistic_rule_invention_length, word_rule_invention_length, max_form_length, front_keep_length, shortening_interval, generation):
        # 全ての意味空間に基づいて発話を生成する
        self.production = Production.produce(
            rule_set, 
            all_meanings,  # ここでは意味空間全体に基づいて発話を行う
            holistic_rule_invention_length, 
            word_rule_invention_length,
            max_form_length, 
            front_keep_length,
            shortening_interval,
            generation
        )
        return self.production

def sample_meaning_space(file_path):
    """意味空間全体を読み込む"""
    with open(file_path) as f:
        s = f.read()
        meanings = s.split(",\n")
    return meanings

def simulate_language_evolution(
    n_gens, 
    n_samples, 
    initial_language_file, 
    semantic_space_file, 
    holistic_rule_invention_length, 
    word_rule_invention_length, 
    random_seed=1, 
    iterations=3, 
    batch_size=5,
    max_form_length=9,
    front_keep_length=3,
    shortening_interval=30
):
    # 乱数のシードを設定
    random.seed(random_seed)
    
    parent = Agent(name="Parent")
    child = Agent(name="Child", learning_algorithms=['chunk', 'category_integration', 'replace'])

    now = datetime.now()
    timestamp = now.strftime('%Y%m%d_%H%M%S')
    base_folder = f"out/exp{timestamp}"
    os.makedirs(base_folder, exist_ok=True)
    
    generation_folder = os.path.join(base_folder, "generations")
    os.makedirs(generation_folder, exist_ok=True)

    topsim_values = []
    normalized_topsim_values = []  # Normalized_TopSimのリスト
    ibuki_1_topsim_values = []
    expressivity_values = [0]
    chunk_application_counts = [0]  # 各世代のチャンク適用回数を保持するリスト
    category_integration_counts = [0]
    replace_application_counts = [0]
    knowledge_counts = [0]  # 各世代の言語知識の数を保持するリスト

    # 1世代目の親の発話に必要な意味空間全体を取得（初期言語ファイルから）
    all_meanings = sample_meaning_space(file_path=initial_language_file)
    
    
    parent.production = sample_meaning_space(initial_language_file)
    child_samples = parent.production
    
    
    # print(f"世代1の親の発話: {parent.production}")#------------------------1111
    with open(os.path.join(generation_folder, "gen-1.txt"), 'w') as f:
        f.write("\n".join(parent.production))

    # print(f"世代1の子の受け取った発話: {child_samples}")#------------------------1111
    child.learn_language(child_samples, iterations=iterations)
    
    # 各世代のチャンク適用回数をリストに追加
    chunk_application_counts.append(child.chunk_application_count)
    category_integration_counts.append(child.category_integration_count)
    replace_application_counts.append(child.replace_application_count)
    knowledge_counts.append(len(child.memory))  # 言語知識の数を記録
    # print(f"世代 1 の子の言語知識: {child.memory}") #------------------------1111

    # 全ての発話に対してTopSimを計算
    topsim_values.append(TopSim.TopSim(parent.production))
    normalized_topsim_values.append(Normalized_TopSim(parent.production))
    ibuki_1_topsim_values.append(Ibuki_1_TopSim(parent.production))  # Ibuki_1_TopSim値の追加


    # 2世代目以降の処理
    for generation in range(2, n_gens + 1):
        # 2世代目以降の意味空間を全て取得
        all_meanings = sample_meaning_space(file_path=semantic_space_file)

        # 親が意味空間全体に基づいて発話する
        parent.production = parent.produce_language(
            rule_set=child.memory,  # 子供の学習結果が親のルールセットになる
            all_meanings=all_meanings,
            holistic_rule_invention_length=holistic_rule_invention_length,
            word_rule_invention_length=word_rule_invention_length,
            max_form_length=max_form_length,
            front_keep_length=front_keep_length,
            shortening_interval=shortening_interval,
            generation=generation
        )
        with open(os.path.join(generation_folder, f"gen-{generation}.txt"), 'w') as f:
            f.write("\n".join(parent.production))
            
        # print(f"世代 {generation} の親の発話: {parent.production}") #------------------------1111


        # 子供に学習データを渡す（n_samplesだけランダムに選ぶ）
        child_samples = random.sample(parent.production, n_samples)
        # print(f"世代 {generation} の子の受け取った発話: {child_samples}")#------------------------1111
        child.learn_language(child_samples, iterations=iterations)
        # print(f"世代 {generation} の 子の言語知識: {child.memory}")#------------------------1111

        
        # 各世代のチャンク適用回数をリストに追加
        chunk_application_counts.append(child.chunk_application_count)
        category_integration_counts.append(child.category_integration_count)
        replace_application_counts.append(child.replace_application_count)
        knowledge_counts.append(len(child.memory))  # 言語知識の数を記録

        # 各世代のTopSim値を計算
        topsim_values.append(TopSim.TopSim(parent.production))
        normalized_topsim_values.append(Normalized_TopSim(parent.production))
        ibuki_1_topsim_values.append(Ibuki_1_TopSim(parent.production))  # Ibuki_1_TopSim値の追加


        # 各世代のExpressivity値を計算
        all_meanings = sample_meaning_space(semantic_space_file)
        expressivity_value = Expressivity.expressivity(child.memory, all_meanings)  # child.memoryをrule_setとして扱う
        expressivity_values.append(expressivity_value)

    
    # Combined TopSim and Expressivity の結果をプロット
    combined_plot_path = os.path.join(base_folder, "combined_transition.png")
    visualize_combined_metrics(topsim_values, normalized_topsim_values, ibuki_1_topsim_values, expressivity_values, n_gens, combined_plot_path)
    
    # 各アルゴリズムの適用回数の推移をプロット
    operations_plot_path = os.path.join(base_folder, "operations_count_transition.png")
    visualize_operations_number(chunk_application_counts, category_integration_counts, replace_application_counts, knowledge_counts, n_gens, operations_plot_path)



    # 必要なすべての変数を返す
    return parent.production, child_samples, child.memory, topsim_values, normalized_topsim_values, ibuki_1_topsim_values, expressivity_values, chunk_application_counts, category_integration_counts, replace_application_counts

# 実行部分 (__main__)
if __name__ == "__main__":
    
    # パラメータ
    N_GENS = 200  # 世代数
    N_SAMPLES = 24  # 子供が学習するデータ数
    INITIAL_LANGUAGE_FILE = "data/debug_48_1110.txt"  # 1世代目の初期言語ファイル
    SEMANTIC_SPACE_FILE = "data/48_Semantic_Space_0.txt"  # 2世代目以降の意味空間ファイル
    HOLISTIC_RULE_INVENTION_LENGTH = 6  # 文全体のルールの最大長さ
    WORD_RULE_INVENTION_LENGTH = 1  # 単語ルールの最大長さ
    ITERATIONS = 4  # 学習アルゴリズムの反復回数
    BATCH_SIZE = 12  # 何個ずつバッチに分けて学習するか
    MAX_FORM_LENGTH = 9  # 形式の最大長さ
    FRONT_KEEP_LENGTH = 6  # 前半に残す長さ
    SHORTENING_INTERVAL = 10
    SEED_MAX = 5  # シードの最大値（1～5）

    # SEED_RANGE の各シードで実験を実行
    for seed in range(1, SEED_MAX + 1):
        print(f"Experiment with seed: {seed}")  # シードの表示

        # シミュレーションの実行
        parent_production, child_samples, child_memory, topsim_values, normalized_topsim_values, ibuki_1_topsim_values, expressivity_values, chunk_application_counts, category_integration_counts, replace_application_counts = simulate_language_evolution(
            n_gens=N_GENS, 
            n_samples=N_SAMPLES, 
            initial_language_file=INITIAL_LANGUAGE_FILE, 
            semantic_space_file=SEMANTIC_SPACE_FILE,
            holistic_rule_invention_length=HOLISTIC_RULE_INVENTION_LENGTH,
            word_rule_invention_length=WORD_RULE_INVENTION_LENGTH,
            random_seed=seed,  # 各シードで実行
            iterations=ITERATIONS,
            batch_size=BATCH_SIZE,
            max_form_length=MAX_FORM_LENGTH,  # 形式の最大長さ
            front_keep_length=FRONT_KEEP_LENGTH,  # 前半に残す長さ
            shortening_interval=SHORTENING_INTERVAL
        )

        # 結果の表示
        print(f"最終世代の発話seed{seed}:", parent_production)
        print(f"最終世代の子供が受け取った発話seed{seed}:", child_samples)
        print(f"最終世代の子供の記憶seed{seed}:", child_memory)
        print(f"各世代のTopSim値seed{seed}:", topsim_values)
        print(f"各世代のNormalized_TopSim値seed{seed}:", normalized_topsim_values)
        print(f"各世代のIbuki_1_TopSim値seed{seed}:", ibuki_1_topsim_values)
        print(f"各世代のExpressivity値seed{seed}:", expressivity_values)
        print(f"各世代のチャンク適用回数seed{seed}:", chunk_application_counts)
        print(f"各世代のカテゴリ統合適用回数seed{seed}:", category_integration_counts)
        print(f"各世代のリプレース適用回数seed{seed}:", replace_application_counts)
        print("-" * 50)  # 区切りを追加

Experiment with seed: 1
word_rule_set []
rule_set ['S/_eat(_david,_bob)/0->quw', 'S/_eat(_david,_alice)/0->lcm', 'S/_get(_alice,_bob)/0->aha', 'S/_eat(_carol,_david)/0->jvu', 'S/_eat(_carol,_bob)/0->kng']
compare_form_pair_results [('1', ['quw'], ['lcm']), ('1', ['quw'], ['kng']), ('1', ['jvu'], ['kng'])]
filtered_indices_set []
can_chunk_rule_set []
split_sem_pairs []
index_sem_difference_sets []
transform_only_form_chunk_pair_sets []
split_form_pairs []
index_form_difference_sets []
used_labels set()
チャンクしたペア []
チャンク！ ['S/_eat(_david,_bob)/0->quw', 'S/_eat(_david,_alice)/0->lcm', 'S/_get(_alice,_bob)/0->aha', 'S/_eat(_carol,_david)/0->jvu', 'S/_eat(_carol,_bob)/0->kng']
word_rule_set []
rule_set ['S/_eat(_david,_bob)/0->quw', 'S/_eat(_david,_alice)/0->lcm', 'S/_get(_alice,_bob)/0->aha', 'S/_eat(_carol,_david)/0->jvu', 'S/_eat(_carol,_bob)/0->kng']
compare_form_pair_results [('1', ['quw'], ['lcm']), ('1', ['quw'], ['kng']), ('1', ['jvu'], ['kng'])]
filtered_indices_set []
can_chunk_ru

KeyboardInterrupt: 