# Data Analysis 

# Data Partitioning

In [1]:
import os
import shutil
import re
import random

def partition_data(kbpath, num_parts=2, shuffle=True):
    """
    Split ``exs.pl`` into ``num_parts`` balanced example files.

    For each partition ``k`` (1-based) a directory named
    ``f"{kbpath}_part{k}"`` is created (reused if it already exists).
    ``bk.pl`` and ``bias.pl`` are copied into it unchanged, and a new
    ``exs.pl`` is written containing that partition's positive examples
    followed by its negative examples.

    Examples are dealt out round-robin, so partition sizes differ by at
    most one example per class.

    Args:
        kbpath: Directory containing ``exs.pl``, ``bk.pl`` and ``bias.pl``.
        num_parts: Number of partitions to create. A value below 1 creates
            nothing and returns an empty list.
        shuffle: When true, shuffle the positive and negative examples
            (using the global ``random`` state) before splitting.

    Returns:
        The list of partition directory paths, in creation order.

    Raises:
        OSError: If an input file cannot be read or copied, or an output
            file/directory cannot be created.
    """
    ex_file = os.path.join(kbpath, "exs.pl")
    bk_file = os.path.join(kbpath, "bk.pl")
    bias_file = os.path.join(kbpath, "bias.pl")

    # --- Read every example line ---
    # The last line of a file may lack a trailing newline. Because lines
    # are reordered below (shuffle, POS-before-NEG), such a line would be
    # glued to the next clause when written, so terminate every line here.
    with open(ex_file, "r") as f:
        lines = [
            line if line.endswith("\n") else line + "\n"
            for line in f
        ]

    # Classify by prefix, ignoring leading whitespace; other lines
    # (comments, blanks, directives) are dropped.
    pos = [line for line in lines if line.strip().startswith("pos")]
    neg = [line for line in lines if line.strip().startswith("neg")]

    # --- Optional: randomise the order of the examples ---
    if shuffle:
        random.shuffle(pos)
        random.shuffle(neg)

    # --- Balanced split: deal examples out round-robin ---
    pos_splits = [pos[i::num_parts] for i in range(num_parts)]
    neg_splits = [neg[i::num_parts] for i in range(num_parts)]

    # --- Create the partition directories ---
    new_dirs = []
    for part_no, (pos_part, neg_part) in enumerate(
        zip(pos_splits, neg_splits), start=1
    ):
        part_dir = f"{kbpath}_part{part_no}"
        os.makedirs(part_dir, exist_ok=True)

        # Background knowledge and bias are shared by all partitions.
        shutil.copy(bk_file, os.path.join(part_dir, "bk.pl"))
        shutil.copy(bias_file, os.path.join(part_dir, "bias.pl"))

        # Write the balanced exs.pl: always POS first, then NEG.
        out_file = os.path.join(part_dir, "exs.pl")
        with open(out_file, "w") as f:
            f.writelines(pos_part)
            f.writelines(neg_part)

        new_dirs.append(part_dir)

        print(f"ðŸ“‚ Created {part_dir}: {len(pos_part)} POS, {len(neg_part)} NEG")

    return new_dirs


In [5]:
# Split the "zendo1" knowledge base into three partition directories.
partition_data("zendo1", num_parts=3)

ðŸ“‚ Created zendo1_part1: 7 POS, 7 NEG
ðŸ“‚ Created zendo1_part2: 7 POS, 7 NEG
ðŸ“‚ Created zendo1_part3: 6 POS, 6 NEG


['zendo1_part1', 'zendo1_part2', 'zendo1_part3']