In [2]:
import glob
import os 
import re

In [21]:
# Define the directory containing the scripts, adjust as necessary
folders_to_process = ['RandomForest/']  # Current directory; change this to the directory containing your scripts

# List of seeds to create new config files for
seeds = [34, 42, 777]

In [20]:
# Function to determine the appropriate config filename based on conditions
def generate_config_filename(original_name, seed):
    """
    Generate the new configuration filename based on specific naming criteria.
    """
    # Regex pattern handling "I:4,[5],12:i:-" and "Kentucky"
    if "I:4,[5],12:i:-" in original_name:
        return f"model_config_{seed}_f10_4512.yml"
    elif "Kentucky" in original_name:
        return f"model_config_{seed}_f10_kentucky.yml"
    else:
        return f"model_config_{seed}_f10.yml"
    
# Mapping of folders to patterns
folder_patterns = {
    "GBC/": "gbc_allele_*_FN.py",
    "SVM/": "svm_allele_*_FN.py",
    "RandomForest/": "RF_allele_*_FN.py",
    "LogitBoost/": "lb_allele_*_FN.py"
}

In [22]:
# Iterate over each folder and its specific pattern
for folder, pattern in folder_patterns.items():
    # Skip folders not in the explicitly specified list
    if folder not in folders_to_process:
        continue

    # Wildcard pattern to identify the required files in each folder
    script_pattern = os.path.join(folder, pattern)
    scripts = glob.glob(script_pattern)

    # Extract the model prefix (first part of the pattern)
    model_prefix = pattern.split('_')[0]

    # Loop through each file and create the new copies as needed
    for script in scripts:
        original_name = os.path.basename(script)
        base_name = re.sub(r"^.*_allele_", "", original_name).replace(".py", "")

        for seed in seeds:
            # Generate the new script name prefixed with the model name
            new_script_name = f"{model_prefix}_f10_{seed}_allele_{base_name}_FN.py"
            new_script_path = os.path.join(folder, new_script_name)

            # Read the original file content
            with open(script, 'r') as infile:
                content = infile.read()

            # Replace the config path according to the seed number
            new_config_path = generate_config_filename(base_name, seed)
            new_content = re.sub(
                r"config_path\s*=\s*'.+\.yml'",
                f"config_path = '/Drives/K/ake/python_ML/TW_SourceAttribution/ML_ake/FoodNet/{new_config_path}'",
                content
            )

            # Write the modified content into the new script file
            with open(new_script_path, 'w') as outfile:
                outfile.write(new_content)

            print(f"Created: {new_script_path}")

Created: RandomForest/RF_f10_34_allele_I:4,[5],12:i:-_FN_FN.py
Created: RandomForest/RF_f10_42_allele_I:4,[5],12:i:-_FN_FN.py
Created: RandomForest/RF_f10_777_allele_I:4,[5],12:i:-_FN_FN.py
Created: RandomForest/RF_f10_34_allele_Infantis_FN_FN.py
Created: RandomForest/RF_f10_42_allele_Infantis_FN_FN.py
Created: RandomForest/RF_f10_777_allele_Infantis_FN_FN.py
Created: RandomForest/RF_f10_34_allele_Typhimurium_FN_FN.py
Created: RandomForest/RF_f10_42_allele_Typhimurium_FN_FN.py
Created: RandomForest/RF_f10_777_allele_Typhimurium_FN_FN.py
Created: RandomForest/RF_f10_34_allele_Heidelberg_FN_FN.py
Created: RandomForest/RF_f10_42_allele_Heidelberg_FN_FN.py
Created: RandomForest/RF_f10_777_allele_Heidelberg_FN_FN.py
Created: RandomForest/RF_f10_34_allele_Enteritidis_FN_FN.py
Created: RandomForest/RF_f10_42_allele_Enteritidis_FN_FN.py
Created: RandomForest/RF_f10_777_allele_Enteritidis_FN_FN.py
Created: RandomForest/RF_f10_34_allele_Kentucky_FN_FN.py
Created: RandomForest/RF_f10_42_allele_Ke