In [2]:
import re

def remove_double_triple_commas(text):
    """Collapse every run of two or more consecutive commas into one comma.

    The previous two-pass approach (replace ``,,,`` and then ``,,``) left
    doubled commas behind for some longer runs (for example five or seven
    commas in a row). A single ``,{2,}`` pattern handles double, triple and
    any longer run uniformly.

    Parameters
    ----------
    text : str or any
        The value to clean. Non-string values (e.g. ``None`` or the float
        NaN that pandas uses for empty cells) are returned unchanged instead
        of raising ``TypeError`` inside ``re.sub``.

    Returns
    -------
    str or any
        ``text`` with each comma run reduced to a single comma, or the
        original object when it is not a string.
    """
    # Pass non-text values straight through so one missing cell cannot
    # abort an entire ``Series.apply`` call.
    if not isinstance(text, str):
        return text
    return re.sub(r',{2,}', ',', text)

In [3]:
import os
import glob
import pandas as pd

def process_csv_files(root_dirs):
    """Clean the ``text`` column of every ``watermarked_text.csv`` under the given roots.

    Each root directory is searched recursively. Every matching file is
    loaded with pandas; if it has a ``text`` column, string cells in that
    column are passed through ``remove_double_triple_commas``. The result is
    written next to the source file as ``watermarked_text_without_commas.csv``.
    Files without a ``text`` column are still copied to the new name unchanged.

    Parameters
    ----------
    root_dirs : iterable of str
        Directories whose subtrees are scanned.

    Returns
    -------
    None
        Progress and failures are reported with ``print``; a failure on one
        file does not stop the remaining files from being processed.
    """
    source_name = 'watermarked_text.csv'
    target_name = 'watermarked_text_without_commas.csv'

    for root_dir in root_dirs:
        # '**' with recursive=True matches the root itself and all nested folders.
        pattern = os.path.join(root_dir, '**', source_name)
        csv_files = glob.glob(pattern, recursive=True)

        for csv_file in csv_files:
            try:
                df = pd.read_csv(csv_file)

                if 'text' in df.columns:
                    # Only string cells are cleaned; missing values (NaN) are
                    # kept as they are so one empty cell cannot fail the file.
                    df['text'] = df['text'].apply(
                        lambda value: remove_double_triple_commas(value)
                        if isinstance(value, str) else value
                    )

                # Build the output path from the parent directory rather than
                # str.replace on the full path, which would also rewrite any
                # directory name that happens to contain the source file name.
                new_file_path = os.path.join(os.path.dirname(csv_file), target_name)

                df.to_csv(new_file_path, index=False)

                print(f"Processed and saved: {new_file_path}")
            except Exception as e:
                # Deliberate best-effort: report the problem and continue
                # with the next file.
                print(f"Failed to process {csv_file}: {e}")

# Root folders whose subtrees are scanned for watermarked_text.csv files
root_directories = [
    'inputs/prompt_based_saves',
    'inputs/c4_saves',
]

# Run the comma clean-up over every root folder listed above
process_csv_files(root_directories)


Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_15_attempt_1/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_15_attempt_2/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_15_attempt_3/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_20_attempt_1/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_20_attempt_2/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_20_attempt_3/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_5_attempt_1/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/prompt_1_temp_100_divp_5_attempt_2/watermarked_text_without_commas.csv
Processed and saved: inputs/prompt_based_saves/pro