In [3]:
import os
import chardet

def detect_encoding(file_path):
    """Guess the character encoding of the file at *file_path*.

    The file is opened in binary mode and fed to chardet's incremental
    ``UniversalDetector`` in fixed-size chunks, so memory use stays bounded
    regardless of file size. Reading stops as soon as the detector reports
    that it is confident.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The detected encoding name as reported by chardet, or ``None`` when
        chardet cannot make a guess (for example, for an empty file).
    """
    # Local import: only the top-level ``chardet`` package is imported by
    # the file, and the detector class lives in a submodule.
    from chardet.universaldetector import UniversalDetector

    chunk_size = 1 << 20  # 1 MiB per read keeps memory use flat.
    detector = UniversalDetector()
    with open(file_path, 'rb') as f:
        while chunk := f.read(chunk_size):
            detector.feed(chunk)
            if detector.done:
                # The detector is already certain; no need to read further.
                break
    # close() finalises the guess and populates ``detector.result``.
    detector.close()
    return detector.result['encoding']

def convert_encoding(input_folder, output_folder, target_encoding='utf-8'):
    """Re-encode every regular, non-hidden file found directly in a folder.

    For each file ``<name>.<ext>`` in *input_folder*, the source encoding is
    detected with ``detect_encoding`` and the text is written, encoded as
    *target_encoding*, to ``<output_folder>/<name>/<name>.<ext>``.
    Sub-directories of *input_folder* are not descended into, and entries
    whose name starts with ``.`` are skipped.

    Args:
        input_folder: Directory whose files are converted.
        output_folder: Directory that receives one sub-folder per converted
            file; created if it does not exist.
        target_encoding: Encoding used for the written files.

    Returns:
        None. Progress is reported with ``print``.
    """
    # Local import keeps this function self-contained; the file's import
    # block does not bring in ``shutil``.
    import shutil

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Skip hidden files such as .DS_Store
        if filename.startswith('.'):
            continue

        file_path = os.path.join(input_folder, filename)
        if not os.path.isfile(file_path):
            # Directories (including an output folder nested inside the
            # input folder) and other non-regular entries are ignored.
            continue

        # Detect original encoding
        original_encoding = detect_encoding(file_path)
        print(f"Converting {filename}: Detected {original_encoding}")

        # Detection can yield None; passing None to open() would silently
        # select the platform's locale encoding, so choose one explicitly.
        source_encoding = original_encoding or 'utf-8'

        # Create a subfolder for this file inside the output folder
        file_base_name = os.path.splitext(filename)[0]
        file_subfolder = os.path.join(output_folder, file_base_name)
        os.makedirs(file_subfolder, exist_ok=True)

        output_file_path = os.path.join(file_subfolder, filename)

        # newline='' on both handles disables newline translation so the
        # original line endings survive the conversion unchanged.
        # errors='ignore' is kept as a deliberate best-effort choice: bytes
        # that are invalid in the detected encoding are dropped, not raised.
        with open(file_path, 'r', encoding=source_encoding,
                  errors='ignore', newline='') as src, \
                open(output_file_path, 'w', encoding=target_encoding,
                     newline='') as dst:
            # Stream in chunks instead of holding the whole file in memory.
            shutil.copyfileobj(src, dst)

        print(f"File {filename} converted to {target_encoding} and saved at {output_file_path}")

# Example run: convert every file found directly under the source folder and
# place the re-encoded copies under the destination folder.
input_folder = r'/Users/jean.barkoczy/Documents/BaseCompleta'
output_folder = r'/Users/jean.barkoczy/Documents/BaseCompleta/Trata'
target_encoding = 'utf-8'

convert_encoding(
    input_folder=input_folder,
    output_folder=output_folder,
    target_encoding=target_encoding,
)




Converting Ses_seg_prov_det.csv: Detected ascii
File Ses_seg_prov_det.csv converted to utf-8 and saved at /Users/jean.barkoczy/Documents/BaseCompleta/Trata/Ses_seg_prov_det/Ses_seg_prov_det.csv
Converting SES_Balanco.csv: Detected ascii
File SES_Balanco.csv converted to utf-8 and saved at /Users/jean.barkoczy/Documents/BaseCompleta/Trata/SES_Balanco/SES_Balanco.csv
Converting ses_valoresresmovgrupos.csv: Detected ascii
File ses_valoresresmovgrupos.csv converted to utf-8 and saved at /Users/jean.barkoczy/Documents/BaseCompleta/Trata/ses_valoresresmovgrupos/ses_valoresresmovgrupos.csv
Converting ses_provramos.csv: Detected ascii
File ses_provramos.csv converted to utf-8 and saved at /Users/jean.barkoczy/Documents/BaseCompleta/Trata/ses_provramos/ses_provramos.csv
Converting ses_pgbl_resgates.csv: Detected ascii
File ses_pgbl_resgates.csv converted to utf-8 and saved at /Users/jean.barkoczy/Documents/BaseCompleta/Trata/ses_pgbl_resgates/ses_pgbl_resgates.csv
Converting ses_prev_cap_uf.csv