In [1]:
import os
import json
import pandas as pd
from tqdm import tqdm

In [2]:
# Each entry pairs a dataset folder with the CSV file that receives its merged contents.
# Keeping the two values side by side makes it hard for the lists below to drift apart.
_FOLDER_TO_OUTPUT = (
    (r'F:\Datasets\Other\02.라벨링메타데이터', 'raw_meta_label.csv'),
    (r'F:\Datasets\Training\Source', 'train_source.csv'),
    (r'F:\Datasets\Training\Label', 'train_label.csv'),
    (r'F:\Datasets\Validation\Source', 'validation_source.csv'),
    (r'F:\Datasets\Validation\Label', 'validation_label.csv'),
    (r'F:\Datasets\Other\01.원천메타데이터', 'raw_meta.csv'),
)

# Input folders, in processing order.
# NOTE: `dir` shadows the built-in of the same name; the name is kept because
# the driver loop further down iterates over it.
dir = [folder for folder, _ in _FOLDER_TO_OUTPUT]

# Output CSV names, positionally aligned with `dir`.
output_name = [csv_name for _, csv_name in _FOLDER_TO_OUTPUT]

In [3]:
def merge_file(folder_path, output_file):
    '''
    Merges all JSON and CSV files in a folder and its subfolders into a single CSV file.

    A JSON file holding an object contributes one record; a JSON file holding
    a list contributes one record per element. Any other JSON payload is
    ignored. CSV files are loaded with ``pd.read_csv``. Rows from both kinds
    of file are combined (JSON rows first) and written to ``output_file`` in
    a single write, so neither kind overwrites the other.

    Nothing is written when no data is found.

    Parameters:
        folder_path (str) : Path to the folder containing JSON and/or CSV files.
        output_file (str) : Path to the output CSV file.

    Returns:
        None
    '''
    # Collect every JSON/CSV file below folder_path (extension match is case-sensitive).
    all_files = [
        os.path.join(root, file)
        for root, _, files in os.walk(folder_path)
        for file in files
        if file.endswith(('.json', '.csv'))
    ]
    print(f'Total number of files in {folder_path}: {len(all_files)}')

    json_records = []  # Raw records gathered from JSON files
    csv_frames = []    # One DataFrame per CSV file; concatenated once after the loop

    # Use tqdm to show progress
    for file_path in tqdm(all_files, desc="Processing files"):
        if file_path.endswith('.json'):
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            if isinstance(data, dict):
                json_records.append(data)
            elif isinstance(data, list):
                json_records.extend(data)
        elif file_path.endswith('.csv'):
            # Appending to a list and concatenating once avoids re-copying
            # the accumulated frame on every iteration.
            csv_frames.append(pd.read_csv(file_path))

    # Build the pieces that will make up the output, JSON rows first.
    pieces = []
    has_json = bool(json_records)
    if has_json:
        pieces.append(pd.DataFrame(json_records))

    csv_df = pd.concat(csv_frames, ignore_index=True) if csv_frames else pd.DataFrame()
    has_csv = not csv_df.empty
    if has_csv:
        pieces.append(csv_df)

    # Single write: previously the CSV result was written to the same path
    # after the JSON result and silently replaced it.
    if pieces:
        combined = pieces[0] if len(pieces) == 1 else pd.concat(pieces, ignore_index=True)
        combined.to_csv(output_file, index=False, encoding='utf-8-sig')

    if has_json:
        print(f"JSON files merged and saved to {output_file}")
    else:
        print("No JSON data found to merge.")

    if has_csv:
        print(f"CSV files merged and saved to {output_file}")
    else:
        print("No CSV data found to merge.")

In [6]:
# Scratch cell: writes an empty DataFrame to 'test.csv' in the current working directory.
pd.DataFrame().to_csv('test.csv')

In [None]:
# Run the merge once per configured (input folder, output CSV) pair.
# zip() stops at the shorter list, so the two lists must stay the same length.
for root_dir, output in zip(dir, output_name):
    merge_file(root_dir, output)

In [None]:
def merge_and_flatten_json(folder_path, output_file):
    """
    Flatten every JSON file below a folder and write the rows to one CSV file.

    Each JSON object becomes one row; nested dictionaries are expanded into
    dotted column names by ``pd.json_normalize``. A file holding a list
    contributes one row per element. Every row carries a ``Source_File``
    column with the file's base name (no directory part).

    A file that fails to load or normalise is reported on stdout and the walk
    continues; rows already produced from that file before the failure are
    kept. If no rows are collected, nothing is written.

    Parameters:
        folder_path (str): Path to the folder containing JSON files.
        output_file (str): Path to the output CSV file.

    Returns:
        None
    """
    frames = []

    # Visit folder_path and all of its subfolders.
    for current_dir, _subdirs, names in os.walk(folder_path):
        for name in names:
            if not name.endswith('.json'):
                continue

            json_path = os.path.join(current_dir, name)
            try:
                with open(json_path, 'r', encoding='utf-8') as handle:
                    payload = json.load(handle)

                # Treat a single object as a one-element list; skip any other payload type.
                if isinstance(payload, list):
                    records = payload
                elif isinstance(payload, dict):
                    records = [payload]
                else:
                    records = []

                for record in records:
                    # Expand nested dictionaries (e.g., Weather_Info) into flat columns.
                    row = pd.json_normalize(record)
                    row['Source_File'] = name
                    frames.append(row)

            except Exception as e:
                print(f"Error processing {json_path}: {e}")

    if not frames:
        print("No JSON files found to merge.")
        return

    # Stack all single-row frames and write them out in one go.
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"Merged and flattened JSON data saved to {output_file}")

# Example invocation. The input folder below is a placeholder and must be
# replaced with the real location before this cell does anything useful.
folder_path = "path_to/02.라벨링메타데이터"
output_file = "raw_meta_label.csv"  # CSV that receives the flattened records
merge_and_flatten_json(folder_path=folder_path, output_file=output_file)