# Notebook for tests and experiments

In [None]:
import pandas as pd
import numpy as np
import re
import pytz
import os
from pathlib import Path

import os
import csv
from collections import defaultdict

def get_csv_headers(directory, delimiter=";"):
    """
    Recursively collects the column headers of every CSV file under a directory.

    Only the first row of each file whose name ends in ".csv" is read, and it is
    split on ``delimiter``. Directories and files are visited in sorted order so
    the result is reproducible. An empty file contributes no entries; a file that
    cannot be read is reported on stdout and skipped.

    Args:
        directory (str): Path to the directory containing CSV files.
        delimiter (str): Field separator used by the CSV files. Defaults to ";".

    Returns:
        list: A list of tuples where each tuple contains the file path and a single column header.
    """
    headers = []

    for root, dirs, files in os.walk(directory):
        # Sorting in place steers os.walk's (top-down) descent order, so the
        # output no longer depends on filesystem enumeration order.
        dirs.sort()
        for filename in sorted(files):
            if not filename.endswith(".csv"):
                continue

            file_path = os.path.join(root, filename)
            try:
                # "utf-8-sig" drops a leading byte-order mark (common in
                # spreadsheet exports) that would otherwise be glued to the
                # first header; newline="" lets the csv module handle line
                # endings itself, including newlines inside quoted fields.
                with open(file_path, mode="r", encoding="utf-8-sig", newline="") as file:
                    reader = csv.reader(file, delimiter=delimiter)
                    # Default to no columns so an empty file is not an error.
                    columns = next(reader, [])
            except Exception as e:
                # Best-effort scan: one unreadable file must not abort the rest.
                print(f"Error reading {file_path}: {e}")
                continue

            headers.extend((file_path, col) for col in columns)

    return headers

def save_headers_to_csv(headers, output_file):
    """
    Appends the headers to a CSV file, creating the file if it does not exist.

    The title row ("File Path", "Column Header") is written only when the output
    file is missing or empty, so repeated calls accumulate rows under a single
    title row. Failures are reported on stdout rather than raised.

    Args:
        headers (list): List of tuples with file paths and column headers.
        output_file (str): Path to the output CSV file.
    """
    # Decide by size rather than mere existence: a pre-created or truncated
    # (zero-byte) file still needs the title row.
    try:
        needs_title_row = os.path.getsize(output_file) == 0
    except OSError:
        # Missing (or otherwise unreadable) path: treat as a fresh file and let
        # open() below report any real problem.
        needs_title_row = True

    try:
        with open(output_file, mode="a", encoding="utf-8", newline='') as file:
            writer = csv.writer(file)
            if needs_title_row:
                writer.writerow(["File Path", "Column Header"])
            writer.writerows(headers)
        print(f"Headers saved to {output_file}")
    except Exception as e:
        # Best-effort export: report the failure instead of propagating it.
        print(f"Error saving to {output_file}: {e}")

def main():
    """
    Interactively exports the column headers of all CSV files in a directory.

    Prompts for the directory to scan and validates it immediately, so an
    invalid directory is reported before the user is asked for anything else.
    Then prompts for the output file, collects the headers with
    get_csv_headers() and appends them via save_headers_to_csv().
    """
    # Pasted paths often carry stray leading/trailing whitespace.
    directory = input("Enter the directory path containing CSV files: ").strip()
    if not os.path.isdir(directory):
        print("Invalid directory path.")
        return

    output_file = input("Enter the output file path (e.g., headers_output.csv): ").strip()

    headers = get_csv_headers(directory)
    save_headers_to_csv(headers, output_file)

# Entry-point guard: run the interactive prompts only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

