# HILDA Data Pre-processing

### Convert SAS files to CSV format

In [1]:
import pandas as pd
from pathlib import Path

In [2]:
def convert_sas_to_csv(input_folder, output_folder, *, encoding=None):
    """
    Reads all .sas7bdat files from a folder and saves them in .csv format

    Files are converted one at a time. A failure on one file is reported
    with print() and does not stop the remaining files from being processed.
    Only files directly inside ``input_folder`` are considered (no recursion).

    Parameters:
    -----------
    input_folder : str
        Path to the folder containing .sas7bdat files
    output_folder : str
        Folder where the .csv files will be saved. It is created (including
        any missing parent folders) if it does not exist yet.
    encoding : str, optional
        Text encoding forwarded to ``pandas.read_sas``. With the default
        (None) pandas keeps text columns as raw bytes, which then end up in
        the CSV in their ``b'...'`` representation; pass e.g. "latin-1" or
        "utf-8" to get decoded strings instead.

    Returns:
    --------
    None
        Each converted file is written as ``<output_folder>/<stem>.csv``
        without the DataFrame index.
    """
    folder_path = Path(input_folder)
    output_path = Path(output_folder)

    # Make sure the destination exists up front. Without this, every
    # to_csv() call below fails when the folder is missing, and the only
    # trace is one "Error processing ..." line per file.
    output_path.mkdir(parents=True, exist_ok=True)

    # Iterate over all .sas7bdat files
    for sas_file in folder_path.glob("*.sas7bdat"):
        # The broad except is deliberate: this is a best-effort batch job
        # and one unreadable file should not abort the whole conversion.
        try:
            print(f"Reading: {sas_file.name}")
            df = pd.read_sas(sas_file, format='sas7bdat', encoding=encoding)

            csv_file = output_path / f"{sas_file.stem}.csv"
            df.to_csv(csv_file, index=False)
            print(f"Saved: {csv_file.name}")

        except Exception as e:
            print(f"Error processing {sas_file.name}: {e}")

In [None]:
# Source and destination folders, given relative to the notebook's working directory.
sas_folder, csv_folder = "../data/raw_sas/", "../data/raw_csv/"

# Convert every .sas7bdat file found in the source folder to CSV.
convert_sas_to_csv(input_folder=sas_folder, output_folder=csv_folder)

In [4]:
# Load the `watermark` IPython extension so the notebook can record the
# environment it was run in.
%load_ext watermark
# Print the reproducibility stamp shown in this cell's output: the
# "Last updated" date with weekday/month names (-u, -n), the Python and
# IPython versions (-v), and the versions of the imported packages (-iv).
%watermark -n -u -v -iv

Last updated: Sat Apr 12 2025

Python implementation: CPython
Python version       : 3.12.8
IPython version      : 8.31.0

pandas: 2.2.2

