In [7]:
import pandas as pd
import os

def process_and_save_csv(file_path, new_col_name, date_col='DATE', date_format='%Y-%m-%d', date_range=("1987-01-01", "2023-07-01"), resample=False):
    df = pd.read_csv(file_path)
    df.set_index(date_col, inplace=True)
    df.index = pd.to_datetime(df.index, format=date_format)
    df.rename(columns={df.columns[0]: new_col_name}, inplace=True)
    
    if resample:
        df = df.resample('M').ffill()
        df.index = df.index.map(lambda x: x.replace(day=1))
    
    df = df[date_range[0]:date_range[1]]
    output_path = os.path.join("CleanData", f"{new_col_name}.csv")
    df.to_csv(output_path)
    print(f"Processed and saved {new_col_name} to {output_path}")
    return df

directory_path = "CleanData"
os.makedirs(directory_path, exist_ok=True)

datasets = [
    ("Data/CSUSHPINSA.csv", "target"),
    ("Data/POPTHM.csv", "population"),
    ("Data/PI.csv", "income"),
    ("Data/GDP.csv", "gdp", True),
    ("Data/UNRATE.csv", "unemployed_rate"),
    ("Data/MORTGAGE30US.csv", "mortgage_rate"),
    ("Data/EMRATIO.csv", "emratio"),
    ("Data/PERMIT.csv", "permit"),
    ("Data/COMPUTSA.csv", "new_private_house"),
    ("Data/UNDCONTSA.csv", "new_private_hw_under"),
    ("Data/CIVPART.csv", "labor_percent"),
    ("Data/MSACSR.csv", "monthly_supply"),
    ("Data/HOUST.csv", "house_st"),
    ("Data/MSPUS.csv", "MSPUS", True),
    ("Data/PCU327310327310.csv", "PPI_Cement"),
    ("Data/PCU32733132733106.csv", "PPI_Concrete"),
    ("Data/CES2023610001.csv", "all_const_emp"),
    ("Data/USCONS.csv", "total_emp_cons"),
    ("Data/IPN32731S.csv", "IPI_Cement"),
    ("Data/PSAVERT.csv", "personal_saving_rate"),
    ("Data/RSAHORUSQ156S.csv", "home_ow_rate", True)
]

for dataset in datasets:
    resample = dataset[2] if len(dataset) > 2 else False
    process_and_save_csv(dataset[0], dataset[1], resample=resample)


Processed and saved target to CleanData\target.csv
Processed and saved population to CleanData\population.csv
Processed and saved income to CleanData\income.csv
Processed and saved gdp to CleanData\gdp.csv
Processed and saved unemployed_rate to CleanData\unemployed_rate.csv
Processed and saved mortgage_rate to CleanData\mortgage_rate.csv
Processed and saved emratio to CleanData\emratio.csv
Processed and saved permit to CleanData\permit.csv
Processed and saved new_private_house to CleanData\new_private_house.csv
Processed and saved new_private_hw_under to CleanData\new_private_hw_under.csv
Processed and saved labor_percent to CleanData\labor_percent.csv
Processed and saved monthly_supply to CleanData\monthly_supply.csv


  df = df.resample('M').ffill()
  df = df.resample('M').ffill()


Processed and saved house_st to CleanData\house_st.csv
Processed and saved MSPUS to CleanData\MSPUS.csv
Processed and saved PPI_Cement to CleanData\PPI_Cement.csv
Processed and saved PPI_Concrete to CleanData\PPI_Concrete.csv
Processed and saved all_const_emp to CleanData\all_const_emp.csv
Processed and saved total_emp_cons to CleanData\total_emp_cons.csv
Processed and saved IPI_Cement to CleanData\IPI_Cement.csv
Processed and saved personal_saving_rate to CleanData\personal_saving_rate.csv
Processed and saved home_ow_rate to CleanData\home_ow_rate.csv


  df = df.resample('M').ffill()
