In [1]:
import os
import pandas as pd
import sqlite3

# Load every Household Pulse Survey PUF CSV found under the base directory
# into its own table of a single SQLite database.
#
# Expected layout: base_directory/<phase folder>/<sub-folder>/<files>.
# Only files whose name contains 'puf' (case-insensitive) and does not
# contain 'repwgt' are loaded.

# Define the base directory where your HPSDATA folder is located
base_directory = 'D:\\Desktop_New\\RUCI_LAB\\HPSDATA'

# Define the name for the SQLite database (you can customize the naming methodology)
db_name = 'HPS_Phases_Data.db'  # Example naming convention
db_path = os.path.join("D:\\Desktop_New\\RUCI_LAB\\SpatialMicrosimulation_RUCILAB", db_name)

# Create or connect to an SQLite database
conn = sqlite3.connect(db_path)

# try/finally guarantees the connection is closed even when a directory
# listing fails or the run is interrupted part-way through.
try:
    # Listings are sorted because os.listdir returns entries in arbitrary
    # order; sorting keeps the load order and the log output reproducible.
    for phase_folder in sorted(os.listdir(base_directory)):
        phase_folder_path = os.path.join(base_directory, phase_folder)
        if not os.path.isdir(phase_folder_path):
            continue  # skip stray files next to the phase folders

        for sub_folder in sorted(os.listdir(phase_folder_path)):
            sub_folder_path = os.path.join(phase_folder_path, sub_folder)
            if not os.path.isdir(sub_folder_path):
                continue  # skip stray files next to the sub-folders

            for file_name in sorted(os.listdir(sub_folder_path)):
                lowered = file_name.lower()
                # Keep the main PUF files, skip the replicate-weight files
                if 'puf' not in lowered or 'repwgt' in lowered:
                    continue

                file_path = os.path.join(sub_folder_path, file_name)

                # Table name: <phase>_<sub-folder>_<file stem> with '-', '.'
                # and ' ' replaced by '_'. Other characters (for example the
                # parentheses in the sub-folder names) are left as-is, so
                # these table names must be quoted when used in SQL. The
                # naming is kept unchanged so existing queries keep working.
                table_name = f"{phase_folder}_{sub_folder}_{os.path.splitext(file_name)[0]}"
                table_name = table_name.replace("-", "_").replace(".", "_").replace(" ", "_")

                # Best-effort load: one unreadable file is reported and
                # skipped so it does not abort the remaining files.
                try:
                    df = pd.read_csv(file_path)
                    # An existing table of the same name is overwritten
                    df.to_sql(table_name, conn, if_exists='replace', index=False)
                except Exception as e:
                    print(f"Error processing {file_path}: {e}")
                else:
                    print(f"Loaded data from {file_name} into table '{table_name}'.")
finally:
    # Close the database connection
    conn.close()

print(f"Database saved at: {db_path}")


Loaded data from pulse2020_puf_13.csv into table 'Phase_2_HPS_Week13_PUF_CSV(August19_August31)2020_pulse2020_puf_13'.
Loaded data from pulse2020_puf_14.csv into table 'Phase_2_HPS_Week14_PUF_CSV(September2_September14)2020_pulse2020_puf_14'.
Loaded data from pulse2020_puf_15.csv into table 'Phase_2_HPS_Week15_PUF_CSV(September16_September28)2020_pulse2020_puf_15'.
Loaded data from pulse2020_puf_16.csv into table 'Phase_2_HPS_Week16_PUF_CSV(September30_October12)2020_pulse2020_puf_16'.
Loaded data from pulse2020_puf_17.csv into table 'Phase_2_HPS_Week17_PUF_CSV(October14_October26)2020_pulse2020_puf_17'.
Loaded data from pulse2020_puf_18.csv into table 'Phase_3_HPS_Week18_PUF_CSV(October28_November9)2020_pulse2020_puf_18'.
Loaded data from pulse2020_puf_19.csv into table 'Phase_3_HPS_Week19_PUF_CSV(November11_November23)2020_pulse2020_puf_19'.
Loaded data from pulse2020_puf_20.csv into table 'Phase_3_HPS_Week20_PUF_CSV(November25_December7)2020_pulse2020_puf_20'.
Loaded data from pulse