In [1]:
# Install necessary packages for interacting with PostgreSQL databases via SQLAlchemy
! pip install sqlalchemy psycopg2
# Install the psycopg2-binary package for PostgreSQL database connectivity
! pip install psycopg2-binary

import pandas as pd  # Importing Pandas provides powerful data structures for data analysis

import psycopg2  # Importing PostgreSQL database adapter for Python
from contextlib import contextmanager  # Importing contextmanager for creating context managers
from datetime import datetime  # Importing datetime for handling date and time data
from sqlalchemy import create_engine  # Importing create_engine for database connection engines from SQLAlchemy
import logging  # Importing Logging module for event tracking and debugging
import os  #Importing OS module for interacting with the operating system
import warnings  # Warnings module to control warning messages

# Suppress all warnings: installs a blanket "ignore" filter with no category or
# module restriction, so it applies to every warning raised after this cell runs.
# NOTE(review): this also hides deprecation warnings from pandas/SQLAlchemy —
# consider narrowing the filter to the specific categories that are noisy.
warnings.filterwarnings("ignore")






In [2]:

notebook_name = 'Data Initial Loading'

# Path of the INFO log file written by this notebook
info_log_path = f'../Logs/info/{notebook_name}_info.log'

# Creating directories if they don't exist
os.makedirs(os.path.dirname(info_log_path), exist_ok=True)

# Clearing any previous handlers on the root logger if re-running this setup
logger = logging.getLogger()
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)

# Configuring logging
info_logger = logging.getLogger('info_logger')

# The file handler is attached to 'info_logger', not to the root logger, so the
# handlers of a previous run of this cell must be removed from 'info_logger'
# itself. Without this, every re-run adds one more FileHandler and each message
# is written to the log file multiple times. Closing releases the file handle.
for stale_handler in list(info_logger.handlers):
    info_logger.removeHandler(stale_handler)
    stale_handler.close()

info_handler = logging.FileHandler(info_log_path, mode='a')  # Append mode

info_handler.setLevel(logging.INFO)

# Formatter for the info handler: timestamp, level and message
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
info_handler.setFormatter(formatter)

# Adding the handler to the logger
info_logger.addHandler(info_handler)

info_logger.setLevel(logging.INFO)


In [3]:

# Loading the final selected features details from the saved Excel file.
# The same workbook is read again later by load_feature_importance() via its own path.
final_selected_features_details = pd.read_excel('../Data/Output/processed_selected_features.xlsx')

# Loading the data from the saved Excel file for loading into database.
# NOTE(review): the load_*_dimension functions further down read a module-level
# name `initial_load_df`, which is not assigned in this cell — confirm where it is
# defined (it may be derived from `data_load_df` in a part of the notebook not shown).
data_load_df = pd.read_excel('../Data/Output/Processed_data_load_df.xlsx')

# Logged only after both Excel reads above have returned
info_logger.info("Reading Transformed data")

### Data Mart Setup: Creating the Fact and Dimension Tables

In [4]:

# Configuration for database connection.
# Each value can be overridden with the standard libpq environment variable
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials do
# not have to live in the notebook. The literals are kept only as a fallback so
# the notebook still runs unchanged on the original development machine.
# TODO: remove the fallback password once PGPASSWORD is set in every environment.
DB_CREDENTIALS = {
    'dbname': os.environ.get('PGDATABASE', 'UK Real Estate DB'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', '123!@*qweQWE'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432')
}

def build_monthly_partition_ddl(parent_table, start_year, start_month, month_count):
    """Build the DDL that creates consecutive monthly range partitions.

    One ``CREATE TABLE <parent>_<YYYY>_<MM> PARTITION OF <parent>`` statement is
    generated per month; each partition covers the half-open range from the
    first day of its month to the first day of the following month.

    Args:
        parent_table: Name of the partitioned parent table (trusted constant,
            interpolated directly into the SQL).
        start_year: Year of the first partition.
        start_month: Month (1-12) of the first partition.
        month_count: Number of consecutive monthly partitions to generate.

    Returns:
        A string with one statement per line (empty when month_count is 0).
    """
    statements = []
    for offset in range(month_count):
        # Zero-based month counters make the year roll-over plain integer arithmetic
        first = (start_month - 1) + offset
        upper = first + 1
        year, month = start_year + first // 12, first % 12 + 1
        next_year, next_month = start_year + upper // 12, upper % 12 + 1
        statements.append(
            f"CREATE TABLE {parent_table}_{year}_{month:02d} PARTITION OF {parent_table} "
            f"FOR VALUES FROM ('{year}-{month:02d}-01') TO ('{next_year}-{next_month:02d}-01');"
        )
    return "\n".join(statements)


def main():
    """Recreate the data mart: drop and re-create all dimension, fact and metadata tables.

    Everything runs in a single transaction: it is committed only if every
    statement succeeds. A ``psycopg2.Error`` is printed and the transaction is
    rolled back (the error is not re-raised). The cursor and connection are
    closed in all cases, including exceptions that are not ``psycopg2.Error``.
    """
    # Connecting to PostgreSQL database
    connection = psycopg2.connect(**DB_CREDENTIALS)
    cursor = connection.cursor()

    try:
        # Dropping tables if it exists
        cursor.execute("""
            DROP TABLE IF EXISTS metadata;
            DROP TABLE IF EXISTS sales_transactions_fact CASCADE;
            DROP TABLE IF EXISTS district_dimension CASCADE;
            DROP TABLE IF EXISTS property_type_dimension CASCADE;
            DROP TABLE IF EXISTS region_dimension CASCADE;
            DROP TABLE IF EXISTS date_dimension CASCADE;
            DROP TABLE IF EXISTS demographics_dimension CASCADE;
            DROP TABLE IF EXISTS education_employment_dimension CASCADE;
            DROP TABLE IF EXISTS rental_dimension CASCADE;
            DROP TABLE IF EXISTS vehicle_dimension CASCADE;
        """)
        print("All Tables dropped if existed")

        # Dropping any leftover child tables whose name starts with the fact table name
        cursor.execute("""
            DO $$
            DECLARE
                partition RECORD;
            BEGIN
                FOR partition IN
                    SELECT inhrelid::regclass AS partition_name
                    FROM pg_inherits
                    JOIN pg_class ON pg_class.oid = inhrelid
                    WHERE pg_class.relnamespace = 'public'::regnamespace
                    AND pg_class.relname LIKE 'sales_transactions_fact%'
                LOOP
                    EXECUTE format('DROP TABLE IF EXISTS %I CASCADE', partition.partition_name);
                END LOOP;
            END $$;
        """)
        print("Partitioning deleted if existed")

        # Executing Create table DDL statements for all Dimensions and Fact table
        cursor.execute("""
            CREATE TABLE region_dimension (
                region_id SERIAL PRIMARY KEY,
                region_code VARCHAR(255),
                region_name VARCHAR(255),
                local_authority_code VARCHAR(255) UNIQUE,  -- SCD Type 1: unique based on local authority code
                local_authority_name VARCHAR(255)
            );

            CREATE TABLE date_dimension (
                date_key SERIAL PRIMARY KEY,
                date DATE,   -- SCD Type 0
                month INT,
                quarter INT,
                year INT,
                transfer_month_year VARCHAR(255)
            );

            CREATE TABLE vehicle_dimension (
                vehicle_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),  -- SCD Type 1: unique based on local authority code
                region_id INT REFERENCES region_dimension(region_id),
                buses_total FLOAT,
                petrol_cars_total FLOAT,
                hgv_total FLOAT,
                petrol_lgv_total FLOAT,
                lpg_lgv_total FLOAT,
                hgv_motorways FLOAT,
                personal_transport FLOAT
            );

            CREATE TABLE district_dimension (
                district_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),
                date DATE,
                district VARCHAR(255),
                town_city VARCHAR(255),
                county VARCHAR(255),
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                UNIQUE (local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            );

            CREATE TABLE property_type_dimension (
                property_type_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),
                date DATE,
                property_type VARCHAR(255),
                duration VARCHAR(255),
                detached_price FLOAT,
                semi_detached_price FLOAT,
                terraced_price FLOAT,
                flat_price FLOAT,
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                UNIQUE (local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            );

            CREATE TABLE education_employment_dimension (
                education_employment_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),
                date DATE,
                qualification_index_score FLOAT,
                qualification_index_rank FLOAT,
                no_qualifications FLOAT,
                level_1_and_entry_level_qualifications FLOAT,
                level_2_qualifications FLOAT,
                level_3_qualifications FLOAT,
                apprenticeship FLOAT,
                level_4_qualifications_and_above FLOAT,
                other_qualifications FLOAT,
                num_aged_16_plus_unemployed FLOAT,
                num_aged_16_plus_employed FLOAT,
                num_aged_16_plus_self_employed FLOAT,
                deprivation_average_score FLOAT,
                deprivation_employment_ratio FLOAT,
                qualification_adjusted_employment_rate FLOAT,
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                UNIQUE (local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            );

            CREATE TABLE demographics_dimension (
                demographics_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),
                date DATE,
                area_sq_km FLOAT,
                age_0_20 FLOAT,
                age_20_40 FLOAT,
                age_40_60 FLOAT,
                age_60_plus FLOAT,
                female_population FLOAT,
                all_ages FLOAT,
                male_population FLOAT,
                est_num_households_with_child FLOAT,
                age_dependency_ratio FLOAT,
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                UNIQUE (local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            );

            CREATE TABLE rental_dimension (
                rental_id SERIAL PRIMARY KEY,
                local_authority_code VARCHAR(255),
                date DATE,
                rental_price FLOAT,
                one_bedroom_rent FLOAT,
                two_bedrooms_rent FLOAT,
                three_bedrooms_rent FLOAT,
                four_or_more_bedrooms_rent FLOAT,
                all_categories_rent FLOAT,
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                UNIQUE (local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            );

            CREATE TABLE sales_transactions_fact (
                sales_id SERIAL,
                local_authority_code VARCHAR(255),
                date DATE,
                district_id INT REFERENCES district_dimension(district_id),
                region_id INT REFERENCES region_dimension(region_id),
                property_type_id INT REFERENCES property_type_dimension(property_type_id),
                vehicle_id INT REFERENCES vehicle_dimension(vehicle_id),
                rental_id INT REFERENCES rental_dimension(rental_id),
                demographics_id INT REFERENCES demographics_dimension(demographics_id),
                education_employment_id INT REFERENCES education_employment_dimension(education_employment_id),
                date_key INT REFERENCES date_dimension(date_key),
                price NUMERIC,
                average_price FLOAT,
                predicted_price_unscaled FLOAT,
                index FLOAT,
                average_price_pct_change FLOAT,
                annual_change_percent FLOAT,
                new_price FLOAT,
                old_price FLOAT,
                sales_volume FLOAT,
                sales_volume_log FLOAT,
                old_sales_volume FLOAT,
                detached_flat_ratio FLOAT,
                detached_terraced_ratio FLOAT,
                semi_detached_price_pct_change FLOAT,
                detached_semi_detached_ratio FLOAT,
                detached_price_log FLOAT,
                semi_detached_price_log FLOAT,
                flat_price_log FLOAT,
                terraced_price_pct_change FLOAT,
                terraced_price_log FLOAT,
                gdhi FLOAT,
                deprivation_adjusted_gdhi FLOAT,
                gdhi_per_capita FLOAT,
                foo_price FLOAT,
                cash_price FLOAT,
                mortgage_price FLOAT,
                housing_demand_indicator FLOAT,
                deprivation_reduction_potential FLOAT,
                flat_price_pct_change FLOAT,  -- Added new field
                detached_price_pct_change FLOAT,  -- Added new field
                average_price_log FLOAT,  -- Added new field
                ftb_price FLOAT,  -- Added new field
                start_date TIMESTAMP,  -- SCD Type 2: Start date of the record validity
                end_date TIMESTAMP,    -- SCD Type 2: End date of the record validity (NULL if current)
                is_current BOOLEAN DEFAULT TRUE,  -- SCD Type 2: Flag for active record
                PRIMARY KEY (sales_id, local_authority_code, date)  -- Ensure uniqueness per local authority per month-year
            )PARTITION BY RANGE (date);

            CREATE TABLE metadata (
                id SERIAL PRIMARY KEY,
                table_name VARCHAR(255),
                last_extracted_date TIMESTAMP WITHOUT TIME ZONE,
                scd_type INTEGER,
                last_modified_date TIMESTAMP WITHOUT TIME ZONE,
                created_date TIMESTAMP WITHOUT TIME ZONE
            );
        """)
        print("Created all facts and dimension table.")

        # Monthly partitions from January 2023 through February 2024 (14 months),
        # i.e. sales_transactions_fact_2023_01 ... sales_transactions_fact_2024_02
        cursor.execute(build_monthly_partition_ddl('sales_transactions_fact', 2023, 1, 14))
        print("Partitioning condition added in sales_transactions_fact")

    except psycopg2.Error as e:
        print("An error occurred:", e)
        connection.rollback()  # Rollback in case of error
    else:
        connection.commit()  # Commit the changes if all is well
    finally:
        # Always release the cursor and connection, whatever happened above
        cursor.close()
        connection.close()
        print("Database connection closed.")

# Run the DDL setup when this cell is executed
if __name__ == "__main__":
    main()
    
# NOTE(review): main() handles psycopg2.Error internally (prints the error and
# rolls back) and then returns normally, so this message is logged even when
# table creation failed — consider logging success only after a commit.
info_logger.info("Created all Fact table and Dimension Tables along with required Partitioning")

All Tables dropped if existed
Partitioning deleted if existed


Created all facts and dimension table.
Partitioning condition added in sales_transactions_fact
Database connection closed.


In [5]:

# Configuration for database connection.
# Each value can be overridden with the standard libpq environment variable
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials do
# not have to live in the notebook. The literals are kept only as a fallback so
# the notebook still runs unchanged on the original development machine.
# TODO: remove the fallback password once PGPASSWORD is set in every environment.
DB_CREDENTIALS = {
    'dbname': os.environ.get('PGDATABASE', 'UK Real Estate DB'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', '123!@*qweQWE'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432')
}

def main():
    """Drop the auxiliary tables so the following cells can recreate them.

    Drops feature_importance_dimension, governance_table,
    initial_load_etl_source_data and incremental_load_etl_source_data if they
    exist. A ``psycopg2.Error`` is printed and the transaction rolled back
    (not re-raised). The cursor and connection are closed in all cases,
    including exceptions that are not ``psycopg2.Error``.
    """
    # Connect to PostgreSQL database
    connection = psycopg2.connect(**DB_CREDENTIALS)
    cursor = connection.cursor()

    try:
        # Dropping additional tables if already exists
        cursor.execute("""
            DROP TABLE IF EXISTS feature_importance_dimension;
            DROP TABLE IF EXISTS governance_table;
            DROP TABLE IF EXISTS initial_load_etl_source_data;
            DROP TABLE IF EXISTS incremental_load_etl_source_data;
        """)
        print("Dropped tables if existed")

    except psycopg2.Error as e:
        print("An error occurred:", e)
        connection.rollback()  # Rollback in case of error
    else:
        connection.commit()  # Commit the changes if all is well
    finally:
        # Always release the cursor and connection, whatever happened above
        cursor.close()
        connection.close()
        print("Database connection closed.")

# Run this cell's main() when the cell is executed; the captured output below
# shows the guard is true inside the notebook kernel.
if __name__ == "__main__":
    main()


Dropped tables if existed
Database connection closed.


### Processing additional tables

In [6]:

# Configuration for database connection.
# The password is stored RAW here (the same value as in the other cells of this
# notebook); percent-encoding for the SQLAlchemy URL happens in
# build_connection_url(), so credentials no longer have to be encoded by hand.
# Each value can be overridden with the standard libpq environment variable
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE).
# TODO: remove the fallback password once PGPASSWORD is set in every environment.
DB_CREDENTIALS = {
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', '123!@*qweQWE'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432'),
    'dbname': os.environ.get('PGDATABASE', 'UK Real Estate DB')
}

def build_connection_url(credentials):
    """
    Build the ``postgresql+psycopg2://`` URL string for the given credentials.

    The user name and password are percent-encoded so that characters that are
    special inside a URL (such as ``@``, ``:``, ``/`` or ``!``) cannot break
    parsing. Host, port and database name are inserted unchanged.

    Args:
        credentials: Mapping with the keys 'user', 'password', 'host', 'port'
            and 'dbname'. 'user' and 'password' must be the raw, un-encoded
            values (an already percent-encoded password would be encoded twice).

    Returns:
        The connection URL as a string.
    """
    from urllib.parse import quote  # local import: only needed here

    user = quote(credentials['user'], safe='')
    password = quote(credentials['password'], safe='')
    return (
        f"postgresql+psycopg2://{user}:{password}@"
        f"{credentials['host']}:{credentials['port']}/{credentials['dbname']}"
    )

def create_db_engine(credentials):
    """
    Create and return a SQLAlchemy engine for connecting to PostgreSQL.

    Args:
        credentials: Mapping accepted by build_connection_url() (raw,
            un-encoded user name and password).

    Returns:
        The engine produced by ``create_engine`` for the built URL.
    """
    return create_engine(build_connection_url(credentials))

def calculate_feature_importance(df):
    """
    Add an 'importance_percentage' column: each row's share of the summed
    'importance' column, expressed in percent and rounded to two decimals.

    The column is written onto the frame that was passed in, and that same
    frame object is returned.
    """
    importance = df['importance']
    df['importance_percentage'] = ((importance / importance.sum()) * 100).round(2)
    return df

def save_to_postgresql(df, table_name, engine):
    """
    Write ``df`` to the table ``table_name`` through ``engine``.

    An existing table of that name is replaced, and the DataFrame index is
    not written as a column.
    """
    df.to_sql(name=table_name, con=engine, if_exists='replace', index=False)

def load_feature_importance(file_path, engine):
    """
    Read the feature importance workbook at ``file_path``, add the percentage
    column via calculate_feature_importance(), and store the result in the
    'feature_importance_dimension' table.
    """
    scored_features = calculate_feature_importance(pd.read_excel(file_path))
    save_to_postgresql(scored_features, 'feature_importance_dimension', engine)

def load_governance_table(excel_path, engine):
    """
    Read the workbook at ``excel_path`` and store its contents, unmodified,
    in the 'governance_table' table.
    """
    save_to_postgresql(pd.read_excel(excel_path), 'governance_table', engine)

def load_data_load(excel_path, engine):
    """
    Read the workbook at ``excel_path`` and store its contents, unmodified,
    in the 'initial_load_etl_source_data' table.
    """
    save_to_postgresql(pd.read_excel(excel_path), 'initial_load_etl_source_data', engine)

def main():
    """
    Main function to orchestrate the data loading and processing tasks.

    Loads, in order, the feature importance table, the governance table and
    the initial-load source table into PostgreSQL. Exceptions from any step
    propagate to the caller; the engine's connection pool is disposed in all
    cases so no pooled connection stays open after this cell.
    """
    # Paths to data files
    feature_importance_path = '../Data/Output/processed_selected_features.xlsx'
    governance_excel_path = '../Data/Input/governance_table.xlsx'
    data_load_path = '../Data/Output/Processed_data_load_df.xlsx'

    # Creating database engine
    engine = create_db_engine(DB_CREDENTIALS)

    try:
        # Loading and processing feature importance data
        load_feature_importance(feature_importance_path, engine)
        print("feature_importance_dimension loaded successfully.")

        # Loading and saving governance table data
        load_governance_table(governance_excel_path, engine)
        print("governance_table loaded successfully.")

        # Loading and saving Initial load source table data
        load_data_load(data_load_path, engine)
        print("data_load loaded successfully.")
    finally:
        # Close pooled connections explicitly instead of waiting for garbage collection
        engine.dispose()

# Script execution entry point
if __name__ == "__main__":
    main()

# Logged after main() returns; main() in this cell does not catch exceptions,
# so a failed load raises before this line is reached.
info_logger.info("Loaded Governance table and Feature Importance table")

feature_importance_dimension loaded successfully.


governance_table loaded successfully.


data_load loaded successfully.


In [7]:

# Configuration for database connection.
# Each value can be overridden with the standard libpq environment variable
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials do
# not have to live in the notebook. The literals are kept only as a fallback so
# the notebook still runs unchanged on the original development machine.
# TODO: remove the fallback password once PGPASSWORD is set in every environment.
DB_CREDENTIALS = {
    'dbname': os.environ.get('PGDATABASE', 'UK Real Estate DB'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', '123!@*qweQWE'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432')
}

def main():
    """Tag every row of feature_importance_dimension with the table that holds the feature.

    Adds a ``table_name`` column (if it is not already there, so the cell can
    be re-run) and fills it with one UPDATE per target table. A
    ``psycopg2.Error`` is printed and the transaction rolled back (not
    re-raised). The cursor and connection are closed in all cases, including
    exceptions that are not ``psycopg2.Error``.
    """
    # Connect to PostgreSQL database
    connection = psycopg2.connect(**DB_CREDENTIALS)
    cursor = connection.cursor()

    try:
        # Adding new column to the feature_importance_dimension table.
        # IF NOT EXISTS keeps the cell idempotent when it is executed twice.
        cursor.execute("""
            ALTER TABLE feature_importance_dimension
            ADD COLUMN IF NOT EXISTS table_name VARCHAR(255);
        """)
        print("Column 'table_name' added to 'feature_importance_dimension'.")

        # Updating table_name for each feature.
        # NOTE(review): 'detached_price', 'semi_detached_price', 'terraced_price'
        # and 'flat_price' are tagged 'sales_transactions_fact' below, but the DDL
        # in this notebook defines those columns on property_type_dimension —
        # confirm which table is intended. Features not listed in any UPDATE
        # (e.g. 'age_dependency_ratio', 'year') keep table_name NULL.
        cursor.execute("""
            UPDATE feature_importance_dimension
            SET table_name = 'rental_dimension'
            WHERE feature IN ('rental_price', 'all_categories_rent', 'one_bedroom_rent', 'two_bedrooms_rent', 
                              'three_bedrooms_rent', 'four_or_more_bedrooms_rent');

            UPDATE feature_importance_dimension
            SET table_name = 'demographics_dimension'
            WHERE feature IN ('age_20_40', 'area_sq_km', 'age_60_plus', 'age_0_20', 'age_40_60', 
                              'female_population', 'male_population', 'all_ages', 'est_num_households_with_child');

            UPDATE feature_importance_dimension
            SET table_name = 'education_employment_dimension'
            WHERE feature IN ('qualification_index_rank', 'no_qualifications', 'other_qualifications', 
                              'num_aged_16_plus_unemployed', 'num_aged_16_plus_self_employed', 
                              'qualification_index_score', 'num_aged_16_plus_employed', 
                              'deprivation_average_score', 'level_4_qualifications_and_above', 
                              'level_1_and_entry_level_qualifications', 'level_2_qualifications', 
                              'level_3_qualifications', 'apprenticeship');

            UPDATE feature_importance_dimension
            SET table_name = 'vehicle_dimension'
            WHERE feature IN ('petrol_lgv_total', 'hgv_total', 'buses_total', 'petrol_cars_total', 
                              'lpg_lgv_total', 'hgv_motorways', 'personal_transport');

            UPDATE feature_importance_dimension
            SET table_name = 'sales_transactions_fact'
            WHERE feature IN ('foo_price', 'detached_price', 'semi_detached_price', 'old_price', 
                              'sales_volume', 'new_price', 'cash_price', 'detached_flat_ratio', 
                              'detached_price_log', 'semi_detached_price_log', 'flat_price', 
                              'flat_price_log', 'average_price_pct_change', 'semi_detached_price_pct_change', 
                              'detached_terraced_ratio', 'detached_semi_detached_ratio', 
                              'terraced_price_pct_change', 'terraced_price_log', 'gdhi', 
                              'old_sales_volume', 'sales_volume_log', 'mortgage_price', 
                              'annual_change_percent', 'average_price', 'average_price_log', 
                              'ftb_price', 'index', 'detached_price_pct_change', 'terraced_price','flat_price_pct_change');

            UPDATE feature_importance_dimension
            SET table_name = 'region_dimension'
            WHERE feature IN ('local_authority_code', 'region_code');

            UPDATE feature_importance_dimension
            SET table_name = 'date_dimension'
            WHERE feature IN ('month', 'quarter');

            UPDATE feature_importance_dimension
            SET table_name = 'property_type_dimension'
            WHERE feature IN ('property_type', 'duration');

        """)
        print("Updated 'table_name' in 'feature_importance_dimension'.")

    except psycopg2.Error as e:
        print("An error occurred:", e)
        connection.rollback()  # Rollback in case of error
    else:
        connection.commit()  # Commit the changes if all is well
    finally:
        # Always release the cursor and connection, whatever happened above
        cursor.close()
        connection.close()
        print("Database connection closed.")

# Run this cell's main() when the cell is executed; the captured output below
# shows the guard is true inside the notebook kernel.
if __name__ == "__main__":
    main()


Column 'table_name' added to 'feature_importance_dimension'.
Updated 'table_name' in 'feature_importance_dimension'.
Database connection closed.


##  Initial Load


In [8]:

# Database credentials, passed as keyword arguments to psycopg2.connect().
# Each value can be overridden with the standard libpq environment variable
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials do
# not have to live in the notebook. The literals are kept only as a fallback so
# the notebook still runs unchanged on the original development machine.
# TODO: remove the fallback password once PGPASSWORD is set in every environment.
DB_CONFIG = {
    'dbname': os.environ.get('PGDATABASE', 'UK Real Estate DB'),
    'user': os.environ.get('PGUSER', 'postgres'),
    'password': os.environ.get('PGPASSWORD', '123!@*qweQWE'),
    'host': os.environ.get('PGHOST', 'localhost'),
    'port': os.environ.get('PGPORT', '5432')
}

@contextmanager
def get_db_connection():
    """Yield a psycopg2 connection opened with DB_CONFIG and close it on exit.

    The connection is closed whether the ``with`` body finishes normally or
    raises. Nothing is committed or rolled back here; that is left to the caller.
    """
    conn = psycopg2.connect(**DB_CONFIG)
    try:
        yield conn
    finally:
        conn.close()

def create_temp_table(cursor, table_name, schema):
    """Create a temporary table named ``table_name`` with the column list ``schema``.

    Both values are interpolated into the statement text as-is, so they must
    come from trusted code, never from user input.
    """
    cursor.execute("CREATE TEMPORARY TABLE {} ({});".format(table_name, schema))

def insert_data(cursor, table_name, columns, data_frame):
    """Insert the distinct rows of ``data_frame[columns]`` into ``table_name``.

    Rows are de-duplicated across the selected columns and inserted one at a
    time with a parameterised INSERT. Table and column names are interpolated
    into the statement text; the row values are passed as query parameters.
    """
    placeholders = ', '.join('%s' for _ in columns)
    statement = "INSERT INTO {} ({}) VALUES ({});".format(
        table_name, ', '.join(columns), placeholders
    )
    distinct_rows = data_frame[columns].drop_duplicates()
    for _, record in distinct_rows.iterrows():
        cursor.execute(statement, tuple(record))

def update_existing_records(cursor, update_query):
    """Run ``update_query`` on ``cursor``.

    Thin wrapper that names the intent (updating rows already present in the
    target table); the statement is executed as given, without parameters.
    """
    cursor.execute(update_query)

def insert_new_records(cursor, insert_query):
    """Run ``insert_query`` on ``cursor``.

    Thin wrapper that names the intent (adding new rows to the target table);
    the statement is executed as given, without parameters.
    """
    cursor.execute(insert_query)

def drop_temp_table(cursor, table_name):
    """Drop the table ``table_name`` (name interpolated as-is; must be trusted)."""
    cursor.execute("DROP TABLE {};".format(table_name))

def load_region_dimension():
    """Initial load of region_dimension through a temporary staging table.

    Distinct region rows are staged from the module-level ``initial_load_df``
    and then copied into region_dimension, skipping any local_authority_code
    that is already present there. The staging table is dropped and the
    transaction committed before the completion message is printed and logged.

    NOTE(review): ``initial_load_df`` must exist in the kernel before this is
    called; it is not assigned in the visible part of the notebook — confirm.
    """
    staging_table = 'region_dimension_temp'
    staging_schema = 'region_code VARCHAR(255), region_name VARCHAR(255), local_authority_code VARCHAR(255), local_authority_name VARCHAR(255)'
    region_columns = ['region_code', 'region_name', 'local_authority_code', 'local_authority_name']

    # Anti-join: only staged rows with no matching local_authority_code are inserted
    copy_new_regions_sql = """
        INSERT INTO region_dimension (region_code, region_name, local_authority_code, local_authority_name)
        SELECT temp.region_code, temp.region_name, temp.local_authority_code, temp.local_authority_name
        FROM region_dimension_temp temp
        LEFT JOIN region_dimension rd ON rd.local_authority_code = temp.local_authority_code
        WHERE rd.local_authority_code IS NULL;
        """

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, staging_table, staging_schema)
        insert_data(cursor, staging_table, region_columns, initial_load_df)
        cursor.execute(copy_new_regions_sql)
        drop_temp_table(cursor, staging_table)
        connection.commit()
        print("Initial load for region_dimension completed.")
        info_logger.info("Initial load for region_dimension completed")

def load_date_dimension():
    """Initial load of date_dimension through a temporary staging table.

    Distinct (date, month, quarter, year, transfer_month_year) rows are staged
    from the module-level ``initial_load_df`` and copied into date_dimension
    without checking for rows already present. The staging table is dropped
    and the transaction committed before the completion message is printed
    and logged.

    NOTE(review): ``initial_load_df`` must exist in the kernel before this is
    called; it is not assigned in the visible part of the notebook — confirm.
    """
    staging_table = 'date_dimension_temp'
    staging_schema = 'date DATE, month INT, quarter INT, year INT, transfer_month_year VARCHAR(255)'
    date_columns = ['date', 'month', 'quarter', 'year', 'transfer_month_year']

    # Straight copy of every staged row into the dimension
    copy_dates_sql = """
        INSERT INTO date_dimension (date, month, quarter, year, transfer_month_year)
        SELECT date, month, quarter, year, transfer_month_year
        FROM date_dimension_temp;
        """

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, staging_table, staging_schema)
        insert_data(cursor, staging_table, date_columns, initial_load_df)
        cursor.execute(copy_dates_sql)
        drop_temp_table(cursor, staging_table)
        connection.commit()
        print("Initial load for date_dimension completed.")
        info_logger.info("Initial load for date_dimension completed")
        
def load_vehicle_dimension():
    """Initial load of vehicle_dimension through a temporary staging table.

    Distinct vehicle rows are staged from the module-level ``initial_load_df``
    and copied into vehicle_dimension, with region_id resolved by joining
    region_dimension on local_authority_code. Because it is an inner join,
    staged rows whose local_authority_code has no region_dimension row are not
    inserted. The staging table is dropped and the transaction committed
    before the completion message is printed and logged.

    NOTE(review): ``initial_load_df`` must exist in the kernel before this is
    called; it is not assigned in the visible part of the notebook — confirm.
    """
    staging_table = 'vehicle_dimension_temp'
    staging_schema = 'local_authority_code VARCHAR(255), buses_total FLOAT, petrol_cars_total FLOAT, hgv_total FLOAT, petrol_lgv_total FLOAT, lpg_lgv_total FLOAT, hgv_motorways FLOAT, personal_transport FLOAT'
    vehicle_columns = ['local_authority_code', 'buses_total', 'petrol_cars_total', 'hgv_total', 'petrol_lgv_total', 'lpg_lgv_total', 'hgv_motorways', 'personal_transport']

    # Copy staged rows, looking up region_id through the local authority code
    copy_vehicles_sql = """
        INSERT INTO vehicle_dimension (local_authority_code, region_id, buses_total, petrol_cars_total, hgv_total, petrol_lgv_total, lpg_lgv_total, hgv_motorways, personal_transport)
        SELECT temp.local_authority_code, rd.region_id, temp.buses_total, temp.petrol_cars_total, temp.hgv_total, temp.petrol_lgv_total, temp.lpg_lgv_total, temp.hgv_motorways, temp.personal_transport
        FROM vehicle_dimension_temp temp
        JOIN region_dimension rd ON temp.local_authority_code = rd.local_authority_code;
        """

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, staging_table, staging_schema)
        insert_data(cursor, staging_table, vehicle_columns, initial_load_df)
        cursor.execute(copy_vehicles_sql)
        drop_temp_table(cursor, staging_table)
        connection.commit()
        print("Initial load for vehicle_dimension completed.")
        info_logger.info("Initial load for vehicle_dimension completed")
        
def load_rental_dimension():
    """Loads data into rental_dimension table as SCD Type 2 rows.

    The rental columns of the module-level ``initial_load_df`` are staged in
    ``rental_dimension_temp`` and copied into ``rental_dimension`` by a single
    INSERT. Within each ``local_authority_code`` the staged rows are ranked by
    ``date`` descending: the top-ranked row is stored as the current version
    (``end_date`` NULL, ``is_current`` TRUE) and every other row as a closed
    version (``end_date`` NOW(), ``is_current`` FALSE). ``start_date`` is NOW()
    for all rows.
    """
    columns = ['local_authority_code', 'date', 'rental_price', 'one_bedroom_rent', 'two_bedrooms_rent',
               'three_bedrooms_rent', 'four_or_more_bedrooms_rent', 'all_categories_rent']
    # Hard-coded identifiers only (no external input), so interpolating them into SQL is safe.
    column_list = ', '.join(columns)

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, 'rental_dimension_temp',
            'local_authority_code VARCHAR(255), date DATE, rental_price FLOAT, one_bedroom_rent FLOAT, two_bedrooms_rent FLOAT, three_bedrooms_rent FLOAT, four_or_more_bedrooms_rent FLOAT, all_categories_rent FLOAT')

        insert_data(cursor, 'rental_dimension_temp', columns, initial_load_df)

        # Rank once and derive both SCD flags from that single ranking. Rows
        # sharing a date are peers whose ROW_NUMBER() order is unspecified, so
        # ranking in two separate statements could pick a different "latest"
        # row each time and insert a row twice or not at all.
        cursor.execute(f"""
        INSERT INTO rental_dimension ({column_list}, start_date, end_date, is_current)
        SELECT {column_list},
               NOW() AS start_date,
               CASE WHEN ranked.rn = 1 THEN NULL ELSE NOW() END AS end_date,
               (ranked.rn = 1) AS is_current
        FROM (
            SELECT staged.*,
                   ROW_NUMBER() OVER (PARTITION BY staged.local_authority_code ORDER BY staged.date DESC) AS rn
            FROM rental_dimension_temp staged
        ) ranked;
        """)

        drop_temp_table(cursor, 'rental_dimension_temp')
        connection.commit()
        print("Initial load for rental_dimension completed.")
        info_logger.info("Initial load for rental_dimension completed")
        
def load_district_dimension():
    """Loads data into district_dimension table as SCD Type 2 rows.

    The district columns of the module-level ``initial_load_df`` are staged in
    ``district_dimension_temp`` and copied into ``district_dimension`` by a
    single INSERT. Within each ``local_authority_code`` the staged rows are
    ranked by ``date`` descending: the top-ranked row is stored as the current
    version (``end_date`` NULL, ``is_current`` TRUE) and every other row as a
    closed version (``end_date`` NOW(), ``is_current`` FALSE). ``start_date``
    is NOW() for all rows.
    """
    columns = ['local_authority_code', 'date', 'district', 'town_city', 'county']
    # Hard-coded identifiers only (no external input), so interpolating them into SQL is safe.
    column_list = ', '.join(columns)

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, 'district_dimension_temp',
            'local_authority_code VARCHAR(255), date DATE, district VARCHAR(255), town_city VARCHAR(255), county VARCHAR(255)')

        insert_data(cursor, 'district_dimension_temp', columns, initial_load_df)

        # Rank once and derive both SCD flags from that single ranking. Rows
        # sharing a date are peers whose ROW_NUMBER() order is unspecified, so
        # ranking in two separate statements could pick a different "latest"
        # row each time and insert a row twice or not at all.
        cursor.execute(f"""
        INSERT INTO district_dimension ({column_list}, start_date, end_date, is_current)
        SELECT {column_list},
               NOW() AS start_date,
               CASE WHEN ranked.rn = 1 THEN NULL ELSE NOW() END AS end_date,
               (ranked.rn = 1) AS is_current
        FROM (
            SELECT staged.*,
                   ROW_NUMBER() OVER (PARTITION BY staged.local_authority_code ORDER BY staged.date DESC) AS rn
            FROM district_dimension_temp staged
        ) ranked;
        """)

        drop_temp_table(cursor, 'district_dimension_temp')
        connection.commit()
        print("Initial load for district_dimension completed.")
        info_logger.info("Initial load for district_dimension completed")
        
def load_property_type_dimension():
    """Loads data into property_type_dimension table as SCD Type 2 rows.

    The property-type columns of the module-level ``initial_load_df`` are
    staged in ``property_type_dimension_temp`` and copied into
    ``property_type_dimension`` by a single INSERT. Within each
    ``local_authority_code`` the staged rows are ranked by ``date`` descending:
    the top-ranked row is stored as the current version (``end_date`` NULL,
    ``is_current`` TRUE) and every other row as a closed version (``end_date``
    NOW(), ``is_current`` FALSE). ``start_date`` is NOW() for all rows.
    """
    columns = ['local_authority_code', 'date', 'property_type', 'duration', 'detached_price',
               'semi_detached_price', 'terraced_price', 'flat_price']
    # Hard-coded identifiers only (no external input), so interpolating them into SQL is safe.
    column_list = ', '.join(columns)

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, 'property_type_dimension_temp',
            'local_authority_code VARCHAR(255), date DATE, property_type VARCHAR(255), duration VARCHAR(255), detached_price FLOAT, semi_detached_price FLOAT, terraced_price FLOAT, flat_price FLOAT')

        insert_data(cursor, 'property_type_dimension_temp', columns, initial_load_df)

        # Rank once and derive both SCD flags from that single ranking. Rows
        # sharing a date are peers whose ROW_NUMBER() order is unspecified, so
        # ranking in two separate statements could pick a different "latest"
        # row each time and insert a row twice or not at all.
        cursor.execute(f"""
        INSERT INTO property_type_dimension ({column_list}, start_date, end_date, is_current)
        SELECT {column_list},
               NOW() AS start_date,
               CASE WHEN ranked.rn = 1 THEN NULL ELSE NOW() END AS end_date,
               (ranked.rn = 1) AS is_current
        FROM (
            SELECT staged.*,
                   ROW_NUMBER() OVER (PARTITION BY staged.local_authority_code ORDER BY staged.date DESC) AS rn
            FROM property_type_dimension_temp staged
        ) ranked;
        """)

        drop_temp_table(cursor, 'property_type_dimension_temp')
        connection.commit()
        print("Initial load for property_type_dimension completed.")
        info_logger.info("Initial load for property_type_dimension completed")
                
def load_education_employment_dimension():
    """Loads data into education_employment_dimension table as SCD Type 2 rows.

    The education/employment columns of the module-level ``initial_load_df``
    are staged in ``education_employment_dimension_temp`` and copied into
    ``education_employment_dimension`` by a single INSERT. Within each
    ``local_authority_code`` the staged rows are ranked by ``date`` descending:
    the top-ranked row is stored as the current version (``end_date`` NULL,
    ``is_current`` TRUE) and every other row as a closed version (``end_date``
    NOW(), ``is_current`` FALSE). ``start_date`` is NOW() for all rows.
    """
    columns = ['local_authority_code', 'date', 'qualification_index_score', 'qualification_index_rank',
               'no_qualifications', 'level_1_and_entry_level_qualifications', 'level_2_qualifications',
               'level_3_qualifications', 'apprenticeship', 'level_4_qualifications_and_above',
               'other_qualifications', 'num_aged_16_plus_unemployed', 'num_aged_16_plus_employed',
               'num_aged_16_plus_self_employed', 'deprivation_average_score',
               'deprivation_employment_ratio', 'qualification_adjusted_employment_rate']
    # Hard-coded identifiers only (no external input), so interpolating them into SQL is safe.
    column_list = ', '.join(columns)

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, 'education_employment_dimension_temp',
            'local_authority_code VARCHAR(255), date DATE, qualification_index_score FLOAT, qualification_index_rank FLOAT, no_qualifications FLOAT, level_1_and_entry_level_qualifications FLOAT, level_2_qualifications FLOAT, level_3_qualifications FLOAT, apprenticeship FLOAT, level_4_qualifications_and_above FLOAT, other_qualifications FLOAT, num_aged_16_plus_unemployed FLOAT, num_aged_16_plus_employed FLOAT, num_aged_16_plus_self_employed FLOAT, deprivation_average_score FLOAT, deprivation_employment_ratio FLOAT, qualification_adjusted_employment_rate FLOAT')

        insert_data(cursor, 'education_employment_dimension_temp', columns, initial_load_df)

        # Rank once and derive both SCD flags from that single ranking. Rows
        # sharing a date are peers whose ROW_NUMBER() order is unspecified, so
        # ranking in two separate statements could pick a different "latest"
        # row each time and insert a row twice or not at all.
        cursor.execute(f"""
        INSERT INTO education_employment_dimension ({column_list}, start_date, end_date, is_current)
        SELECT {column_list},
               NOW() AS start_date,
               CASE WHEN ranked.rn = 1 THEN NULL ELSE NOW() END AS end_date,
               (ranked.rn = 1) AS is_current
        FROM (
            SELECT staged.*,
                   ROW_NUMBER() OVER (PARTITION BY staged.local_authority_code ORDER BY staged.date DESC) AS rn
            FROM education_employment_dimension_temp staged
        ) ranked;
        """)

        drop_temp_table(cursor, 'education_employment_dimension_temp')
        connection.commit()
        print("Initial load for education_employment_dimension completed.")
        # Log the real table name so the log file matches the printed message.
        info_logger.info("Initial load for education_employment_dimension completed")
        
def load_demographics_dimension():
    """Loads data into demographics_dimension table as SCD Type 2 rows.

    The demographic columns of the module-level ``initial_load_df`` are staged
    in ``demographics_dimension_temp`` and copied into
    ``demographics_dimension`` by a single INSERT. Within each
    ``local_authority_code`` the staged rows are ranked by ``date`` descending:
    the top-ranked row is stored as the current version (``end_date`` NULL,
    ``is_current`` TRUE) and every other row as a closed version (``end_date``
    NOW(), ``is_current`` FALSE). ``start_date`` is NOW() for all rows.
    """
    columns = ['local_authority_code', 'date', 'area_sq_km', 'age_0_20', 'age_20_40', 'age_40_60',
               'age_60_plus', 'female_population', 'all_ages', 'male_population',
               'age_dependency_ratio', 'est_num_households_with_child']
    # Hard-coded identifiers only (no external input), so interpolating them into SQL is safe.
    column_list = ', '.join(columns)

    with get_db_connection() as connection:
        cursor = connection.cursor()
        create_temp_table(cursor, 'demographics_dimension_temp',
            'local_authority_code VARCHAR(255), date DATE, area_sq_km FLOAT, age_0_20 FLOAT, age_20_40 FLOAT, age_40_60 FLOAT, age_60_plus FLOAT, female_population FLOAT, all_ages FLOAT, male_population FLOAT, age_dependency_ratio FLOAT, est_num_households_with_child FLOAT')

        insert_data(cursor, 'demographics_dimension_temp', columns, initial_load_df)

        # Rank once and derive both SCD flags from that single ranking. Rows
        # sharing a date are peers whose ROW_NUMBER() order is unspecified, so
        # ranking in two separate statements could pick a different "latest"
        # row each time and insert a row twice or not at all.
        cursor.execute(f"""
        INSERT INTO demographics_dimension ({column_list}, start_date, end_date, is_current)
        SELECT {column_list},
               NOW() AS start_date,
               CASE WHEN ranked.rn = 1 THEN NULL ELSE NOW() END AS end_date,
               (ranked.rn = 1) AS is_current
        FROM (
            SELECT staged.*,
                   ROW_NUMBER() OVER (PARTITION BY staged.local_authority_code ORDER BY staged.date DESC) AS rn
            FROM demographics_dimension_temp staged
        ) ranked;
        """)

        drop_temp_table(cursor, 'demographics_dimension_temp')
        connection.commit()
        print("Initial load for demographics_dimension completed.")
        info_logger.info("Initial load for demographics_dimension completed")
        
def load_sales_transaction_fact():
    """Populate sales_transactions_fact from the module-level ``initial_load_df``.

    Steps, all inside one connection and committed together at the end:

    1. Stage the fact columns in ``sales_transactions_fact_temp``.
    2. Insert every staged row that finds a match in all dimension tables
       (inner joins on ``local_authority_code`` and, for the dated dimensions,
       on ``date``), flagged ``is_current = TRUE`` with ``start_date = NOW()``.
    3. Close the superseded rows: fact rows still flagged current whose
       ``date`` is older than the newest staged date for the same
       ``local_authority_code`` get ``end_date = NOW()`` and
       ``is_current = FALSE``.
    4. Drop the staging table.
    """
    staging_table = 'sales_transactions_fact_temp'
    float_columns = ['average_price', 'predicted_price_unscaled', 'index', 'average_price_pct_change',
                     'annual_change_percent', 'new_price', 'old_price', 'sales_volume', 'sales_volume_log',
                     'old_sales_volume', 'detached_flat_ratio', 'detached_terraced_ratio',
                     'semi_detached_price_pct_change', 'detached_semi_detached_ratio', 'detached_price_log',
                     'semi_detached_price_log', 'flat_price_log', 'terraced_price_pct_change',
                     'terraced_price_log', 'gdhi', 'deprivation_adjusted_gdhi', 'gdhi_per_capita',
                     'foo_price', 'cash_price', 'mortgage_price', 'housing_demand_indicator',
                     'deprivation_reduction_potential', 'flat_price_pct_change',
                     'detached_price_pct_change', 'average_price_log', 'ftb_price']
    staging_columns = ['local_authority_code', 'date', 'price'] + float_columns
    # Staging DDL: the three specially typed columns, then one FLOAT column per measure.
    staging_ddl = ', '.join(
        ['local_authority_code VARCHAR(255)', 'date DATE', 'price NUMERIC']
        + [f'{name} FLOAT' for name in float_columns]
    )

    with get_db_connection() as conn:
        cur = conn.cursor()

        # Step 1: stage the raw fact columns.
        create_temp_table(cur, staging_table, staging_ddl)
        insert_data(cur, staging_table, staging_columns, initial_load_df)

        # Step 2: resolve the dimension keys and insert every matched row as current.
        cur.execute("""
        INSERT INTO sales_transactions_fact (
            local_authority_code, date, district_id, region_id, property_type_id, vehicle_id, 
            rental_id, demographics_id, education_employment_id, date_key, price, average_price, 
            predicted_price_unscaled, index, average_price_pct_change, annual_change_percent, 
            new_price, old_price, sales_volume, sales_volume_log, 
            old_sales_volume, detached_flat_ratio, detached_terraced_ratio, semi_detached_price_pct_change, 
            detached_semi_detached_ratio, detached_price_log, semi_detached_price_log, 
            flat_price_log, terraced_price_pct_change, terraced_price_log, gdhi, 
            deprivation_adjusted_gdhi, gdhi_per_capita, foo_price, cash_price, mortgage_price, 
            housing_demand_indicator, deprivation_reduction_potential, flat_price_pct_change, detached_price_pct_change, average_price_log, ftb_price, start_date, is_current
        )
        SELECT 
            temp.local_authority_code, temp.date, 
            dd.district_id, rd.region_id, ptd.property_type_id, vd.vehicle_id, 
            rentald.rental_id, ddemo.demographics_id, eed.education_employment_id, d.date_key,
            temp.price, temp.average_price, temp.predicted_price_unscaled, temp.index, 
            temp.average_price_pct_change, temp.annual_change_percent,
            temp.new_price, temp.old_price, temp.sales_volume, temp.sales_volume_log, 
            temp.old_sales_volume, temp.detached_flat_ratio, temp.detached_terraced_ratio, 
            temp.semi_detached_price_pct_change, temp.detached_semi_detached_ratio, 
            temp.detached_price_log, temp.semi_detached_price_log, temp.flat_price_log, 
            temp.terraced_price_pct_change, temp.terraced_price_log, temp.gdhi, 
            temp.deprivation_adjusted_gdhi, temp.gdhi_per_capita, temp.foo_price, temp.cash_price, 
            temp.mortgage_price, temp.housing_demand_indicator, temp.deprivation_reduction_potential,
            temp.flat_price_pct_change, temp.detached_price_pct_change, temp.average_price_log, temp.ftb_price,
            NOW() AS start_date, TRUE AS is_current
        FROM 
            sales_transactions_fact_temp temp
        JOIN 
            district_dimension dd ON dd.local_authority_code = temp.local_authority_code AND dd.date = temp.date
        JOIN 
            region_dimension rd ON rd.local_authority_code = temp.local_authority_code
        JOIN 
            property_type_dimension ptd ON ptd.local_authority_code = temp.local_authority_code AND ptd.date = temp.date
        JOIN 
            vehicle_dimension vd ON vd.local_authority_code = temp.local_authority_code
        JOIN 
            rental_dimension rentald ON rentald.local_authority_code = temp.local_authority_code AND rentald.date = temp.date
        JOIN 
            demographics_dimension ddemo ON ddemo.local_authority_code = temp.local_authority_code AND ddemo.date = temp.date
        JOIN 
            education_employment_dimension eed ON eed.local_authority_code = temp.local_authority_code AND eed.date = temp.date
        JOIN 
            date_dimension d ON d.date = temp.date;
        """)

        # Step 3: this is an UPDATE, not an insert. It closes the rows that are
        # older than the newest staged date for their authority.
        cur.execute("""
        UPDATE sales_transactions_fact
        SET end_date = NOW(), is_current = FALSE
        WHERE local_authority_code IN (
            SELECT local_authority_code 
            FROM sales_transactions_fact_temp
        ) 
        AND is_current = TRUE
        AND date < (SELECT MAX(date) FROM sales_transactions_fact_temp WHERE local_authority_code = sales_transactions_fact.local_authority_code);
        """)

        # Step 4: the staging table is no longer needed once the fact rows are in place.
        drop_temp_table(cur, staging_table)

        conn.commit()
        print("Initial load for sales_transactions_fact completed.")
        info_logger.info("Initial load for sales_transactions_fact completed")
        


In [9]:
def main():
    """
    Main function to load data into all dimension and fact tables.

    The dimension loaders run first, in a fixed order, and the fact loader runs
    last because it joins against every dimension table. region_dimension is
    loaded before vehicle_dimension, which joins against it.

    If any loader raises, the remaining steps are skipped. The error is printed
    and also written, with its traceback, to the notebook's info log. The
    exception is not re-raised, so the caller continues after main() returns.
    """
    try:
        # (loader, message printed once that loader has finished), in execution order.
        dimension_steps = (
            (load_region_dimension, "Loaded region dimension."),
            (load_date_dimension, "Loaded date dimension."),
            (load_rental_dimension, "Loaded rental dimension."),
            (load_vehicle_dimension, "Loaded vehicle dimension."),
            (load_district_dimension, "Loaded district dimension."),
            (load_property_type_dimension, "Loaded property type dimension."),
            (load_education_employment_dimension, "Loaded education and employment dimension."),
            (load_demographics_dimension, "Loaded demographics dimension."),
        )
        for load_dimension, done_message in dimension_steps:
            load_dimension()
            print(done_message)

        # Confirming successful completion of the dimension phase
        print("All dimensions have been loaded successfully.")

        # Loading data into the sales_transactions_fact table
        load_sales_transaction_fact()
        print("Loaded sales transactions fact.")

        # Confirming successful completion
        print("All facts have been loaded successfully.")

    except Exception as e:
        # Report the failure on screen and persist it, with traceback, in the
        # log file so an aborted load can be diagnosed after the session ends.
        print(f"An error occurred: {e}")
        info_logger.error("Initial load aborted: %s", e, exc_info=True)

# Entry point: __name__ is "__main__" when this cell runs in the notebook kernel
# (or when the file is executed as a script), so the load starts here.
if __name__ == "__main__":
    # initial_load_df is the module-level name that the load_* functions read.
    # It is a copy, so the loaders never work on data_load_df itself
    # (presumably a pandas DataFrame loaded earlier in the notebook; verify).
    initial_load_df = data_load_df.copy()

    # Run every dimension load and then the fact load; main() reports failures itself.
    main()


Initial load for region_dimension completed.


Loaded region dimension.
Initial load for date_dimension completed.
Loaded date dimension.


Initial load for rental_dimension completed.
Loaded rental dimension.
Initial load for vehicle_dimension completed.
Loaded vehicle dimension.


Initial load for district_dimension completed.
Loaded district dimension.


Initial load for property_type_dimension completed.
Loaded property type dimension.


Initial load for education_employment_dimension completed.
Loaded education and employment dimension.


Initial load for demographics_dimension completed.
Loaded demographics dimension.
All dimensions have been loaded successfully.


Initial load for sales_transactions_fact completed.
Loaded sales transactions fact.
All facts have been loaded successfully.


In [10]:

# Connect to PostgreSQL database.
# NOTE(review): the literal password below is a credential committed to the
# notebook. It is kept only as a fallback so existing runs keep working; set
# UK_REAL_ESTATE_DB_PASSWORD in the environment and remove the fallback.
connection = psycopg2.connect(
    dbname='UK Real Estate DB',
    user='postgres',
    password=os.environ.get('UK_REAL_ESTATE_DB_PASSWORD', '123!@*qweQWE'),
    host='localhost',
    port='5432'
)

# Listing target tables and their SCD types
tables_metadata = [
    {"table_name": "region_dimension", "scd_type": 1},
    {"table_name": "date_dimension", "scd_type": 0},
    {"table_name": "sales_transactions_fact", "scd_type": 2},
    {"table_name": "vehicle_dimension", "scd_type": 1},
    {"table_name": "rental_dimension", "scd_type": 2},
    {"table_name": "demographics_dimension", "scd_type": 2},
    {"table_name": "education_employment_dimension", "scd_type": 2},
    {"table_name": "property_type_dimension", "scd_type": 2},
    {"table_name": "district_dimension", "scd_type": 2}
]

# Setting the extraction timestamp as the current time. The same value is used
# for last_extracted_date, last_modified_date and created_date of every row.
last_extracted_date = datetime.now()

# One parameterized statement, reused for every table
insert_metadata_query = """
    INSERT INTO metadata (table_name, last_extracted_date, scd_type, last_modified_date, created_date)
    VALUES (%s, %s, %s, %s, %s);
    """

metadata_rows = [
    (
        table['table_name'],
        last_extracted_date,
        table['scd_type'],
        last_extracted_date,
        last_extracted_date
    )
    for table in tables_metadata
]

try:
    # `with connection` commits when the block succeeds and rolls back when it
    # raises; it does not close the connection, hence the `finally` below.
    with connection:
        with connection.cursor() as cursor:
            cursor.executemany(insert_metadata_query, metadata_rows)
finally:
    # Release the connection even when an INSERT fails.
    connection.close()

print("Metadata for all tables updated successfully.")
info_logger.info("Metadata for all tables updated successfully")

Metadata for all tables updated successfully.
