In [3]:
import os
import pathlib
import pandas as pd
from snowflake.snowpark import Session
from dotenv import load_dotenv

load_dotenv()

# Snowflake connection parameters from env variables
snowflake_params = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA"),  # Raw schema for stage
}

new_database = snowflake_params["database"]
raw_schema = snowflake_params["schema"]
cleaned_schema = "ERD_SCHEMA_CLEANED"
final_schema = "ERD_SCHEMA_STAR"

# This is the NEW FINAL TRANSFORMED stage exclusively for the star schema transformed data
final_stage = "FINAL_TRANSFORMED_STAGE"
cleaning_stage = "DATA_CLEANING_STAGE"

# Utility function to generate incremental surrogate keys

def generate_surrogate_keys(df, key_name="id"):
    df = df.copy()
    df[key_name] = range(1, len(df) + 1)
    return df

# === Dim_Location transformation ===
def create_dim_location(df_invoice: pd.DataFrame, df_customer: pd.DataFrame) -> pd.DataFrame:
    # Extract distinct location attributes from Invoice and Customer billing info
    loc_invoice = df_invoice[["BILLINGCITY", "BILLINGSTATE", "BILLINGCOUNTRY", "BILLINGPOSTALCODE"]].drop_duplicates()
    loc_invoice.columns = ["CITY", "STATE", "COUNTRY", "POSTALCODE"]
    loc_customer = df_customer[["CITY", "STATE", "COUNTRY", "POSTALCODE"]].drop_duplicates()
    loc_all = pd.concat([loc_invoice, loc_customer], ignore_index=True).drop_duplicates().reset_index(drop=True)

    # Assign surrogate integer keys
    loc_all = generate_surrogate_keys(loc_all, "LOCATION_ID")
    return loc_all

# === FactSales transformation ===
def create_fact_sales(
    df_invoiceline: pd.DataFrame,
    df_invoice: pd.DataFrame,
    df_track: pd.DataFrame,
    df_album: pd.DataFrame,
    df_artist: pd.DataFrame,
    df_customer: pd.DataFrame,
    df_employee: pd.DataFrame,
    df_playlisttrack: pd.DataFrame,
    df_dim_location: pd.DataFrame,
) -> pd.DataFrame:
    # Join InvoiceLine to Invoice
    fact = df_invoiceline.merge(df_invoice, on="INVOICEID", how="left", suffixes=("", "_inv"))

    # Join to Track
    fact = fact.merge(df_track, left_on="TRACKID", right_on="TRACKID", how="left", suffixes=("", "_trk"))

    # Ensure ALBUMID columns are int
    fact['ALBUMID'] = pd.to_numeric(fact['ALBUMID'], errors='coerce').fillna(0).astype(int)
    df_album['ALBUMID'] = pd.to_numeric(df_album['ALBUMID'], errors='coerce').fillna(0).astype(int)

    # Join Track -> Album
    fact = fact.merge(df_album, left_on="ALBUMID", right_on="ALBUMID", how="left", suffixes=("", "_alb"))

    # Ensure ARTISTID columns are int
    fact['ARTISTID'] = pd.to_numeric(fact['ARTISTID'], errors='coerce').fillna(0).astype(int)
    df_artist['ARTISTID'] = pd.to_numeric(df_artist['ARTISTID'], errors='coerce').fillna(0).astype(int)

    # Join Album -> Artist
    fact = fact.merge(df_artist, left_on="ARTISTID", right_on="ARTISTID", how="left", suffixes=("", "_art"))

    # Ensure CUSTOMERID columns are int
    fact['CUSTOMERID'] = pd.to_numeric(fact['CUSTOMERID'], errors='coerce').fillna(0).astype(int)
    df_customer['CUSTOMERID'] = pd.to_numeric(df_customer['CUSTOMERID'], errors='coerce').fillna(0).astype(int)

    # Join InvoiceLine/Invoice -> Customer
    fact = fact.merge(df_customer, left_on="CUSTOMERID", right_on="CUSTOMERID", how="left", suffixes=("", "_cust"))

    # Ensure SUPPORTREPID and EMPLOYEEID columns are int
    fact['SUPPORTREPID'] = pd.to_numeric(fact.get('SUPPORTREPID', 0), errors='coerce').fillna(0).astype(int)
    df_employee['EMPLOYEEID'] = pd.to_numeric(df_employee['EMPLOYEEID'], errors='coerce').fillna(0).astype(int)

    # Join Customer -> Employee (SupportRep)
    fact = fact.merge(df_employee, left_on="SUPPORTREPID", right_on="EMPLOYEEID", how="left", suffixes=("", "_emp"))

    # Ensure TRACKID columns are int for playlisttrack join
    fact['TRACKID'] = pd.to_numeric(fact['TRACKID'], errors='coerce').fillna(0).astype(int)
    df_playlisttrack['TRACKID'] = pd.to_numeric(df_playlisttrack['TRACKID'], errors='coerce').fillna(0).astype(int)

    # Join PlaylistTrack on TrackID to get Playlist_id
    fact = fact.merge(df_playlisttrack, left_on="TRACKID", right_on="TRACKID", how="left", suffixes=("", "_plt"))

    # Join with Dim_Location on city/state/country/postalcode to get Location_id
    fact = fact.merge(
        df_dim_location,
        left_on=["BILLINGCITY", "BILLINGSTATE", "BILLINGCOUNTRY", "BILLINGPOSTALCODE"],
        right_on=["CITY", "STATE", "COUNTRY", "POSTALCODE"],
        how="left",
    )

    # Calculate Total_amount
    fact["TOTAL_AMOUNT"] = fact["UNITPRICE"] * fact["QUANTITY"]

    # Select and rename columns per star schema
    fact_sales = fact[
        [
            "INVOICELINEID",        # InvoiceLine_id (PK)
            "LOCATION_ID",          # Location_id (FK)
            "INVOICEID",            # Invoice_id (FK)
            "TRACKID",              # Track_id (FK)
            "CUSTOMERID",           # Customer_id (FK)
            "EMPLOYEEID",           # Employee_id (FK)
            "MEDIATYPEID",          # MediaType_id (FK)
            "INVOICEDATE",          # Invoice_date
            "UNITPRICE",            # Unit_price
            "QUANTITY",             # Quantity
            "TOTAL_AMOUNT",         # Total_amount
            "PLAYLISTID",           # Playlist_id (FK)
            "ALBUMID",              # Album_id (FK)
            "ARTISTID",             # Artist_id (FK)
            "MILLISECONDS",         # Milliseconds
            "BYTES",                # Bytes
        ]
    ].copy()

    # Rename columns to match star schema
    fact_sales.rename(
        columns={
            "INVOICELINEID": "INVOICELINE_ID",
            "INVOICEID": "INVOICE_ID",
            "TRACKID": "TRACK_ID",
            "CUSTOMERID": "CUSTOMER_ID",
            "EMPLOYEEID": "EMPLOYEE_ID",
            "MEDIATYPEID": "MEDIATYPE_ID",
            "INVOICEDATE": "INVOICE_DATE",
            "UNITPRICE": "UNIT_PRICE",
            "QUANTITY": "QUANTITY",
            "TOTAL_AMOUNT": "TOTAL_AMOUNT",
            "PLAYLISTID": "PLAYLIST_ID",
            "ALBUMID": "ALBUM_ID",
            "ARTISTID": "ARTIST_ID",
            "MILLISECONDS": "MILLISECONDS",
            "BYTES": "BYTES",
        },
        inplace=True
    )

    # Convert dates to datetime
    fact_sales["INVOICE_DATE"] = pd.to_datetime(fact_sales["INVOICE_DATE"], errors="coerce")

    return fact_sales
# === Snowflake interaction functions for final STAR schema ===

def create_final_stage(session):
    # Create a NEW STAGE specifically for the final transformed STAR schema data
    sql = f'''
        CREATE STAGE IF NOT EXISTS "{new_database}"."{raw_schema}"."{final_stage}"
        FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1)
    '''
    print("Creating final transformed stage if not exists...")
    session.sql(sql).collect()

def create_star_schema_table(session, table_name, create_sql):
    print(f"Creating star schema table {final_schema}.{table_name} if not exists...")
    session.sql(create_sql).collect()

def export_to_csv(df, table_name):
    filename = f"{table_name}_star.csv"
    df.to_csv(filename, index=False)
    print(f"Exported {table_name} star schema data to {filename}")
    return filename

def remove_file_from_stage(session, filename):
    # Remove from the NEW final transformed stage
    remove_sql = f"REMOVE @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\"/{filename}.gz"
    print(f"Removing old staged file {filename}.gz from final transformed stage...")
    session.sql(remove_sql).collect()

def upload_to_stage(session, csv_file):
    csv_path = pathlib.Path(csv_file).resolve().as_posix()
    filename = pathlib.Path(csv_file).name
    remove_file_from_stage(session, filename)
    put_sql = f"PUT 'file://{csv_path}' @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\" AUTO_COMPRESS=TRUE"
    print(f"Uploading {csv_path} to final transformed stage {new_database}.{raw_schema}.{final_stage} ...")
    res = session.sql(put_sql).collect()
    print("PUT result:", res)

def copy_into_star_table(session, table_name, csv_file):
    copy_sql = f'''
        COPY INTO "{final_schema}"."{table_name.upper()}"
        FROM @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\"/{csv_file}.gz
        FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY='"' SKIP_HEADER=1)
        ON_ERROR = 'CONTINUE'
    '''
    print(f"Copying data into {final_schema}.{table_name.upper()} from final transformed stage file {csv_file}.gz ...")
    res = session.sql(copy_sql).collect()
    print("COPY INTO result:", res)

def truncate_star_table(session, table_name):
    truncate_sql = f'TRUNCATE TABLE "{final_schema}"."{table_name.upper()}"'
    print(f"Truncating table {final_schema}.{table_name.upper()} before loading fresh data...")
    session.sql(truncate_sql).collect()



# === Main ETL pipeline extension for FINAL TRANSFORMED STAR schema ===
import os

def main_star_schema():
    with Session.builder.configs(snowflake_params).create() as session:
        create_final_stage(session)  # Create the NEW final transformed stage

        base_path = os.path.join(os.getcwd(), "ERD_cleaned") + os.sep

        # Load cleaned CSVs needed from ERD_cleaned folder
        df_customer = pd.read_csv(base_path + "Customer_cleaned.csv")
        df_employee = pd.read_csv(base_path + "Employee_cleaned.csv")
        df_artist = pd.read_csv(base_path + "Artist_cleaned.csv")
        df_album = pd.read_csv(base_path + "Album_cleaned.csv")
        df_invoice = pd.read_csv(base_path + "Invoice_cleaned.csv")
        df_invoiceline = pd.read_csv(base_path + "InvoiceLine_cleaned.csv")
        df_track = pd.read_csv(base_path + "Track_cleaned.csv")
        df_playlisttrack = pd.read_csv(base_path + "PlaylistTrack_cleaned.csv")

        # rest of your code unchanged...
        print("Creating Dim_Location...")
        df_dim_location = create_dim_location(df_invoice, df_customer)

        print("Creating FactSales...")
        df_fact_sales = create_fact_sales(
            df_invoiceline,
            df_invoice,
            df_track,
            df_album,
            df_artist,
            df_customer,
            df_employee,
            df_playlisttrack,
            df_dim_location,
        )

        # Export and load Dim_Location
        loc_csv = export_to_csv(df_dim_location, "Dim_Location")
        upload_to_stage(session, loc_csv)
        create_star_schema_table(session, "Dim_Location", f"CREATE TABLE IF NOT EXISTS \"{final_schema}\".\"Dim_Location\" (LOCATION_ID INT PRIMARY KEY, CITY STRING, STATE STRING, COUNTRY STRING, POSTALCODE STRING)")
        truncate_star_table(session, "Dim_Location")
        copy_into_star_table(session, "Dim_Location", loc_csv)

        # Export and load FactSales
        fact_csv = export_to_csv(df_fact_sales, "FactSales")
        upload_to_stage(session, fact_csv)
        create_star_schema_table(session, "FactSales", 
            f"CREATE TABLE IF NOT EXISTS \"{final_schema}\".\"FactSales\" ("
            "INVOICELINE_ID INT PRIMARY KEY, LOCATION_ID INT, INVOICE_ID INT, TRACK_ID INT, CUSTOMER_ID INT, "
            "EMPLOYEE_ID INT, MEDIATYPE_ID INT, INVOICE_DATE DATE, UNIT_PRICE FLOAT, QUANTITY INT, TOTAL_AMOUNT FLOAT, "
            "PLAYLIST_ID INT, ALBUM_ID INT, ARTIST_ID INT, MILLISECONDS INT, BYTES INT)"
        )
        truncate_star_table(session, "FactSales")
        copy_into_star_table(session, "FactSales", fact_csv)

        print("Final transformed star schema tables loaded successfully.")

if __name__ == "__main__":
    main_star_schema()


Creating final transformed stage if not exists...
Creating Dim_Location...
Creating FactSales...
Exported Dim_Location star schema data to Dim_Location_star.csv
Removing old staged file Dim_Location_star.csv.gz from final transformed stage...
Uploading E:/IBA_MS_DS 2026/Data WareHousing and Analysis/BI_project/Dim_Location_star.csv to final transformed stage CHINOOK_DATABASE.ERD_SCHEMA.FINAL_TRANSFORMED_STAGE ...
PUT result: [Row(source='Dim_Location_star.csv', target='Dim_Location_star.csv.gz', source_size=2042, target_size=1248, source_compression='NONE', target_compression='GZIP', status='UPLOADED', message='')]
Creating star schema table ERD_SCHEMA_STAR.Dim_Location if not exists...


SnowparkSQLException: (1304): 01bc6a21-0001-2863-0001-824a000aa7d2: 002003 (02000): SQL compilation error:
Schema 'CHINOOK_DATABASE.ERD_SCHEMA_STAR' does not exist or not authorized.

In [None]:
import os
import pathlib
import pandas as pd
from snowflake.snowpark import Session
from dotenv import load_dotenv

load_dotenv()

# Snowflake connection parameters from env variables
snowflake_params = {
    "account": os.getenv("SNOWFLAKE_ACCOUNT"),
    "user": os.getenv("SNOWFLAKE_USER"),
    "password": os.getenv("SNOWFLAKE_PASSWORD"),
    "role": os.getenv("SNOWFLAKE_ROLE"),
    "warehouse": os.getenv("SNOWFLAKE_WAREHOUSE"),
    "database": os.getenv("SNOWFLAKE_DATABASE"),
    "schema": os.getenv("SNOWFLAKE_SCHEMA"),  # Raw schema for stage
}

new_database = snowflake_params["database"]
raw_schema = snowflake_params["schema"]
cleaned_schema = "ERD_SCHEMA_CLEANED"
final_schema = "ERD_SCHEMA_STAR"

final_stage = "FINAL_TRANSFORMED_STAGE"
cleaning_stage = "DATA_CLEANING_STAGE"

def generate_surrogate_keys(df, key_name="id"):
    df = df.copy()
    df[key_name] = range(1, len(df) + 1)
    return df

def create_dim_location(df_invoice: pd.DataFrame, df_customer: pd.DataFrame) -> pd.DataFrame:
    loc_invoice = df_invoice[["BILLINGCITY", "BILLINGSTATE", "BILLINGCOUNTRY", "BILLINGPOSTALCODE"]].drop_duplicates()
    loc_invoice.columns = ["CITY", "STATE", "COUNTRY", "POSTALCODE"]
    loc_customer = df_customer[["CITY", "STATE", "COUNTRY", "POSTALCODE"]].drop_duplicates()
    loc_all = pd.concat([loc_invoice, loc_customer], ignore_index=True).drop_duplicates().reset_index(drop=True)
    loc_all = generate_surrogate_keys(loc_all, "LOCATION_ID")
    return loc_all

def create_fact_sales(
    df_invoiceline, df_invoice, df_track, df_album, df_artist, df_customer, df_employee, df_playlisttrack, df_dim_location,
):
    fact = df_invoiceline.merge(df_invoice, on="INVOICEID", how="left", suffixes=("", "_inv"))
    fact = fact.merge(df_track, left_on="TRACKID", right_on="TRACKID", how="left", suffixes=("", "_trk"))

    fact['ALBUMID'] = pd.to_numeric(fact['ALBUMID'], errors='coerce').fillna(0).astype(int)
    df_album['ALBUMID'] = pd.to_numeric(df_album['ALBUMID'], errors='coerce').fillna(0).astype(int)
    fact = fact.merge(df_album, left_on="ALBUMID", right_on="ALBUMID", how="left", suffixes=("", "_alb"))

    fact['ARTISTID'] = pd.to_numeric(fact['ARTISTID'], errors='coerce').fillna(0).astype(int)
    df_artist['ARTISTID'] = pd.to_numeric(df_artist['ARTISTID'], errors='coerce').fillna(0).astype(int)
    fact = fact.merge(df_artist, left_on="ARTISTID", right_on="ARTISTID", how="left", suffixes=("", "_art"))

    fact['CUSTOMERID'] = pd.to_numeric(fact['CUSTOMERID'], errors='coerce').fillna(0).astype(int)
    df_customer['CUSTOMERID'] = pd.to_numeric(df_customer['CUSTOMERID'], errors='coerce').fillna(0).astype(int)
    fact = fact.merge(df_customer, left_on="CUSTOMERID", right_on="CUSTOMERID", how="left", suffixes=("", "_cust"))

    fact['SUPPORTREPID'] = pd.to_numeric(fact.get('SUPPORTREPID', 0), errors='coerce').fillna(0).astype(int)
    df_employee['EMPLOYEEID'] = pd.to_numeric(df_employee['EMPLOYEEID'], errors='coerce').fillna(0).astype(int)
    fact = fact.merge(df_employee, left_on="SUPPORTREPID", right_on="EMPLOYEEID", how="left", suffixes=("", "_emp"))

    fact['TRACKID'] = pd.to_numeric(fact['TRACKID'], errors='coerce').fillna(0).astype(int)
    df_playlisttrack['TRACKID'] = pd.to_numeric(df_playlisttrack['TRACKID'], errors='coerce').fillna(0).astype(int)
    fact = fact.merge(df_playlisttrack, left_on="TRACKID", right_on="TRACKID", how="left", suffixes=("", "_plt"))

    fact = fact.merge(
        df_dim_location,
        left_on=["BILLINGCITY", "BILLINGSTATE", "BILLINGCOUNTRY", "BILLINGPOSTALCODE"],
        right_on=["CITY", "STATE", "COUNTRY", "POSTALCODE"],
        how="left",
    )

    fact["TOTAL_AMOUNT"] = fact["UNITPRICE"] * fact["QUANTITY"]

    fact_sales = fact[
        [
            "INVOICELINEID", "LOCATION_ID", "INVOICEID", "TRACKID", "CUSTOMERID", "EMPLOYEEID",
            "MEDIATYPEID", "INVOICEDATE", "UNITPRICE", "QUANTITY", "TOTAL_AMOUNT",
            "PLAYLISTID", "ALBUMID", "ARTISTID", "MILLISECONDS", "BYTES",
        ]
    ].copy()

    fact_sales.rename(
        columns={
            "INVOICELINEID": "INVOICELINE_ID",
            "INVOICEID": "INVOICE_ID",
            "TRACKID": "TRACK_ID",
            "CUSTOMERID": "CUSTOMER_ID",
            "EMPLOYEEID": "EMPLOYEE_ID",
            "MEDIATYPEID": "MEDIATYPE_ID",
            "INVOICEDATE": "INVOICE_DATE",
            "UNITPRICE": "UNIT_PRICE",
            "QUANTITY": "QUANTITY",
            "TOTAL_AMOUNT": "TOTAL_AMOUNT",
            "PLAYLISTID": "PLAYLIST_ID",
            "ALBUMID": "ALBUM_ID",
            "ARTISTID": "ARTIST_ID",
            "MILLISECONDS": "MILLISECONDS",
            "BYTES": "BYTES",
        },
        inplace=True
    )

    fact_sales["INVOICE_DATE"] = pd.to_datetime(fact_sales["INVOICE_DATE"], errors="coerce")

    return fact_sales

def create_final_stage(session):
    sql = f'''
        CREATE STAGE IF NOT EXISTS "{new_database}"."{raw_schema}"."{final_stage}"
        FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY = '"' SKIP_HEADER = 1)
    '''
    print("Creating final transformed stage if not exists...")
    session.sql(sql).collect()

def create_star_schema_table(session, table_name, create_sql):
    print(f"Creating star schema table {final_schema}.{table_name} if not exists...")
    session.sql(create_sql).collect()

def export_to_csv(df, table_name):
    folder = "ERD_cleaned"
    os.makedirs(folder, exist_ok=True)
    filename = os.path.join(folder, f"{table_name}_star.csv")
    df.to_csv(filename, index=False)
    print(f"Exported {table_name} star schema data to {filename}")
    return filename

def remove_file_from_stage(session, filename):
    remove_sql = f"REMOVE @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\"/{filename}.gz"
    print(f"Removing old staged file {filename}.gz from final transformed stage...")
    session.sql(remove_sql).collect()

def upload_to_stage(session, csv_file):
    csv_path = pathlib.Path(csv_file).resolve().as_posix()
    filename = pathlib.Path(csv_file).name
    remove_file_from_stage(session, filename)
    put_sql = f"PUT 'file://{csv_path}' @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\" AUTO_COMPRESS=TRUE"
    print(f"Uploading {csv_path} to final transformed stage {new_database}.{raw_schema}.{final_stage} ...")
    res = session.sql(put_sql).collect()
    print("PUT result:", res)

def copy_into_star_table(session, table_name, csv_file):
    copy_sql = f'''
        COPY INTO "{final_schema}"."{table_name.upper()}"
        FROM @\"{new_database}\".\"{raw_schema}\".\"{final_stage}\"/{csv_file}.gz
        FILE_FORMAT = (TYPE = 'CSV' FIELD_OPTIONALLY_ENCLOSED_BY='"' SKIP_HEADER=1)
        ON_ERROR = 'CONTINUE'
    '''
    print(f"Copying data into {final_schema}.{table_name.upper()} from final transformed stage file {csv_file}.gz ...")
    res = session.sql(copy_sql).collect()
    print("COPY INTO result:", res)

def truncate_star_table(session, table_name):
    truncate_sql = f'TRUNCATE TABLE "{final_schema}"."{table_name.upper()}"'
    print(f"Truncating table {final_schema}.{table_name.upper()} before loading fresh data...")
    session.sql(truncate_sql).collect()
def create_final_schema(session):
    sql = f'CREATE SCHEMA IF NOT EXISTS "{new_database}"."{final_schema}"'
    print(f"Creating final star schema {final_schema} if not exists...")
    session.sql(sql).collect()

def main_star_schema():
    with Session.builder.configs(snowflake_params).create() as session:
        create_final_schema(session)   # <--- ensure schema exists

        #create_final_stage(session)    # create stage
        # rest of your code ...
        create_final_stage(session)

        base_path = os.path.join(os.getcwd(), "ERD_cleaned") + os.sep

        df_customer = pd.read_csv(base_path + "Customer_cleaned.csv")
        df_employee = pd.read_csv(base_path + "Employee_cleaned.csv")
        df_artist = pd.read_csv(base_path + "Artist_cleaned.csv")
        df_album = pd.read_csv(base_path + "Album_cleaned.csv")
        df_invoice = pd.read_csv(base_path + "Invoice_cleaned.csv")
        df_invoiceline = pd.read_csv(base_path + "InvoiceLine_cleaned.csv")
        df_track = pd.read_csv(base_path + "Track_cleaned.csv")
        df_playlisttrack = pd.read_csv(base_path + "PlaylistTrack_cleaned.csv")

        print("Creating Dim_Location...")
        df_dim_location = create_dim_location(df_invoice, df_customer)

        print("Creating FactSales...")
        df_fact_sales = create_fact_sales(
            df_invoiceline,
            df_invoice,
            df_track,
            df_album,
            df_artist,
            df_customer,
            df_employee,
            df_playlisttrack,
            df_dim_location,
        )

        loc_csv = export_to_csv(df_dim_location, "Dim_Location")
        upload_to_stage(session, loc_csv)
        create_star_schema_table(session, "Dim_Location", f"CREATE TABLE IF NOT EXISTS \"{final_schema}\".\"Dim_Location\" (LOCATION_ID INT PRIMARY KEY, CITY STRING, STATE STRING, COUNTRY STRING, POSTALCODE STRING)")
        truncate_star_table(session, "Dim_Location")
        copy_into_star_table(session, "Dim_Location", loc_csv)

        fact_csv = export_to_csv(df_fact_sales, "FactSales")
        upload_to_stage(session, fact_csv)
        create_star_schema_table(session, "FactSales", 
            f"CREATE TABLE IF NOT EXISTS \"{final_schema}\".\"FactSales\" ("
            "INVOICELINE_ID INT PRIMARY KEY, LOCATION_ID INT, INVOICE_ID INT, TRACK_ID INT, CUSTOMER_ID INT, "
            "EMPLOYEE_ID INT, MEDIATYPE_ID INT, INVOICE_DATE DATE, UNIT_PRICE FLOAT, QUANTITY INT, TOTAL_AMOUNT FLOAT, "
            "PLAYLIST_ID INT, ALBUM_ID INT, ARTIST_ID INT, MILLISECONDS INT, BYTES INT)"
        )
        truncate_star_table(session, "FactSales")
        copy_into_star_table(session, "FactSales", fact_csv)

        print("Final transformed star schema tables loaded successfully.")

if __name__ == "__main__":
    main_star_schema()


Creating final transformed stage if not exists...
Creating Dim_Location...
Creating FactSales...
Exported Dim_Location star schema data to ERD_cleaned\Dim_Location_star.csv
Removing old staged file Dim_Location_star.csv.gz from final transformed stage...
Uploading E:/IBA_MS_DS 2026/Data WareHousing and Analysis/BI_project/ERD_cleaned/Dim_Location_star.csv to final transformed stage CHINOOK_DATABASE.ERD_SCHEMA.FINAL_TRANSFORMED_STAGE ...
PUT result: [Row(source='Dim_Location_star.csv', target='Dim_Location_star.csv.gz', source_size=2042, target_size=1248, source_compression='NONE', target_compression='GZIP', status='UPLOADED', message='')]
Creating star schema table ERD_SCHEMA_STAR.Dim_Location if not exists...


SnowparkSQLException: (1304): 01bc6a3c-0001-27cf-0001-824a000ab4ce: 002003 (02000): SQL compilation error:
Schema 'CHINOOK_DATABASE.ERD_SCHEMA_STAR' does not exist or not authorized.