In [1]:
import pandas as pd
from sqlalchemy import create_engine,text
import logging

# Setup logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Database connection strings (replace with actual details)
source_db = 'postgresql://postgres:postgres@pgdb:5432/Adventureworks'
target_db = 'postgresql://postgres:postgres@pgdb:5432/AdventureworksDW'

# Create SQLAlchemy engines for Source and Target Databases
source_engine = create_engine(source_db)
target_engine = create_engine(target_db)

def extract(query, engine):
    """Extract data from the source database."""
    try:
        df = pd.read_sql_query(query, con=engine)
        logging.info(f"Data extracted successfully for query: {query}")
        return df
    except Exception as e:
        logging.error(f"Error extracting data: {e}")
        raise

def load(df, table_name, engine):
    """Load data into the target database."""
    try:
        df.to_sql(table_name, con=engine, if_exists='replace', index=False)
        logging.info(f"Data loaded into {table_name}")
    except Exception as e:
        logging.error(f"Error loading data into {table_name}: {e}")
        raise

# Transformation function for DimCurrency
def populate_dim_currency(currency_df, country_region_currency_df):
    dim_currency_df = currency_df.merge(country_region_currency_df, left_on='currencycode', right_on='currencycode', how='inner')
    dim_currency_df = dim_currency_df[['currencycode', 'name']].drop_duplicates().rename(columns={'currencycode': 'currencykey', 'name': 'currencyname'})
    # Additional logic to populate CurrencyAlternate if available or required
    return dim_currency_df

# Transformation function for FactCurrencyRate
def populate_fact_currency_rate(currency_rate_df, dim_currency_df):
    # Correct the column names based on the actual DataFrame columns
    fact_currency_rate_df = currency_rate_df.merge(dim_currency_df, left_on='tocurrencycode', right_on='currencykey', how='inner')
    
    fact_currency_rate_df = fact_currency_rate_df[['currencyrateid', 'currencykey', 'averagerate']].rename(columns={'averagerate': 'endofdayrate'})
    return fact_currency_rate_df
    
# Transformation function for DimDepartmentGroup
def transform_dim_department_group(department_df):
    dim_department_group_df = department_df[['departmentid', 'groupname']].copy().rename(columns={'departmentid': 'departmentgroupkey', 'groupname': 'departmentgroupname'})
    dim_department_group_df['parentdepartmentgroupkey'] = None  # Placeholder, adjust as needed
    return dim_department_group_df



In [2]:
if __name__ == "__main__": # ! Unnecessary, we're no running python script !  
    # Extract source data
    df_country_region_currency = extract('SELECT * FROM sales.countryregioncurrency', source_engine)
    df_currency = extract('SELECT * FROM sales.currency', source_engine)
    df_currency_rate = extract('SELECT * FROM sales.currencyrate', source_engine)
    df_department = extract('SELECT * FROM humanresources.department', source_engine)

    # Transform and load data for DimCurrency
  

2024-03-19 02:21:01,320 - INFO - Data extracted successfully for query: SELECT * FROM sales.countryregioncurrency
2024-03-19 02:21:01,321 - INFO - Data extracted successfully for query: SELECT * FROM sales.currency
2024-03-19 02:21:01,378 - INFO - Data extracted successfully for query: SELECT * FROM sales.currencyrate
2024-03-19 02:21:01,379 - INFO - Data extracted successfully for query: SELECT * FROM humanresources.department


In [3]:
df_dim_currency = populate_dim_currency(df_currency, df_country_region_currency)
load(df_dim_currency, 'DimCurrency', target_engine)


2024-03-19 02:21:01,419 - INFO - Data loaded into DimCurrency


In [4]:
df_currency_rate.head()

Unnamed: 0,currencyrateid,currencyratedate,fromcurrencycode,tocurrencycode,averagerate,endofdayrate,modifieddate
0,1,2011-05-31,USD,ARS,1.0,1.0002,2011-05-31
1,2,2011-05-31,USD,AUD,1.5491,1.55,2011-05-31
2,3,2011-05-31,USD,BRL,1.9379,1.9419,2011-05-31
3,4,2011-05-31,USD,CAD,1.4641,1.4683,2011-05-31
4,5,2011-05-31,USD,CNY,8.2781,8.2784,2011-05-31


In [5]:
df_dim_currency.head()

Unnamed: 0,currencykey,currencyname
0,AED,Emirati Dirham
1,ARS,Argentine Peso
2,ATS,Shilling
3,AUD,Australian Dollar
4,BBD,Barbados Dollar


In [6]:
# Transform and load data for FactCurrencyRate
df_fact_currency_rate = populate_fact_currency_rate(df_currency_rate, df_dim_currency)
load(df_fact_currency_rate, 'FactCurrencyRate', target_engine)


2024-03-19 02:21:01,536 - INFO - Data loaded into FactCurrencyRate


In [7]:

# Transform and load data for DimDepartmentGroup
df_dim_department_group = transform_dim_department_group(df_department)
load(df_dim_department_group, 'DimDepartmentGroup', target_engine)

# Note: Transformation and loading for a potential FactFinance table would follow here,
# depending on the availability of financial metrics or transactional data.

2024-03-19 02:21:01,564 - INFO - Data loaded into DimDepartmentGroup


In [8]:
df_dim_department_group.head()

Unnamed: 0,departmentgroupkey,departmentgroupname,parentdepartmentgroupkey
0,1,Research and Development,
1,2,Research and Development,
2,3,Sales and Marketing,
3,4,Sales and Marketing,
4,5,Inventory Management,


In [9]:
query = text("SELECT * FROM DimDepartmentGroup")

with target_engine.connect() as connection:
    result = connection.execute(query)
    df = pd.DataFrame(result.fetchall(), columns=result.keys())

In [10]:
df

Unnamed: 0,departmentgroupkey,parentdepartmentgroupkey,departmentgroupname
0,1,,Corporate
1,2,1.0,Executive General and Administration
2,3,1.0,Inventory Management
3,4,1.0,Manufacturing
4,5,1.0,Quality Assurance
5,6,1.0,Research and Development
6,7,1.0,Sales and Marketing
