In [24]:
from Extractor_Code.s3_extractor import PublicS3Extractor
from Extractor_Code.main_extractor import MainExtractor
from Extractor_Code.database_connector import DatabaseConnector
from Extractor_Code.csv_extractor import CSVExtractor
from Extractor_Code.json_extractor import JSONExtractor 

In [None]:
from dotenv import load_dotenv
import yaml
import os

# adjust this path to where your .env actually lives
load_dotenv(r"C:\Users\ASUS\Desktop\FDE\.env")

config_path = r"C:\Users\ASUS\Desktop\FDE\config.yaml"
with open(config_path) as f:
    raw = f.read()


# expands ${DB_HOST}, ${DB_USER}, etc. using os.environ
expanded = os.path.expandvars(raw)
config = yaml.safe_load(expanded)

# sanity check: should be a dict-of-dicts
print("CONFIG:", config)
print("TYPE of config['database']:", type(config['database']))

CONFIG: {'database': {'host': 'localhost', 'database': 'FDE', 'user': 'postgres', 'password': 'newpassword', 'port': 5432}, 's3': {'bucket_name': 'firstworkshop', 'region': 'us-east-1', 'files': {'JSON/products.json': 'lnd_products_json', 'JSON/sales.json': 'lnd_sales_json', 'CSV/customers.csv': 'lnd_customers_csv', 'CSV/products.csv': 'lnd_products_csv', 'CSV/sales.csv': 'lnd_sales_csv'}}, 'api': {'endpoints': {'https://dummyjson.com/products': 'lnd_products_api', 'https://dummyjson.com/users': 'lnd_users_api'}}}
TYPE of config['database']: <class 'dict'>


In [26]:
# 3) instantiate your connector with that dict
db = DatabaseConnector(config)
csv_extractor = CSVExtractor(db)
main_extractor = MainExtractor(config_path=config_path)
json_extractor = JSONExtractor(db)
s3_extractor = PublicS3Extractor(config, json_extractor, csv_extractor, main_extractor)

In [27]:
table_name = "lnd_sales_csv"  # replace

In [28]:
table_columns = main_extractor.get_table_columns(table_name=table_name)
print("Table columns:", table_columns)

INFO:Extractor_Code.main_extractor:Table landing.lnd_sales_csv has columns: ['id', 'order_number', 'line_item', 'order_date', 'delivery_date', 'customer_key', 'store_key', 'product_key', 'quantity', 'currency_code', 'loaded_at']


Table columns: ['id', 'order_number', 'line_item', 'order_date', 'delivery_date', 'customer_key', 'store_key', 'product_key', 'quantity', 'currency_code', 'loaded_at']


In [29]:
test_list = examples = [
    "customerKey",  
    "StoreID",        
    "orderDate",     
    "XMLData",    
    "userProfileURL",  
    "HTTPRequest",     
    "SSLCertificate",
    "simpleTest",      
    "CSVFileFormat",    
    "lastUpdatedAt", 
]
test_list

['customerKey',
 'StoreID',
 'orderDate',
 'XMLData',
 'userProfileURL',
 'HTTPRequest',
 'SSLCertificate',
 'simpleTest',
 'CSVFileFormat',
 'lastUpdatedAt']

In [30]:
for item in test_list:
    print(CSVExtractor.camel_to_snake(item))

customer_key
store_id
order_date
xml_data
user_profile_url
http_request
ssl_certificate
simple_test
csv_file_format
last_updated_at


In [31]:
files_mapping = config['s3']['files']
files_mapping

{'JSON/products.json': 'lnd_products_json',
 'JSON/sales.json': 'lnd_sales_json',
 'CSV/customers.csv': 'lnd_customers_csv',
 'CSV/products.csv': 'lnd_products_csv',
 'CSV/sales.csv': 'lnd_sales_csv'}

In [32]:
for s3_key, file_info in files_mapping.items():
    public_url = s3_extractor.get_public_url(s3_key)
    print(f"Public URL for {s3_key}: {public_url}")

Public URL for JSON/products.json: https://firstworkshop.s3.us-east-1.amazonaws.com/JSON/products.json
Public URL for JSON/sales.json: https://firstworkshop.s3.us-east-1.amazonaws.com/JSON/sales.json
Public URL for CSV/customers.csv: https://firstworkshop.s3.us-east-1.amazonaws.com/CSV/customers.csv
Public URL for CSV/products.csv: https://firstworkshop.s3.us-east-1.amazonaws.com/CSV/products.csv
Public URL for CSV/sales.csv: https://firstworkshop.s3.us-east-1.amazonaws.com/CSV/sales.csv
