In [0]:
%pip install pyyaml --upgrade
import logging
import os
import yaml
from datetime import datetime

# Configure the root logger: INFO and above, each record rendered as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def safe_load_yaml(file_path):
    """Load a YAML file with ``yaml.safe_load`` and return the parsed content.

    Failures are logged and then propagated to the caller instead of being
    swallowed, so a missing or malformed configuration file is reported at
    the point of loading rather than later as an unrelated error on ``None``.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file (``None`` for an
        empty document).

    Raises:
        FileNotFoundError: If ``file_path`` is not an existing regular file.
        Exception: Any error raised while opening or parsing the file is
            logged and re-raised unchanged.
    """
    # Checked outside the try block so this error is not caught (and logged a
    # second time) by the generic handler below.
    if not os.path.isfile(file_path):
        logging.error(f"Configuration file not found: {file_path}")
        raise FileNotFoundError(f"Missing configuration file: {file_path}")

    try:
        # Explicit encoding so the result does not depend on the platform default.
        with open(file_path, 'r', encoding='utf-8') as f:
            return yaml.safe_load(f)
    except Exception as e:
        logging.error(f"Error loading YAML file: {e}")
        raise

def create_schema_if_not_exists(catalog, schema_name, spark):
    """Issue a ``CREATE SCHEMA IF NOT EXISTS`` statement for ``catalog.schema_name``.

    Args:
        catalog: Catalog name, interpolated into the statement as-is.
        schema_name: Schema name, interpolated into the statement as-is.
        spark: Object exposing a ``sql(statement)`` method that runs the
            statement (presumably a SparkSession -- not shown in this file).
    """
    qualified_schema = f"{catalog}.{schema_name}"
    statement = f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}"
    spark.sql(statement)

def create_table_if_not_exists(catalog, schema_name, table_name, definition, spark):
    """Issue a ``CREATE TABLE IF NOT EXISTS`` statement built from column specs.

    Args:
        catalog: Catalog name, interpolated into the statement as-is.
        schema_name: Schema name, interpolated into the statement as-is.
        table_name: Table name, interpolated into the statement as-is.
        definition: Iterable of mappings, each providing a ``'name'`` and a
            ``'type'`` entry; they are rendered as ``"<name> <type>"`` and
            joined with ``", "`` in iteration order.
        spark: Object exposing a ``sql(statement)`` method that runs the
            statement (presumably a SparkSession -- not shown in this file).

    Raises:
        KeyError: If a column mapping lacks ``'name'`` or ``'type'``.
    """
    column_specs = []
    for column in definition:
        column_specs.append(f"{column['name']} {column['type']}")
    column_clause = ", ".join(column_specs)
    qualified_table = f"{catalog}.{schema_name}.{table_name}"
    spark.sql(f"CREATE TABLE IF NOT EXISTS {qualified_table} ({column_clause})")


# Create the schemas and tables described in the YAML configuration files.
# NOTE: `spark` is not defined in this file; it is expected to be supplied by
# the notebook runtime.
tbl_definitions=safe_load_yaml('/Workspace/Users/hritikraj143@gmail.com/Retail-Analytics/Config/table_definitions.yaml')
glb_config=safe_load_yaml('/Workspace/Users/hritikraj143@gmail.com/Retail-Analytics/Config/global_config.yaml')

# Validate the YAML structure before using it, so that a missing or malformed
# file fails with a clear message instead of a TypeError/KeyError on lookup.
if not isinstance(glb_config, dict) or 'catalog' not in glb_config:
    raise ValueError("Invalid global config: expected a dictionary with a 'catalog' key")
catalog = glb_config['catalog']

if not isinstance(tbl_definitions, dict):
    raise ValueError('Invalid yaml format: Expected a dictionary')

# Top-level keys are schema names; each maps table names to their metadata.
for schemas, tables in tbl_definitions.items():
    if not isinstance(tables, dict):
        logging.error(f"Invalid yaml definition for schema {schemas}, expected a dictionary")
        continue

    try:
        create_schema_if_not_exists(catalog, schemas, spark)
        logging.info(f"Schema {schemas} created or already exists")
    except Exception as e:
        # Without the schema its tables cannot be created; skip to the next schema.
        logging.error(f"Error creating schema {schemas}: {e}")
        continue

    for table, meta in tables.items():
        if not isinstance(meta, dict):
            logging.error(f"Invalid yaml definition for table {table}, expected a dictionary")
            continue

        # Check the column list up front so a missing or empty 'columns' entry
        # is reported as a configuration problem rather than a creation error.
        columns = meta.get('columns')
        if not isinstance(columns, list) or not columns:
            logging.error(f"Invalid yaml definition for table {table}, expected a non-empty 'columns' list")
            continue

        try:
            create_table_if_not_exists(catalog, schemas, table, columns, spark)
            logging.info(f"Table {table} created or already exists")
        except Exception as e:
            # Best effort: log the failure and carry on with the remaining tables.
            logging.error(f"Error creating table {table}: {e}")
            