In [None]:
%run /home/jovyan/work/etl/src/data_extraction.ipynb import DataExtraction
%run /home/jovyan/work/etl/src/load_data.ipynb import LoadData

In [None]:
import logging
from typing import Dict, Tuple
from pyspark.sql import SparkSession, DataFrame

In [None]:
def extract_process(spark: SparkSession,
                    files: Dict[str, str]
                    ) -> Tuple[DataFrame, DataFrame, DataFrame]:
    """
    Saves every CSV file into the database and loads the first three tables back.

    Each entry of ``files`` is passed to ``DataExtraction.save_to_database``;
    afterwards the tables named by the first three keys (in the mapping's
    insertion order) are read with ``LoadData.load_from_database``.

    Args:
        spark: Active SparkSession, handed to DataExtraction and LoadData
        files: A dictionary with table names as keys and corresponding CSV file
               paths as values. The order of the keys matters: the first three
               keys determine the three returned DataFrames, so callers are
               expected to list them as 'scorers', 'results', 'shootouts'.
    Returns:
        Tuple[DataFrame, DataFrame, DataFrame]: DataFrames loaded for the first,
                                                second and third key of ``files``
                                                (intended to be 'scorers',
                                                'results', 'shootouts')
    Raises:
        ValueError: If ``files`` has fewer than three entries. Raised before
                    anything is written to the database.
        Exception: Any error raised while saving or loading is logged and
                   re-raised unchanged.
    """
    required_tables = 3
    try:
        # Snapshot the table names once; their order defines the returned tuple.
        table_names = list(files)

        # Fail fast: without this check a short mapping would first write its
        # tables to the database and only then die with an opaque IndexError.
        if len(table_names) < required_tables:
            raise ValueError(
                f"Expected at least {required_tables} entries in 'files', "
                f"got {len(table_names)}"
            )

        logging.info("\nStarted the extraction process:")
        data_extraction = DataExtraction(spark)
        for table_name, csv_path in files.items():
            data_extraction.save_to_database(path=csv_path, table_name=table_name)

        load_data = LoadData(spark)
        # Entries beyond the third are saved above but not loaded back here.
        scorers, results, shootouts = (
            load_data.load_from_database(table_name)
            for table_name in table_names[:required_tables]
        )

        logging.info("Finished the extraction process")
        return scorers, results, shootouts
    except Exception as e:
        logging.error(f"Error in the extraction process: {e}")
        # Bare raise re-raises the active exception with its original traceback.
        raise