In [1]:
%run /home/jovyan/work/database_operations/spark_db_connection.ipynb import SparkPostgresConnection
%run /home/jovyan/work/database_operations/db_operations.ipynb import DataOperations

In [None]:
import logging
from typing import List, Type
from pyspark.sql import SparkSession

In [2]:
def set_logging():
    """
    Reset the root logger and configure it for message-only INFO output.

    Every handler currently attached to the root logger is detached and
    closed, then ``logging.basicConfig`` installs a fresh default handler
    with level ``INFO`` and the format ``'%(message)s'``.

    Raises:
        Exception: Any error raised while reconfiguring is logged and
            re-raised unchanged.
    """
    try:
        root_logger = logging.getLogger()
        # Iterate over a copy: removeHandler() mutates root_logger.handlers.
        for handler in list(root_logger.handlers):
            root_logger.removeHandler(handler)
            # Close the handler so any resource it owns (e.g. an open log
            # file) is released instead of being silently dropped.
            handler.close()
        logging.basicConfig(level=logging.INFO, format='%(message)s')
    except Exception as e:
        logging.error(f"Error in logging set up: {e}")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
set_logging()

In [3]:
class DataExtraction:
    """
    Ingests a source file and persists it as a database table.

    All work is delegated to a ``DataOperations`` instance created from the
    supplied Spark session.
    """

    def __init__(self, spark: SparkSession) -> None:
        """
        Build the ``DataOperations`` helper used by this instance.

        Args:
            spark: Active SparkSession, passed straight to ``DataOperations``
        """
        self.database_operations = DataOperations(spark)

    def save_to_database(self, path: str, table_name: str) -> None:
        """
        Ingest the file at ``path`` and save the result under ``table_name``.

        Args:
            path: Path to the CSV file
            table_name: Name of a table to store in the database

        Raises:
            Exception: Any failure during ingest or save is logged and
                re-raised.
        """
        try:
            operations = self.database_operations
            ingested = operations.ingest_data(path)
            operations.save_data(ingested, table_name)
        except Exception as error:
            logging.error(f"Error in data extraction: {error}")
            raise error
    

In [5]:
class ExtractionParamaters:
    """
    Static configuration: CSV source paths and database table names.
    """
    # Absolute locations of the input CSV files.
    list_of_paths: List[str] = [
        "/home/jovyan/work/dataset/goalscorers.csv",
        "/home/jovyan/work/dataset/results.csv",
        "/home/jovyan/work/dataset/shootouts.csv",
    ]
    # Table names. NOTE(review): presumably matched to list_of_paths by
    # position — confirm against the caller.
    list_of_names: List[str] = [
        "scorers",
        "results",
        "shootouts",
    ]
