In [4]:
from pyspark.sql import SparkSession, DataFrame
import logging

def create_spark_session(app_name: str = "DataProcessing",
                         jdbc_jar: str = "postgresql-42.6.0.jar") -> SparkSession:
    """
    Creates SparkSession

    Args:
        app_name: Application name given to the session builder
        jdbc_jar: Value for the "spark.jars" setting (path to the PostgreSQL
            JDBC driver jar)

    Returns:
        SparkSession: Active Spark Session

    Raises:
        Exception: Any error raised while building the session is logged
            and then re-raised unchanged
    """
    try:
        # NOTE(review): per the Spark docs getOrCreate() reuses an existing
        # session when there is one, so the settings above may not be applied
        # to an already-running session - confirm for your deployment.
        return (
            SparkSession.builder
            .appName(app_name)
            .config("spark.jars", jdbc_jar)
            .getOrCreate()
        )
    except Exception as e:
        logging.error("Error in Spark connection: %s", e)
        # Bare raise re-raises the active exception with its original traceback
        raise

In [5]:
class SparkPostgresConnection:
    """
    Class for connecting Spark with PostgreSQL

    Stores a SparkSession together with the JDBC URL and connection
    properties, and uses them to load tables from and save DataFrames to
    the database.
    """
    def __init__(self,
            spark: SparkSession,
            url: str = "jdbc:postgresql://postgres:5432/football",
            user: str = "user",
            password: str = "football",
            driver: str = "org.postgresql.Driver") -> None:
        """
        Initiates parameters

        Args:
            spark: Active SparkSession
            url: The JDBC URL for the database connection
            user: The username for PostgreSQL authentication
            password: The password for PostgreSQL authentication
            driver: The JDBC driver class for PostgreSQL
        """
        # NOTE(review): the default user/password are hard-coded in the
        # signature; consider passing real credentials explicitly.
        self.spark = spark
        self.url = url
        self.user = user
        self.password = password
        self.driver = driver
        # Connection properties passed to both the JDBC reader and writer
        self.properties = {
            "user": self.user,
            "password": self.password,
            "driver": self.driver,
        }

    def load_from_db(self, table_name: str) -> DataFrame:
        """
        Loads table from PostgreSQL

        Args:
            table_name: Name of the table to load
        Returns:
            DataFrame: Loaded table
        Raises:
            Exception: Any error raised by the JDBC read is logged and then
                re-raised unchanged
        """
        try:
            return self.spark.read.jdbc(
                url=self.url,
                table=table_name,
                properties=self.properties,
            )
        except Exception as e:
            logging.error("Error while loading table from Database: %s", e)
            # Bare raise re-raises the active exception with its original traceback
            raise

    def save_to_db(self, df: DataFrame, table_name: str, mode: str = "overwrite") -> None:
        """
        Saves table into PostgreSQL

        Args:
            df: DataFrame to save into database
            table_name: Name of the new table
            mode: Mode of saving style [append, overwrite, error, ignore]
        Raises:
            Exception: Any error raised by the JDBC write is logged and then
                re-raised unchanged
        """
        try:
            (
                df.write.format("jdbc")
                .option("url", self.url)
                .option("dbtable", table_name)
                .options(**self.properties)
                .mode(mode)
                .save()
            )
        except Exception as e:
            logging.error("Error while saving table into Database: %s", e)
            # Bare raise re-raises the active exception with its original traceback
            raise
        