In [None]:
from pyspark.sql.functions import *
import pyspark.sql.dataframe
import urllib


class S3Connector:
    """
    Class of methods required for mounting and unmounting the S3 bucket.

    The methods reference the names ``dbutils`` and ``spark`` without
    importing them; presumably these are the globals a Databricks notebook
    provides, so they must exist in the enclosing scope at call time.

    Parameters:
    ----------
    config_dict: dict
        A dictionary containing the keys "s3_bucket_name", "target_bucket_mount_name"
        and "aws_authentication_csv_filepath" and their values.

    Attributes:
    ----------
    _s3_bucket_name: str
        Protected; the name of the bucket in S3 to be mounted to DataBricks,
        which holds the Batch Layer master data.

    _bucket_mount_name: str
        Protected; the name to be given to the mount connecting Databricks to
        the S3 bucket.

    _aws_credentials_filepath: str
        Protected; the filepath to the CSV file in which the access tokens
        required to connect with S3 are stored.

    _is_bucket_mounted: bool
        Protected; set to True or False on initialisation by the
        check_if_bucket_already_mounted() method, then updated by
        mount_s3_bucket() and unmount_s3_bucket().
    """
    def __init__(self, config_dict: dict) -> None:
        """
        See help(S3Connector) for an accurate signature.

        Raises:
        ------
        KeyError: if any of the three required keys is missing from
            config_dict.
        """
        self._s3_bucket_name = config_dict["s3_bucket_name"]
        self._bucket_mount_name = config_dict["target_bucket_mount_name"]
        self._aws_credentials_filepath = config_dict["aws_authentication_csv_filepath"]
        # Queries the current mounts once; the result is cached on the instance.
        self._is_bucket_mounted = self.check_if_bucket_already_mounted()

    def check_if_bucket_already_mounted(self) -> bool:
        """
        Method used to check if the bucket by the name stored at the object's
        _bucket_mount_name attribute is already mounted.

        Returns:
        -------
        bool: True if the bucket mounting already exists, False otherwise.
        """
        # The first field of each entry returned by dbutils.fs.mounts() is
        # compared against the target mount name.
        return any(
            mount_info[0] == self._bucket_mount_name
            for mount_info in dbutils.fs.mounts()
        )

    def __read_aws_authentication_details_to_df(self) -> "pyspark.sql.dataframe.DataFrame":
        """
        Private; reads the CSV file at _aws_credentials_filepath into a Spark
        dataframe, treating the first row as a header and "," as the separator.

        Returns:
        -------
        pyspark.sql.dataframe.DataFrame: the contents of the credentials CSV.
        """
        # The annotation is a string so that it names the DataFrame class
        # (not the module) without being evaluated at definition time.
        csv_reader = (
            spark.read.format("csv")
            .option("header", "true")
            .option("sep", ",")
        )
        return csv_reader.load(self._aws_credentials_filepath)

    def _extract_access_credentials(self) -> tuple:
        """
        Protected; method that extracts the security tokens from the
        AWS credentials CSV file.

        Returns:
        -------
        tuple: (ACCESS_KEY, ENCODED_SECRET_KEY), where the secret key has been
            percent-encoded with no characters treated as safe.

        Raises:
        ------
        IndexError: if the credentials file contains no data rows.
        """
        # Imported here because a bare "import urllib" does not guarantee that
        # the urllib.parse submodule has been loaded.
        from urllib.parse import quote

        aws_keys_df = self.__read_aws_authentication_details_to_df()

        # Fetch both columns with a single collect() rather than one per column.
        credential_rows = aws_keys_df.select(
            'Access key ID', 'Secret access key'
        ).collect()
        if not credential_rows:
            # IndexError is what indexing the empty result raised before, so
            # existing handlers still match; only the message is clearer.
            raise IndexError(
                "No credential rows found in "
                f"{self._aws_credentials_filepath!r}"
            )

        first_row = credential_rows[0]
        access_key = first_row['Access key ID']
        secret_key = first_row['Secret access key']

        # safe="" so that "/" is percent-encoded too; the key is embedded in
        # a URL by mount_s3_bucket().
        encoded_secret_key = quote(secret_key, safe="")

        return (access_key, encoded_secret_key)

    def mount_s3_bucket(self) -> None:
        """
        Method which mounts the desired S3 bucket to Databricks, if it
        is not already mounted.

        The decision is based on the cached _is_bucket_mounted attribute; the
        list of mounts is not queried again here.
        """
        if self._is_bucket_mounted:
            print(f"Your S3 bucket has already been mounted under the name of {self._bucket_mount_name}")
            return

        access_key, encoded_secret_key = self._extract_access_credentials()

        # NOTE(review): the "s3n" scheme is kept unchanged; current Databricks
        # examples appear to use "s3a" - confirm before changing.
        source_url = f"s3n://{access_key}:{encoded_secret_key}@{self._s3_bucket_name}"

        dbutils.fs.mount(source_url, self._bucket_mount_name)
        self._is_bucket_mounted = True

    def unmount_s3_bucket(self) -> None:
        """
        Method which unmounts the S3 bucket of the name assigned to the
        object's _bucket_mount_name attribute.

        The unmount is attempted unconditionally; any error raised by
        dbutils.fs.unmount() propagates and _is_bucket_mounted is then left
        unchanged.
        """
        dbutils.fs.unmount(self._bucket_mount_name)
        self._is_bucket_mounted = False