In [0]:
%run "./Extractor_Factory"

In [0]:
from typing import Dict
from pyspark.sql import DataFrame

class Extractor:
    """
    Base class for extractors.

    Defines the common interface for extraction logic. Subclasses supply the
    actual behavior by overriding ``extract``.
    """

    def __init__(self):
        """Initialize the Extractor base class (no state is set up here)."""
        pass

    def extract(self):
        """
        Run the data extraction. Must be overridden by every subclass.

        Raises:
        NotImplementedError: Always, when called on a class that has not
                             overridden this method.
        """
        # Fail loudly instead of silently returning None, so a subclass that
        # forgot to implement extract() is caught at the first call.
        raise NotImplementedError(
            f"{type(self).__name__} must implement extract()"
        )


class TablesExtractor(Extractor):
    """
    Concrete Extractor that loads tables from parquet files.

    Each entry listed under a base directory is treated as one table, and the
    result is a dictionary of DataFrames keyed by table name.
    """

    def extract(
        self, base_path: str = "dbfs:/mnt/bronze/public/"
    ) -> Dict[str, DataFrame]:
        """
        Load one parquet file per entry found under ``base_path``.

        For an entry ``<base_path>/<table>/`` the file read is
        ``<base_path>/<table>/<table>.parquet``.

        Args:
        base_path: Directory whose entries are listed with ``dbutils.fs.ls``.
                   Defaults to the bronze "public" mount, so calling
                   ``extract()`` with no arguments behaves as before.

        Returns:
        dict: A dictionary where each key is a table name (the last path
              component of the listed entry) and each value is the object
              returned by ``get_data(...).get_data_frame()`` for that table.
        """
        # NOTE(review): `dbutils` and `get_data` are not defined in this
        # cell; presumably they come from the notebook runtime and the
        # %run'd "Extractor_Factory" notebook -- confirm.
        tables: Dict[str, DataFrame] = {}

        for entry in dbutils.fs.ls(base_path):
            # Strip the trailing slash once so that both the table name and
            # the parquet path are built the same way whether or not the
            # listed path ends with "/".
            table_dir = entry.path.rstrip('/')
            table_name = table_dir.split('/')[-1]

            # The parquet file carries the same name as its directory.
            file_path = f"{table_dir}/{table_name}.parquet"

            tables[table_name] = get_data(
                file_type="parquet", file_path=file_path
            ).get_data_frame()

        return tables
