In [0]:
class DataSource:
    """Base class pairing an input location with the session used to read it.

    The base ``get_dataframe`` only raises; concrete readers are expected to
    override it.
    """

    def __init__(self, path, spark):
        """Remember where to read from and which session object to use.

        Args:
            path: Location later handed to the reader's ``load`` call.
            spark: Object exposing a ``read`` attribute (presumably a
                ``SparkSession`` -- confirm against callers).
        """
        self.path, self.spark = path, spark

    def get_dataframe(self):
        """Read the source; must be overridden by a concrete reader.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        message = "get_dataframe() is not implemented"
        raise NotImplementedError(message)


class CSVDataSource(DataSource):
    """Data source that reads ``self.path`` through the ``csv`` reader format."""

    def get_dataframe(self):
        """Load ``self.path`` as CSV with the ``header`` option set to ``"true"``.

        Returns:
            Whatever the reader's ``load`` call returns (presumably a Spark
            DataFrame -- confirm against the session type in use).
        """
        csv_reader = self.spark.read.format("csv")
        # With header enabled, the first line supplies column names.
        csv_reader = csv_reader.option("header", "true")
        return csv_reader.load(self.path)


class ParquetDataSource(DataSource):
    """Data source that reads ``self.path`` through the ``parquet`` reader format."""

    def get_dataframe(self):
        """Load ``self.path`` using the ``parquet`` format.

        Returns:
            Whatever the reader's ``load`` call returns (presumably a Spark
            DataFrame -- confirm against the session type in use).
        """
        parquet_reader = self.spark.read.format("parquet")
        return parquet_reader.load(self.path)


class DeltaDataSource(DataSource):
    """Data source that reads ``self.path`` through the ``delta`` reader format."""

    def get_dataframe(self):
        """Load ``self.path`` using the ``delta`` format.

        NOTE(review): the ``delta`` format is presumably provided by the Delta
        Lake package on the session's classpath -- confirm it is installed.

        Returns:
            Whatever the reader's ``load`` call returns (presumably a Spark
            DataFrame -- confirm against the session type in use).
        """
        delta_reader = self.spark.read.format("delta")
        return delta_reader.load(self.path)


def get_data_source(data_type, file_path, spark):
    """Build the data-source object that corresponds to ``data_type``.

    Args:
        data_type: Format name compared with ``==`` against ``"csv"``,
            ``"parquet"`` and ``"delta"``, in that order.
        file_path: Passed through as the first constructor argument.
        spark: Passed through as the second constructor argument.

    Returns:
        A new ``CSVDataSource``, ``ParquetDataSource`` or ``DeltaDataSource``.

    Raises:
        ValueError: If ``data_type`` equals none of the known names.
    """
    # Guard clauses: return as soon as a known format name matches.
    if data_type == "csv":
        return CSVDataSource(file_path, spark)
    if data_type == "parquet":
        return ParquetDataSource(file_path, spark)
    if data_type == "delta":
        return DeltaDataSource(file_path, spark)

    message = f"Not implemented for data_type: {data_type}"
    raise ValueError(message)
