# DFx ETL Pipeline

## ETL

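This is a generic notebook for executing an ETL pipeline for any of the supported data providers. Set `PIPELINE_NAME` in the Configuration section, then run the cells in order to extract, transform and load the data.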

### Libraries

In [1]:
import logging

from dfx_etl.pipelines import get_pipeline

### Configuration

In [2]:
# Name of the pipeline to run; it is passed to get_pipeline() further down.
PIPELINE_NAME = 'world_bank_wdi'

# Configure the root logger for this notebook session.
# force=True replaces any handlers already attached to the root logger, so
# editing the level below and re-running this cell takes effect immediately.
# Without it, basicConfig() is a silent no-op once the root logger has
# handlers (e.g. after the first run of this cell in the same kernel).
logging.basicConfig(
    level=logging.WARNING,  # logging.INFO or logging.WARNING
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.StreamHandler()],
    force=True,
)

In [3]:
# Obtain the pipeline object for the configured PIPELINE_NAME.
pipeline = get_pipeline(PIPELINE_NAME)
# Bare last expression: the notebook renders the pipeline's repr as the cell
# output, showing the retriever and transformer it was built with.
pipeline

Pipeline(retriever=Retriever(uri=PosixPath('inputs/WDI_CSV/WDICSV.csv'), headers=None), transformer=Transformer())

### Extract

In [4]:
# Run the pipeline's retrieve step; the raw frame is read back from
# pipeline.df_raw below.
pipeline.retrieve()
# Report the raw frame's dimensions, then preview its first five rows.
print(f"Shape: {pipeline.df_raw.shape}")
display(pipeline.df_raw.head(n=5))

Shape: (402458, 69)


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.ZS,,,,,,,...,18.145833,18.685118,19.205632,19.742772,20.332679,20.8628,21.419621,21.996456,22.54144,
1,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.RU.ZS,,,,,,,...,7.259936,7.606712,7.926604,8.309896,8.704591,9.10664,9.480804,9.903209,10.288154,
2,Africa Eastern and Southern,AFE,Access to clean fuels and technologies for coo...,EG.CFT.ACCS.UR.ZS,,,,,,,...,38.741988,39.052626,39.321068,39.649534,39.968299,40.354628,40.723805,41.026351,41.289974,
3,Africa Eastern and Southern,AFE,Access to electricity (% of population),EG.ELC.ACCS.ZS,,,,,,,...,33.922276,38.859598,40.223744,43.035073,44.390861,46.282371,48.127211,48.801258,50.667516,
4,Africa Eastern and Southern,AFE,"Access to electricity, rural (% of rural popul...",EG.ELC.ACCS.RU.ZS,,,,,,,...,16.495064,24.584906,25.375037,26.941984,28.983183,30.909991,32.709837,33.747222,35.240236,


### Transform

In [5]:
# Run the pipeline's transform step; the result is read back from
# pipeline.df_transformed below.
pipeline.transform()
# Report the transformed frame's dimensions, then preview its first five rows.
print(f"Shape: {pipeline.df_transformed.shape}")
display(pipeline.df_transformed.head(n=5))

Shape: (1732232, 7)


Unnamed: 0,country_code,indicator_name,year,value,provider,disaggregation,source
0,AFG,Access to clean fuels and technologies for coo...,2015,27.5,world_bank_wdi,Total,
1,AFG,Access to clean fuels and technologies for coo...,2015,11.1,world_bank_wdi,Total,
2,AFG,Access to clean fuels and technologies for coo...,2015,79.4,world_bank_wdi,Total,
3,AFG,Access to electricity (% of population) [EG.EL...,2015,71.5,world_bank_wdi,Total,
4,AFG,"Access to electricity, rural (% of rural popul...",2015,64.6,world_bank_wdi,Total,


### Load

In [6]:
# Run the pipeline's load step. The trailing semicolon suppresses the cell's
# Out[] display of whatever load() returns.
# NOTE(review): the load destination is not visible in this notebook — confirm
# where the data is written before re-running.
pipeline.load();