In [8]:
import pandas as pd

---

#### Inflation


Inflation tables updated monthly at: https://bbs.gov.bd/site/page/29b379ff-7bac-41d9-b321-e41929bab4a1/-

- Get most recent year month from `Consumer Price Index (CPI) and Inflation Rate (IR)` section:
- data table in pdf format, so use `tabula` to extract data

In [1]:
import tabula

In [3]:
dataset = 'https://bbs.portal.gov.bd/sites/default/files/files/bbs.portal.gov.bd/page/9ead9eb1_91ac_4998_a1a3_a5caf4ddc4c6/2023-09-25-08-25-0a59c12c663e2cc22c9be416bcb1fa82.pdf'

In [6]:
# Read the PDF and extract tables into a list of DataFrames
table = tabula.read_pdf(dataset, pages=1, multiple_tables=False)

The output file is empty.


In [7]:
table

[]

In [4]:


# Assuming the table you want is the first table in the PDF, you can access it like this
# Replace 0 with the appropriate index if the table is at a different position
df = tables[0]

# Display the DataFrame
print(df)

IndexError: list index out of range

---

### World Bank

**Country Page:** https://data.worldbank.org/country/bangladesh

Data Catalog: https://api.worldbank.org/v2/en/country/BGD?downloadformat=csv

In [14]:
import requests
import zipfile
import io
import pandas as pd

# URL of the zip file
url = 'https://api.worldbank.org/v2/en/country/BGD?downloadformat=csv'

# Send an HTTP request and get the zip file content
response = requests.get(url)

# Check if the request was successful
if response.status_code == 200:
    # Extract the CSV file starting with 'API' from the zip file
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_ref:
        csv_filename = None
        for file_name in zip_ref.namelist():
            if file_name.startswith('API') and file_name.endswith('.csv'):
                csv_filename = file_name
                break
        
        if csv_filename:
            # Extract the CSV file
            zip_ref.extract(csv_filename)
            csv_file = csv_filename
        else:
            print('No CSV file starting with "API" found in the zip file.')
            exit(1)
else:
    print('Failed to download the zip file.')
    exit(1)

ParserError: Error tokenizing data. C error: Expected 3 fields in line 5, saw 68


In [28]:
# Read the CSV file into a pandas DataFrame, ignore first two columns
raw_data = pd.read_csv(csv_file, header=2, index_col=False)
raw_data = raw_data.drop(raw_data.columns[[0, 1]], axis=1)  # drop country name & code
raw_data = raw_data.dropna(axis=1, how='all') # drop any columns that only contain missing values
raw_data.head(5)

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,"Internally displaced persons, total displaced ...",VC.IDP.TOCV,,,,,,,,,...,426000.0,426000.0,426000.0,426000.0,432000.0,426000.0,427000.0,427000.0,427000.0,427000.0
1,Voice and Accountability: Standard Error,VA.STD.ERR,,,,,,,,,...,0.110641,0.127796,0.129927,0.125566,0.126059,0.128895,0.12345,0.127924,0.130795,
2,Voice and Accountability: Number of Sources,VA.NO.SRC,,,,,,,,,...,15.0,12.0,12.0,12.0,12.0,12.0,11.0,10.0,10.0,
3,High-technology exports (% of manufactured exp...,TX.VAL.TECH.MF.ZS,,,,,,,,,...,0.299089,,0.307867,,,,,,,
4,Export value index (2015 = 100),TX.VAL.MRCH.XD.WD,,,,,,,,,...,89.915593,93.904913,100.0,107.769398,110.723624,121.227906,121.491474,103.78854,136.581796,


---

#### Extract Series

Create a function to help search dataframe

- create function that takes a loose data series we want to extract in the form of a list of strings, and searches `Indicator Name` column, and returns the rows of any relevant series we might want to extract

In [None]:
def filter_series(dataframe, keywords):
    """
    Filter a DataFrame based on a list of keywords present in the 'Indicator Name' column.

    Parameters:
    - dataframe (pd.DataFrame): The DataFrame to be filtered.
    - keywords (list): A list of keywords to search for in a case-insensitive manner.

    Returns:
    - pd.DataFrame: A filtered DataFrame containing rows where the 'Indicator Name' column
      contains all the specified keywords.

    Example:
    >>> keywords = ['population', 'total']
    >>> filtered_data = filter_series(raw_data, keywords)
    >>> print(filtered_data)
       Indicator Name Indicator Code      1960      1961      1962      1963      1964
    0  Population, total   SP.POP.TOTL  50396429  51882769  53461661  55094115  56774465
    """
    # Convert keywords to lowercase for case-insensitive search
    keywords_lower = [keyword.lower() for keyword in keywords]
    
    # Initialize a mask with True for all rows
    mask = pd.Series(True, index=dataframe.index)
    
    # Apply the mask for each keyword
    for keyword in keywords_lower:
        mask &= dataframe['Indicator Name'].str.lower().str.contains(keyword)
    
    # Filter the DataFrame using the combined mask
    filtered_data = dataframe[mask]
    
    return filtered_data

##### Population

In [65]:
filter_series(raw_data, ['Population', 'total']).head()


Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
26,Urban population (% of total population),SP.URB.TOTL.IN.ZS,5.135,5.278,5.498,5.727,5.964,6.211,6.467,6.733,...,32.762,33.535,34.308,35.083,35.858,36.632,37.405,38.177,38.946,39.711
29,"Population, male (% of total population)",SP.POP.TOTL.MA.ZS,51.90252,51.87827,51.85654,51.84286,51.83347,51.82633,51.82088,51.81263,...,49.85193,49.8031,49.75974,49.72908,49.70669,49.67779,49.64116,49.61369,49.59846,49.57124
30,"Population, total",SP.POP.TOTL,50396430.0,51882770.0,53461660.0,55094120.0,56774460.0,58500160.0,60265260.0,62104490.0,...,154030100.0,155961300.0,157830000.0,159784600.0,161794000.0,163684000.0,165516200.0,167421000.0,169356300.0,171186400.0
40,Population ages 15-64 (% of total population),SP.POP.1564.TO.ZS,54.72769,54.51445,54.25726,53.93275,53.55325,53.2251,52.95755,52.70504,...,63.80357,64.4096,64.99142,65.55376,66.09745,66.58262,66.99931,67.38486,67.7199,67.98642
43,Population ages 0-14 (% of total population),SP.POP.0014.TO.ZS,42.26348,42.48862,42.74926,43.06812,43.43385,43.74363,43.98707,44.21036,...,31.53206,30.86886,30.20124,29.52014,28.83163,28.17087,27.55682,26.98218,26.45298,25.96916


In [69]:
# We know 'Population, total' is in row index 30 of master dataframe, so extract this into its own df
df_wide = raw_data.iloc[[30]].reset_index(drop=True)
df_wide

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,"Population, total",SP.POP.TOTL,50396429.0,51882769.0,53461661.0,55094115.0,56774465.0,58500159.0,60265259.0,62104488.0,...,154030139.0,155961299.0,157830000.0,159784568.0,161793964.0,163683958.0,165516222.0,167420951.0,169356251.0,171186372.0


In [78]:
# Convert the DataFrame from wide to long format
df_long = pd.melt(df_wide, id_vars=['Indicator Name', 'Indicator Code'], var_name='Year', value_name='Value')

# create new df with only `Year` and `Value` columns
df = df_long[['Year', 'Value']]

# rename cols
df.columns = ['date', 'value']

# convert `date` column to datetime
df['date'] = pd.to_datetime(df['date'], format='%Y')

# convert `value` col to int
df['value'] = df['value'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['date'] = pd.to_datetime(df['date'], format='%Y')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['value'] = df['value'].astype(int)


In [84]:
# export to csv and json
df.to_csv('data/popu.csv', index=False)
df.to_json('data/popu.json', orient='records')

---

#### Unemployment

In [89]:
filter_series(raw_data, ['Unemployment', 'total'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
63,Adequacy of unemployment benefits and ALMP (% ...,per_lm_alllm.adq_pop_tot,,,,,,,,,...,,,,,,,,,,
664,"Unemployment, youth total (% of total labor fo...",SL.UEM.1524.ZS,,,,,,,,,...,9.881,10.268,10.666,11.051,12.219,12.293,12.366,14.383,13.772,12.928
804,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.NE.ZS,,,,,,,,,...,4.43,,,4.35,4.37,,,,,
806,Unemployment with intermediate education (% of...,SL.UEM.INTM.ZS,,,,,,,,,...,6.98,,,9.71,8.48,,,,,
861,Benefit incidence of unemployment benefits and...,per_lm_alllm.ben_q1_tot,,,,,,,,,...,,,,,,,,,,
1125,Unemployment with advanced education (% of tot...,SL.UEM.ADVN.ZS,,,,,,,,,...,7.0,,,9.03,10.74,,,,,
1126,"Unemployment, youth total (% of total labor fo...",SL.UEM.1524.NE.ZS,,,,,,,,,...,9.88,,,11.37,12.76,,,,,
1223,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,,,...,4.43,4.393,4.371,4.35,4.37,4.38,4.382,5.209,5.097,4.699
1226,Unemployment with basic education (% of total ...,SL.UEM.BASC.ZS,,,,,,,,,...,3.82,,,3.23,3.45,,,,,


In [146]:
def extract_series(row_index):
    '''
    pass in index of row to extract from `raw_data` and return a df in long format
    '''
    # extract row index 1223
    df_wide = raw_data.iloc[[row_index]].reset_index(drop=True)

    print(df_wide.iloc[0, 0], '\n') # print the first value of the first column
    print(df_wide)

    # drop NaN columns
    df_wide = df_wide.dropna(axis=1, how='all')

    # Convert the DataFrame from wide to long format
    df_long = pd.melt(df_wide, id_vars=['Indicator Name', 'Indicator Code'], var_name='date', value_name='value')

    # create new df with only `date` and `value` columns
    df = df_long[['date', 'value']]

    # convert `date` column to datetime (using .loc to avoid copy warning)
    df.loc[:, 'date'] = pd.to_datetime(df['date'], format='%Y').dt.strftime("%Y-%m-%d")

    print(df.info())

    return df

In [147]:
df = extract_series(1223)

Unemployment, total (% of total labor force) (modeled ILO estimate) 

                                      Indicator Name  Indicator Code  1960  \
0  Unemployment, total (% of total labor force) (...  SL.UEM.TOTL.ZS   NaN   

   1961  1962  1963  1964  1965  1966  1967  ...  2013   2014   2015  2016  \
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...  4.43  4.393  4.371  4.35   

   2017  2018   2019   2020   2021   2022  
0  4.37  4.38  4.382  5.209  5.097  4.699  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    32 non-null     object 
 1   value   32 non-null     float64
dtypes: float64(1), object(1)
memory usage: 640.0+ bytes
None


In [148]:
# export to csv and json
df.to_csv('data/unem.csv', index=False)
df.to_json('data/unem.json', orient='records')

#### Inactivity

#### Employment Rate

In [177]:
filter_series(raw_data, ['employment', 'population ratio', '15+', 'total'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
684,"Employment to population ratio, 15+, total (%)...",SL.EMP.TOTL.SP.ZS,,,,,,,,,...,54.175,54.006,53.829,53.65,55.781,55.911,56.019,55.007,55.277,55.993
1146,"Employment to population ratio, 15+, total (%)...",SL.EMP.TOTL.SP.NE.ZS,,,,,,,,,...,54.65,,,56.09,55.78,,,,,
1149,"Employment to population ratio, ages 15-24, to...",SL.EMP.1524.SP.ZS,,,,,,,,,...,36.553,35.636,34.728,33.837,35.521,35.6,35.607,34.127,34.355,34.954
1251,"Employment to population ratio, ages 15-24, to...",SL.EMP.1524.SP.NE.ZS,,,,,,,,,...,40.93,,,38.04,35.3,,,,,


In [179]:
# Extract 'Employment to population ratio, 15+, total (%) (modeled ILO estimate) ' at row 684
df = extract_series(684)
df.tail()

Employment to population ratio, 15+, total (%) (modeled ILO estimate) 

                                      Indicator Name     Indicator Code  1960  \
0  Employment to population ratio, 15+, total (%)...  SL.EMP.TOTL.SP.ZS   NaN   

   1961  1962  1963  1964  1965  1966  1967  ...    2013    2014    2015  \
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...  54.175  54.006  53.829   

    2016    2017    2018    2019    2020    2021    2022  
0  53.65  55.781  55.911  56.019  55.007  55.277  55.993  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32 entries, 0 to 31
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    32 non-null     object 
 1   value   32 non-null     float64
dtypes: float64(1), object(1)
memory usage: 640.0+ bytes
None


Unnamed: 0,date,value
27,2018-01-01,55.911
28,2019-01-01,56.019
29,2020-01-01,55.007
30,2021-01-01,55.277
31,2022-01-01,55.993


In [181]:
# export to csv and json
df.to_csv('data/empl.csv', index=False)
df.to_json('data/empl.json', orient='records')

#### Employment

In [184]:
filter_series(raw_data, ['employment', 'total'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
55,"Share of youth not in education, employment or...",SL.UEM.NEET.ZS,,,,,,,,,...,32.04,,,28.88,27.39,,,,,
63,Adequacy of unemployment benefits and ALMP (% ...,per_lm_alllm.adq_pop_tot,,,,,,,,,...,,,,,,,,,,
664,"Unemployment, youth total (% of total labor fo...",SL.UEM.1524.ZS,,,,,,,,,...,9.881,10.268,10.666,11.051,12.219,12.293,12.366,14.383,13.772,12.928
666,"Part time employment, total (% of total employ...",SL.TLF.PART.ZS,,,,,,,,,...,18.41,,,18.69,20.44,,,,,
677,Employment in services (% of total employment)...,SL.SRV.EMPL.ZS,,,,,,,,,...,34.11192,35.06317,35.98689,36.87573,38.98337,39.90997,40.72791,40.87996,41.19604,
680,Employment in industry (% of total employment)...,SL.IND.EMPL.ZS,,,,,,,,,...,20.78889,20.55099,20.46674,20.45953,20.41933,20.66612,20.94673,21.24903,21.71396,
681,"Contributing family workers, total (% of total...",SL.FAM.WORK.ZS,,,,,,,,,...,18.13614,16.93737,15.72823,14.50835,11.86011,11.111,10.5193,10.4782,10.1633,
684,"Employment to population ratio, 15+, total (%)...",SL.EMP.TOTL.SP.ZS,,,,,,,,,...,54.175,54.006,53.829,53.65,55.781,55.911,56.019,55.007,55.277,55.993
804,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.NE.ZS,,,,,,,,,...,4.43,,,4.35,4.37,,,,,
806,Unemployment with intermediate education (% of...,SL.UEM.INTM.ZS,,,,,,,,,...,6.98,,,9.71,8.48,,,,,


#### Participation Rate

In [190]:
filter_series(raw_data, ['participation rate', 'total', '15\+'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
812,"Labor force participation rate, total (% of to...",SL.TLF.CACT.NE.ZS,,57.17,,,,,,,...,57.18,,,56.09,58.33,,,,,
1231,"Labor force participation rate, total (% of to...",SL.TLF.CACT.ZS,,,,,,,,,...,56.686,56.488,56.289,56.09,58.33,58.473,58.587,58.03,58.246,58.754


In [191]:
# Extract 'Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate)' 
# at row 1231
df = extract_series(1231)
df.tail()

Labor force participation rate, total (% of total population ages 15+) (modeled ILO estimate) 

                                      Indicator Name  Indicator Code  1960  \
0  Labor force participation rate, total (% of to...  SL.TLF.CACT.ZS   NaN   

   1961  1962  1963  1964  1965  1966  1967  ...    2013    2014    2015  \
0   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...  56.686  56.488  56.289   

    2016   2017    2018    2019   2020    2021    2022  
0  56.09  58.33  58.473  58.587  58.03  58.246  58.754  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    33 non-null     object 
 1   value   33 non-null     float64
dtypes: float64(1), object(1)
memory usage: 656.0+ bytes
None


Unnamed: 0,date,value
28,2018-01-01,58.473
29,2019-01-01,58.587
30,2020-01-01,58.03
31,2021-01-01,58.246
32,2022-01-01,58.754


In [192]:
# export to csv and json
df.to_csv('data/part.csv', index=False)
df.to_json('data/part.json', orient='records')

#### Labour Force

In [166]:
filter_series(raw_data, ['labor force', 'total'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
664,"Unemployment, youth total (% of total labor fo...",SL.UEM.1524.ZS,,,,,,,,,...,9.881,10.268,10.666,11.051,12.219,12.293,12.366,14.383,13.772,12.928
671,"Labor force participation rate, total (% of to...",SL.TLF.ACTI.ZS,,,,,,,,,...,58.213,58.043,57.882,57.731,60.402,60.645,60.868,60.351,60.657,
672,"Labor force participation rate for ages 15-24,...",SL.TLF.ACTI.1524.NE.ZS,,50.66,,,,,,,...,45.42,,,38.04,40.46,,,,,
804,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.NE.ZS,,,,,,,,,...,4.43,,,4.35,4.37,,,,,
806,Unemployment with intermediate education (% of...,SL.UEM.INTM.ZS,,,,,,,,,...,6.98,,,9.71,8.48,,,,,
810,"Labor force, female (% of total labor force)",SL.TLF.TOTL.FE.ZS,,,,,,,,,...,28.55937,29.07011,29.58657,30.09696,31.72093,31.87737,32.00891,32.03826,32.41967,32.67183
811,Labor force with intermediate education (% of ...,SL.TLF.INTM.ZS,,,,,,,,,...,55.81,,,51.31,47.61,,,,,
812,"Labor force participation rate, total (% of to...",SL.TLF.CACT.NE.ZS,,57.17,,,,,,,...,57.18,,,56.09,58.33,,,,,
816,"Labor force participation rate for ages 15-24,...",SL.TLF.ACTI.1524.ZS,,,,,,,,,...,40.561,39.714,38.874,38.041,40.465,40.589,40.632,39.86,39.842,40.144
1125,Unemployment with advanced education (% of tot...,SL.UEM.ADVN.ZS,,,,,,,,,...,7.0,,,9.03,10.74,,,,,


In [172]:
# Extract 'Labor force, total' at row 1229
df = extract_series(1229)
df['value'] = df['value'].astype(int)
df.tail()

Labor force, total 

       Indicator Name  Indicator Code  1960  1961  1962  1963  1964  1965  \
0  Labor force, total  SL.TLF.TOTL.IN   NaN   NaN   NaN   NaN   NaN   NaN   

   1966  1967  ...        2013        2014        2015        2016  \
0   NaN   NaN  ...  59781774.0  60904130.0  62009866.0  63166277.0   

         2017        2018        2019        2020        2021        2022  
0  67164738.0  68748318.0  70248864.0  70940010.0  72549161.0  74459362.0  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    33 non-null     object 
 1   value   33 non-null     float64
dtypes: float64(1), object(1)
memory usage: 656.0+ bytes
None


Unnamed: 0,date,value
28,2018-01-01,68748318
29,2019-01-01,70248864
30,2020-01-01,70940010
31,2021-01-01,72549161
32,2022-01-01,74459362


In [173]:
# export to csv and json
df.to_csv('data/labour_force.csv', index=False)
df.to_json('data/labour_force.json', orient='records')

---

#### Growth

In [149]:
filter_series(raw_data, ['growth'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
27,Rural population growth (annual %),SP.RUR.TOTL.ZG,,2.755787,2.765277,2.765195,2.752662,2.731269,2.699307,2.721449,...,0.129949,0.089653,0.021228,0.044033,0.048708,-0.052658,-0.114188,-0.096784,-0.102357,-0.186068
75,GDP per capita growth (annual %),NY.GDP.PCAP.KD.ZG,,3.019802,2.338669,-3.405414,7.668926,-1.391021,-0.437252,-4.781817,...,4.678722,4.747779,5.291061,5.80321,5.266455,6.080238,6.687654,2.271109,5.716649,5.954845
77,GDP growth (annual %),NY.GDP.MKTP.KD.ZG,,6.058161,5.453031,-0.455894,10.952789,1.606258,2.566812,-1.875864,...,6.013606,6.061059,6.55264,7.113478,6.59025,7.319413,7.881907,3.448026,6.938679,7.099829
94,"Manufacturing, value added (annual % growth)",NV.IND.MANF.KD.ZG,,8.229668,7.004629,5.469353,4.940766,4.280426,6.052464,-0.028641,...,10.30689,8.766757,10.310577,11.690815,7.08644,10.452442,12.334487,1.680819,11.585139,11.410828
135,Claims on central government (annual growth as...,FM.AST.CGOV.ZG.M3,,,,,,,,,...,3.45382,2.493596,2.372433,3.836338,3.279088,4.364018,6.278977,3.740638,4.016348,3.522746
283,Population growth (annual %),SP.POP.GROW,,2.906641,2.997805,3.007812,3.004376,2.994281,2.972633,3.006246,...,1.267157,1.24596,1.191061,1.230795,1.249724,1.161378,1.113172,1.14421,1.149318,1.074837
304,Gross fixed capital formation (annual % growth),NE.GDI.FTOT.KD.ZG,,,,,,,,,...,5.364642,9.855051,7.118801,8.909372,8.359574,12.132901,6.880267,3.945884,8.090779,11.653373
306,Exports of goods and services (annual % growth),NE.EXP.GNFS.KD.ZG,,-11.501106,20.796544,4.830609,3.742608,-5.189872,14.332446,-11.766218,...,2.451884,3.201149,-2.82999,2.19675,-1.843281,6.10592,11.456473,-17.502392,9.191456,29.392555
309,Final consumption expenditure (annual % growth),NE.CON.TOTL.KD.ZG,,8.949271,-1.743946,4.911219,10.593545,0.481668,1.841663,-2.947137,...,5.173398,4.275974,6.028725,3.399133,6.344413,9.077003,5.525803,2.908652,7.920388,7.374349
367,GNI growth (annual %),NY.GNP.MKTP.KD.ZG,,,,,,,,,...,5.59728,4.692012,6.354338,5.395267,5.315838,7.925879,7.907855,3.5243,8.065671,5.802404


In [150]:
# Extract 'GDP growth (annual %)' at row 77
df = extract_series(77)
df.tail()

GDP growth (annual %) 

          Indicator Name     Indicator Code  1960      1961      1962  \
0  GDP growth (annual %)  NY.GDP.MKTP.KD.ZG   NaN  6.058161  5.453031   

       1963       1964      1965      1966      1967  ...      2013      2014  \
0 -0.455894  10.952789  1.606258  2.566812 -1.875864  ...  6.013606  6.061059   

      2015      2016     2017      2018      2019      2020      2021  \
0  6.55264  7.113478  6.59025  7.319413  7.881907  3.448026  6.938679   

       2022  
0  7.099829  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 62 entries, 0 to 61
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    62 non-null     object 
 1   value   62 non-null     float64
dtypes: float64(1), object(1)
memory usage: 1.1+ KB
None


Unnamed: 0,date,value
57,2018-01-01,7.319413
58,2019-01-01,7.881907
59,2020-01-01,3.448026
60,2021-01-01,6.938679
61,2022-01-01,7.099829


In [152]:
# export
df.to_csv('data/grow.csv', index=False)
df.to_json('data/grow.json', orient='records')

`ToDo`: check whether we want percentage values in 100 scale or 1 scale
- 100 scale for now

---

#### Inflation

In [155]:
filter_series(raw_data, ['Inflation, consumer prices'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
737,"Inflation, consumer prices (annual %)",FP.CPI.TOTL.ZG,,,,,,,,,...,7.530406,6.991639,6.19428,5.513526,5.70207,5.543621,5.591996,5.691075,5.545654,7.696954


In [156]:
# Extract 'Inflation, consumer prices (annual %) ' at row 737
df = extract_series(737)
df.tail()

Inflation, consumer prices (annual %) 

                          Indicator Name  Indicator Code  1960  1961  1962  \
0  Inflation, consumer prices (annual %)  FP.CPI.TOTL.ZG   NaN   NaN   NaN   

   1963  1964  1965  1966  1967  ...      2013      2014     2015      2016  \
0   NaN   NaN   NaN   NaN   NaN  ...  7.530406  6.991639  6.19428  5.513526   

      2017      2018      2019      2020      2021      2022  
0  5.70207  5.543621  5.591996  5.691075  5.545654  7.696954  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    36 non-null     object 
 1   value   36 non-null     float64
dtypes: float64(1), object(1)
memory usage: 704.0+ bytes
None


Unnamed: 0,date,value
31,2018-01-01,5.543621
32,2019-01-01,5.591996
33,2020-01-01,5.691075
34,2021-01-01,5.545654
35,2022-01-01,7.696954


In [157]:
# export
df.to_csv('data/infl.csv', index=False)
df.to_json('data/infl.json', orient='records')

---

#### Inequality

In [158]:
filter_series(raw_data, ['gini'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
695,Gini index,SI.POV.GINI,,,,,,,,,...,,,,32.4,,,,,,31.8


In [159]:
# Extract 'Gini index' at row 695
df = extract_series(695)
df.tail()

Gini index 

  Indicator Name Indicator Code  1960  1961  1962  1963  1964  1965  1966  \
0     Gini index    SI.POV.GINI   NaN   NaN   NaN   NaN   NaN   NaN   NaN   

   1967  ...  2013  2014  2015  2016  2017  2018  2019  2020  2021  2022  
0   NaN  ...   NaN   NaN   NaN  32.4   NaN   NaN   NaN   NaN   NaN  31.8  

[1 rows x 65 columns]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   date    10 non-null     object 
 1   value   10 non-null     float64
dtypes: float64(1), object(1)
memory usage: 288.0+ bytes
None


Unnamed: 0,date,value
5,2000-01-01,33.4
6,2005-01-01,33.2
7,2010-01-01,32.1
8,2016-01-01,32.4
9,2022-01-01,31.8


In [160]:
# export
df.to_csv('data/ineq.csv', index=False)
df.to_json('data/ineq.json', orient='records')

---

#### Exports

In [153]:
filter_series(raw_data, ['exports'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
3,High-technology exports (% of manufactured exp...,TX.VAL.TECH.MF.ZS,,,,,,,,,...,0.2990886,,0.3078671,,,,,,,
5,Merchandise exports to low- and middle-income ...,TX.VAL.MRCH.R6.ZS,,,,,,,,,...,0.4715582,0.3722408,0.4591402,0.3795023,0.5308626,0.5061223,0.4426419,0.5483284,,
6,Merchandise exports to low- and middle-income ...,TX.VAL.MRCH.R2.ZS,,,,,,,,,...,3.4171,3.319974,2.859091,2.866961,3.038562,2.410128,2.385277,2.971103,,
7,Merchandise exports (current US$),TX.VAL.MRCH.CD.WT,,,,,,,,,...,29114000000.0,30405000000.0,32379000000.0,34894000000.0,35851000000.0,39252000000.0,39337000000.0,33605000000.0,44223000000.0,54695000000.0
8,Insurance and financial services (% of commerc...,TX.VAL.INSF.ZS.WT,,,,,,,,,...,4.115775,4.42132,4.241769,4.474893,6.327005,3.755721,4.640008,3.296103,2.719318,3.547923
9,Agricultural raw materials exports (% of merch...,TX.VAL.AGRI.ZS.UN,,,,,,,,,...,1.051364,,0.7471825,,,,,,,
123,Taxes on exports (current LCU),GC.TAX.EXPT.CN,,,,,,,,,...,299100000.0,33500000.0,39100000.0,300000000.0,216400000.0,338800000.0,1145572000.0,773900000.0,5889668.0,
164,Public and publicly guaranteed debt service (%...,DT.TDS.DPPG.XP.ZS,,,,,,,,,...,4.852805,4.857939,3.689619,3.253705,3.786378,3.837185,4.223391,5.204949,5.585951,
234,"Transport services (% of service exports, BoP)",BX.GSR.TRAN.ZS,,,,,,,,,...,16.56677,12.5103,12.41368,11.80547,12.62651,12.03831,10.00412,11.3853,17.86736,17.46052
235,"Goods exports (BoP, current US$)",BX.GSR.MRCH.CD,,,,,,,,,...,28638190000.0,29924490000.0,31736040000.0,34122050000.0,35300770000.0,38681900000.0,38747290000.0,32456150000.0,41815970000.0,51868270000.0


---

#### Productivity

In [185]:
filter_series(raw_data, ['productivity'])

Unnamed: 0,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
141,"Water productivity, total (constant 2015 US$ G...",ER.GDP.FWTL.M3.KD,,,,,,,,,...,4.812359,5.104039,5.438488,5.825354,6.209259,6.663741,7.18897,7.436848,,
