In [258]:
# Import python libraries
import wbdata as wb
import pandas as pd
import numpy as np
import datetime as dt
import docx

# Set parameters
# Country code handed to every wb.get_dataframe() call in this notebook and used
# as the prefix of the exported file name (presumably an ISO alpha-3 code - confirm).
country = 'RUS'

# Create a blank Word document where the output will be exported
# (the tables are appended to it and it is saved in the export cell).
doc = docx.Document()

In [259]:
# Generate Country Overview table
# World Bank indicator codes mapped to the row labels shown in the table.
country_overview = {'SP.POP.TOTL': 'Population (million)',
                    'SP.POP.GROW': 'Population growth (annual % change)',
                    'AG.LND.ARBL.ZS': 'Arable land (%)',
                    'AG.LND.TOTL.K2': 'Land area (sq. km.)'}
# wbdata documents a date range as (start, end); keep it in chronological order,
# consistent with the projections cell.
data_date = dt.datetime(2016, 1, 1), dt.datetime(2019, 1, 1)

df_country_overview = wb.get_dataframe(country_overview, country=country, data_date=data_date)

# One row per (indicator, year) observation.  Missing values are dropped explicitly
# instead of relying on stack() doing it implicitly, so the value and the year that
# are picked below always belong to the same observation.
observations = (df_country_overview.transpose()
                .stack()
                .dropna()
                .rename('Value')
                .reset_index(level=1))

# Most recent observation per indicator: newest year first, then the first row of
# each indicator group.  Sorting makes this independent of the row order returned
# by the API.
latest = (observations.sort_values('date', ascending=False)
          .groupby(level=0)
          .first())

# Fixed row order of the published table; indicators without data become NaN rows.
df_country_overview = latest[['Value', 'date']].reindex(['Population (million)',
                                                         'Population growth (annual % change)',
                                                         'Land area (sq. km.)',
                                                         'Arable land (%)'])
df_country_overview = df_country_overview.reset_index()
df_country_overview.columns = ['', 'Value', 'Year']
# Row 0 is the population row (see the reindex above): convert persons to millions.
df_country_overview.iloc[0, 1] = df_country_overview.iloc[0, 1] / 1e6
# NOTE(review): round() keeps zero decimals, which also rounds the growth rate and
# the arable-land share to whole numbers - confirm this is intended.
df_country_overview = df_country_overview.round()

In [260]:
# Generate GDP and Employment Structure table
# World Bank indicator codes mapped to the row labels used in the table.
production_structure = {
    'NV.AGR.TOTL.ZS': 'Agriculture',
    'NV.IND.TOTL.ZS': 'Industry',
    'NV.IND.MANF.ZS': 'Manufacturing',
    'NV.SRV.TOTL.ZS': 'Services',
}
employment_share = {
    'SL.AGR.EMPL.ZS': 'Agriculture',
    'SL.IND.EMPL.ZS': 'Industry',
    'SL.SRV.EMPL.ZS': 'Services',
}
gdp_by_expenditure = {
    'NE.CON.PRVT.ZS': 'Private consumption',
    'NE.CON.GOVT.ZS': 'Government consumption',
    'NE.GDI.TOTL.ZS': 'Investments',
    'NE.EXP.GNFS.ZS': 'Exports',
    'NE.IMP.GNFS.ZS': 'Imports',
}
data_date = dt.datetime(2019, 1, 1)

# Fetch the three indicator groups for the selected country and date
df_production_structure = wb.get_dataframe(production_structure, country=country, data_date=data_date)
df_employment_share = wb.get_dataframe(employment_share, country=country, data_date=data_date)
df_expenditure = wb.get_dataframe(gdp_by_expenditure, country=country, data_date=data_date)

# Production side: rows that are not fetched from the API are added as empty
# placeholders so they still appear in the table (rendered as '...' at the end).
production_placeholders = ['Agriculture only', 'Forestry and Fishing', 'Construction',
                           'Trade and Transport', 'Finance and Business']
production_row_order = ['Agriculture', 'Agriculture only', 'Forestry and Fishing',
                        'Industry', 'Manufacturing', 'Construction',
                        'Services', 'Trade and Transport', 'Finance and Business']
df_production_structure = df_production_structure.assign(
    **{label: np.nan for label in production_placeholders})
# Transpose so that sectors become rows, then attach the employment shares by
# sector label; the employment column gets the '_e' suffix on a name clash.
df_production_structure = df_production_structure[production_row_order].T
df_employment_share = df_employment_share.T
df_production_structure = df_production_structure.join(df_employment_share, rsuffix='_e').reset_index()

# Expenditure side: placeholders plus net exports derived from exports and imports
expenditure_row_order = ['Private consumption', 'Government consumption',
                         'Investments', 'Private', 'Public',
                         'Net Exports', 'Exports', 'Imports',
                         'Statistical discrepancy']
df_expenditure = df_expenditure.assign(**{
    'Private': np.nan,
    'Public': np.nan,
    'Net Exports': df_expenditure['Exports'] - df_expenditure['Imports'],
    'Statistical discrepancy': np.nan,
})
df_expenditure = df_expenditure[expenditure_row_order].T.reset_index()

# Both halves now carry a default integer index, so this join places them side by
# side row by row (position-based, not label-based).
df_structure = df_production_structure.join(df_expenditure, rsuffix='_ex')
structure_year = data_date.year
df_structure.columns = ['',
                        f'Production Structure in {structure_year}',
                        f'Employment Structure in {structure_year}',
                        '',
                        f'Expenditure Structure in {structure_year}']
df_structure = df_structure.round(1).fillna('...')

In [261]:
# Generate Trade tables
def _rank_by_share(trade, share_column, total):
    """Return `trade` with a percentage-share column, sorted by that share.

    Parameters
    ----------
    trade : pandas.DataFrame
        Trade records; must contain a 'Trade Value' column.
    share_column : str
        Name of the column to add, holding 'Trade Value' as a percentage of `total`.
    total : float
        Denominator used for the share.

    Returns
    -------
    pandas.DataFrame
        A new frame, largest share first, with a fresh 0..n-1 index.
    """
    shares = trade['Trade Value'] / total * 100
    ranked = trade.assign(**{share_column: shares})
    return ranked.sort_values(by=share_column, ascending=False).reset_index(drop=True)


# The positional column picks below ([3, 6] and [5, 7]) presumably select the
# partner/product name and the share column appended by _rank_by_share - verify
# against the layout of the CSV files.

# Generate exports destination table
exports_destination = pd.read_csv('Destinations-2018.csv')
total_exports = exports_destination['Trade Value'].sum()
exports_destination = _rank_by_share(exports_destination, 'Exports Share', total_exports)
df_exports_destination = exports_destination.iloc[:5, [3, 6]]

# Generate imports origin table
imports_origin = pd.read_csv('Origins-2018.csv')
total_imports = imports_origin['Trade Value'].sum()
imports_origin = _rank_by_share(imports_origin, 'Imports Share', total_imports)
df_imports_origin = imports_origin.iloc[:5, [3, 6]]

# Join exports destination and imports origin tables (top five of each, side by side)
df_trade_country = df_exports_destination.join(df_imports_origin, rsuffix='_r')
df_trade_country = df_trade_country.round(1)
# The third column lists where imports come from, so it is labelled "origins".
df_trade_country.columns = ['Main destinations of exports in 2018',
                            '% of total',
                            'Main origins of imports in 2018',
                            '% of total']

# Generate exports items table (shares are relative to the destinations total)
exports_items = pd.read_csv('Exports-2018.csv')
exports_items = _rank_by_share(exports_items, 'Exports Share', total_exports)
df_exports_items = exports_items.iloc[:5, [5, 7]]

# Generate imports items table (shares are relative to the origins total)
imports_items = pd.read_csv('Imports-2018.csv')
imports_items = _rank_by_share(imports_items, 'Imports Share', total_imports)
df_imports_items = imports_items.iloc[:5, [5, 7]]

# Join exports and imports items tables
df_trade_items = df_exports_items.join(df_imports_items, rsuffix='_r')
df_trade_items = df_trade_items.round(1)
df_trade_items.columns = ['Principal exports in 2018',
                          '% of total',
                          'Principal imports in 2018',
                          '% of total']

In [262]:
# Generate Projections table
# World Bank indicator codes mapped to the row labels shown in the table.
projections = {'NY.GDP.MKTP.KD.ZG': 'GDP growth (%)',
               'FP.CPI.TOTL.ZG': 'Inflation (annual average, %)',
               'BN.CAB.XOKA.GD.ZS': 'Current account balance (% of GDP)'}
data_date = dt.datetime(2014, 1, 1), dt.datetime(2021, 1, 1)

df_projections = wb.get_dataframe(projections, country=country, data_date=data_date)

# Pin the year columns explicitly.  The labels assigned below are fixed, so the
# frame must contain exactly these years, in this order, regardless of which
# years the API happens to return for the requested range (extra years are
# dropped, missing years become NaN).
# Assumes the frame is indexed by year labels such as '2014' - TODO confirm.
df_projections.index = df_projections.index.astype(str)
year_labels = [str(year) for year in range(2014, 2021)]
df_projections = df_projections.reindex(year_labels).transpose()
df_projections.reset_index(inplace=True)
df_projections.columns = ['', '2014', '2015', '2016', '2017', '2018', '2019', '2020f']
# 2021 is an empty placeholder column (rendered as '...').
df_projections['2021f'] = np.nan
df_projections = df_projections.round(1).fillna('...')


In [263]:
# Generate Poverty Indicators table
# World Bank indicator codes mapped to the row labels shown in the table; the
# insertion order of this dict is also the row order of the table.
poverty_indicators = {'SI.POV.DDAY': 'Population living less than $1.9 a day (%)',
                      'SI.POV.NAHC': 'Population below national poverty line (%)',
                      'SH.STA.MALN.ZS': 'Underweight children under 5 years old (%)',
                      'SE.PRM.NENR': 'Net enrollment ratio in primary education, Total (%)',
                      'SE.PRM.NENR.FE': 'Net enrollment ratio in primary education, Female (%)',
                      'SE.PRM.NENR.MA': 'Net enrollment ratio in primary education, Male (%)',
                      'SE.ADT.LITR.ZS': 'Adult literacy rate (%)',
                      'SH.STA.MMRT': 'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
                      'SH.STA.MMRT.NE': 'Maternal mortality ratio (national estimate, per 100,000 live births)',
                      'SP.DYN.IMRT.IN': 'Infant mortality rate (below 1 year/per 1,000 live births)',
                      'SP.DYN.LE00.IN': 'Life expectancy at birth (years)',
                      'EN.ATM.CO2E.PC': 'CO2 emissions (metric tons per capita)',
                      'SH.H2O.BASW.ZS': 'People using at least basic drinking services (% of population)',
                      'SH.STA.BASS.ZS': 'People using at least basic sanitation services (% of population)'}
# Code of the comparison aggregate; it is labelled 'OECD' in the table.
comparison_group = 'OED'


def _latest_by_indicator(frame):
    """Return the most recent non-null observation of every column of `frame`.

    Parameters
    ----------
    frame : pandas.DataFrame
        One column per indicator, indexed by year labels that ``int()`` can parse
        (assumption: a single-country wbdata query yields labels such as '2019' -
        TODO confirm).

    Returns
    -------
    pandas.DataFrame
        Indexed by indicator label, with float columns 'value' and 'year'.
        Indicators without any observation are omitted.
    """
    rows = []
    for label, series in frame.items():
        observed = series.dropna()
        if observed.empty:
            continue
        newest = max(observed.index, key=int)
        rows.append((label, observed.loc[newest], int(newest)))
    latest = pd.DataFrame(rows, columns=['Indicator', 'value', 'year'])
    return latest.set_index('Indicator').astype('float64')


def _format_year(year):
    """Render a year without decimals, or '...' when there is no observation."""
    return '...' if pd.isna(year) else '{:.0f}'.format(year)


indicator_order = list(poverty_indicators.values())

# Query each economy on its own so that values can never be attributed to the
# wrong column (a combined query is keyed by display names, whose alphabetical
# order depends on the selected country).
country_latest = _latest_by_indicator(
    wb.get_dataframe(poverty_indicators, country=country)).reindex(indicator_order)
oecd_latest = _latest_by_indicator(
    wb.get_dataframe(poverty_indicators, country=comparison_group)).reindex(indicator_order)

# Years are formatted with their own missing-value marker so that a missing year
# shows up as '...' rather than the text 'nan'.
df_poverty = pd.concat([country_latest['value'].round(1),
                        country_latest['year'].map(_format_year),
                        oecd_latest['value'].round(1),
                        oecd_latest['year'].map(_format_year)], axis=1)
df_poverty = df_poverty.reset_index()
df_poverty.columns = ['Indicator', f'{country}', 'Year', 'OECD', 'Year']
df_poverty = df_poverty.fillna('...')

In [264]:
def _add_dataframe_table(document, heading, frame):
    """Append `heading` and a grid table with the contents of `frame` to `document`.

    Parameters
    ----------
    document : docx document object
        Document the heading and the table are appended to.
    heading : str
        Text of the heading placed above the table.
    frame : pandas.DataFrame
        Data to export; the column labels become the first table row and every
        cell value is written as ``str(value)``.

    Returns
    -------
    The table object created by ``document.add_table``.
    """
    document.add_heading(heading)
    n_rows, n_cols = frame.shape
    table = document.add_table(n_rows + 1, n_cols)
    for j, label in enumerate(frame.columns):
        table.cell(0, j).text = str(label)
    # Materialise the values once instead of rebuilding the array for every cell.
    cells = frame.values
    for i in range(n_rows):
        for j in range(n_cols):
            table.cell(i + 1, j).text = str(cells[i, j])
    table.style = 'Table Grid'
    return table


# Export Country Overview table to Word
table1 = _add_dataframe_table(doc, 'Country Overview', df_country_overview)

# Export the Economic Structure table to Word
table2 = _add_dataframe_table(doc, 'Economic Structure', df_structure)

# Export the Trade Country table to Word
table3 = _add_dataframe_table(doc, 'Trade Structure: Exports destinations and Import Origins',
                              df_trade_country)

# Export the Trade Items table to Word
table4 = _add_dataframe_table(doc, 'Trade Structure: Exports and Imports Products',
                              df_trade_items)

# Export the Projections table
table5 = _add_dataframe_table(doc, 'Selected Economic Indicators and Projections',
                              df_projections)

# Export the Poverty Indicators table
table6 = _add_dataframe_table(doc, 'Key Poverty and Social Indicators', df_poverty)

# Write the assembled document next to the notebook, named after the country code.
doc.save(f'./{country}_CIN.docx')