# IMF World Economic Outlook, October 2022

Source: [IMF WEO database, October 2022](https://www.imf.org/en/Publications/WEO/weo-database/2022/October)

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the complete WEO October 2022 export; the file is expected next to this notebook.
main = pd.read_csv(filepath_or_buffer='WEOOct2022all.csv')

In [3]:
# Inspect the raw frame; the rendering is truncated (the '...' row and column).
main

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Subject Descriptor,Subject Notes,Units,Scale,Country/Series-specific Notes,1980,...,2019,2020,2021,2022,2023,2024,2025,2026,2027,Estimates Start After
0,512,AFG,NGDP_R,Afghanistan,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,1319.90,1288.87,,,,,,,,2020.0
1,512,AFG,NGDP_RPCH,Afghanistan,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,,"See notes for: Gross domestic product, consta...",,...,3.912,-2.351,,,,,,,,2020.0
2,512,AFG,NGDP,Afghanistan,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,1469.60,1547.29,,,,,,,,2020.0
3,512,AFG,NGDPD,Afghanistan,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,Billions,"See notes for: Gross domestic product, curren...",,...,18.876,20.136,,,,,,,,2020.0
4,512,AFG,PPPGDP,Afghanistan,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,Billions,"See notes for: Gross domestic product, curren...",,...,81.873,80.912,,,,,,,,2020.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8621,698,ZWE,NGDP_FY,Zimbabwe,Gross domestic product corresponding to fiscal...,Gross domestic product corresponding to fiscal...,National currency,Billions,Source: Ministry of Finance or Treasury Latest...,,...,187.419,1189.14,2911.11,14408.63,41965.70,72190.13,97188.08,113996.38,131931.52,2019.0
8622,698,ZWE,BCA,Zimbabwe,Current account balance,Current account is all transactions other than...,U.S. dollars,Billions,Source: Reserve Bank of Zimbabwe and Ministry ...,-0.301,...,0.92,0.678,0.348,0.215,0.096,0.149,0.206,0.237,0.201,2020.0
8623,698,ZWE,BCA_NGDPD,Zimbabwe,Current account balance,Current account is all transactions other than...,Percent of GDP,,"See notes for: Gross domestic product, curren...",,...,4.003,2.926,1.059,0.561,0.257,0.384,0.531,0.612,0.498,2019.0
8624,,,,,,,,,,,...,,,,,,,,,,


Set the Countries

In [33]:
# Country lookup table: one row per distinct (WEO code, ISO, name) combination.
countries = (
    main.loc[:, ['WEO Country Code', 'ISO', 'Country']]
    .drop_duplicates()
    # id column: WEO country code concatenated with the ISO code
    .assign(country_id=lambda frame: frame['WEO Country Code'] + frame['ISO'])
    .drop(columns=['WEO Country Code', 'ISO'])
    # rows with a missing id or a missing country name are discarded
    .dropna()
    .rename(columns={'Country': 'country'})
    # final column order
    .loc[:, ['country_id', 'country']]
)

In [34]:
# Inspect the country lookup table (country_id and country name).
countries

Unnamed: 0,country_id,country
0,512AFG,Afghanistan
44,914ALB,Albania
88,612DZA,Algeria
132,171AND,Andorra
176,614AGO,Angola
...,...,...
8404,582VNM,Vietnam
8448,487WBG,West Bank and Gaza
8492,474YEM,Yemen
8536,754ZMB,Zambia


Set the Subjects (topics)

In [16]:
# Subject lookup table: one row per distinct combination of the subject-level columns,
# renamed to the short snake_case names used in the exported CSV.
subjects = (
    main[['WEO Subject Code', 'Subject Descriptor', 'Subject Notes', 'Units', 'Scale']]
    .drop_duplicates()
    .rename(columns={
        'WEO Subject Code': 'subject_id',
        'Subject Descriptor': 'descriptor',
        'Subject Notes': 'notes',
        'Units': 'units',
        'Scale': 'scale',
    })
    # Rows with no subject code (the blank trailer row at the end of the export) are not
    # subjects. Filtering on the key instead of a hard-coded row label keeps the cell
    # working if the row count of the export changes.
    .dropna(subset=['subject_id'])
)

In [17]:
# Inspect the subject lookup table built in the previous cell.
subjects

Unnamed: 0,subject_id,descriptor,notes,units,scale
0,NGDP_R,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Billions
1,NGDP_RPCH,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,
2,NGDP,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Billions
3,NGDPD,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,Billions
4,PPPGDP,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,Billions
5,NGDP_D,"Gross domestic product, deflator",The GDP deflator is derived by dividing curren...,Index,
6,NGDPRPC,"Gross domestic product per capita, constant pr...",GDP is expressed in constant national currency...,National currency,Units
7,NGDPRPPPPC,"Gross domestic product per capita, constant pr...",GDP is expressed in constant international dol...,Purchasing power parity; 2017 international do...,Units
8,NGDPPC,"Gross domestic product per capita, current prices",GDP is expressed in current national currency ...,National currency,Units
9,NGDPDPC,"Gross domestic product per capita, current prices",GDP is expressed in current U.S. dollars per p...,U.S. dollars,Units


In [52]:
# get country code 
ser = main.rename(columns={'Country':'country'}).merge(countries, on='country', how='inner')
# rename columns
ser = ser.rename(columns={'id': 'country_id', 'WEO Subject Code':'subject_id','Country/Series-specific Notes':'notes', 'Estimates Start After':'estimates_start' })
# drop unecessary columns 
ser = ser.drop(['WEO Country Code','ISO', 'country', 'Subject Descriptor', 'Subject Notes', 'Units', 'Scale'], axis=1)
# column reorder
start_cols = ['subject_id', 'country_id', 'estimates_start', 'notes']
ser = ser[start_cols + [c for c in ser.columns if c not in start_cols]]
ser


Unnamed: 0,subject_id,country_id,estimates_start,notes,1980,1981,1982,1983,1984,1985,...,2018,2019,2020,2021,2022,2023,2024,2025,2026,2027
0,NGDP_R,512AFG,2020.0,Source: National Statistics Office Latest actu...,,,,,,,...,1270.22,1319.90,1288.87,,,,,,,
1,NGDP_RPCH,512AFG,2020.0,"See notes for: Gross domestic product, consta...",,,,,,,...,1.189,3.912,-2.351,,,,,,,
2,NGDP,512AFG,2020.0,Source: National Statistics Office Latest actu...,,,,,,,...,1327.69,1469.60,1547.29,,,,,,,
3,NGDPD,512AFG,2020.0,"See notes for: Gross domestic product, curren...",,,,,,,...,18.401,18.876,20.136,,,,,,,
4,PPPGDP,512AFG,2020.0,"See notes for: Gross domestic product, curren...",,,,,,,...,77.406,81.873,80.912,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8619,GGXWDG,698ZWE,2019.0,Source: Ministry of Finance or Treasury Latest...,,,,,,,...,18.842,174.699,1218.76,1947.91,13347.46,27224.17,41501.09,52964.56,60566.34,67806.72
8620,GGXWDG_NGDP,698ZWE,2019.0,See notes for: General government gross debt ...,,,,,,,...,51.001,93.213,102.491,66.913,92.635,64.872,57.489,54.497,53.13,51.395
8621,NGDP_FY,698ZWE,2019.0,Source: Ministry of Finance or Treasury Latest...,,,,,,,...,36.945,187.419,1189.14,2911.11,14408.63,41965.70,72190.13,97188.08,113996.38,131931.52
8622,BCA,698ZWE,2020.0,Source: Reserve Bank of Zimbabwe and Ministry ...,-0.301,-0.674,-0.748,-0.504,-0.171,-0.153,...,-1.38,0.92,0.678,0.348,0.215,0.096,0.149,0.206,0.237,0.201


In [75]:
# Reshape wide -> long: every column that is not an identifier becomes a (year, value) row.
series_id_cols = ['subject_id', 'country_id', 'estimates_start', 'notes']
ser_long = ser.melt(id_vars=series_id_cols, var_name='year', value_name='value')

In [79]:
# Missing estimate-start years become the sentinel 0 so the column can be stored as int.
ser_long['estimates_start'] = ser_long['estimates_start'].fillna(0).astype(int)

value_raw = ser_long['value']
# Strip text markers only while the column still holds text. After the first run it is
# already numeric and `.str` would raise AttributeError, so this guard keeps the cell
# safe to re-run.
if not pd.api.types.is_numeric_dtype(value_raw):
    value_raw = (
        value_raw
        # regex=False: literal replacement regardless of the pandas version's default
        .str.replace(',', '', regex=False)    # remove ',' (thousands separators)
        .str.replace('--', '', regex=False)   # blank out the '--' marker
    )
ser_long['value'] = pd.to_numeric(value_raw)

In [80]:
# Inspect the long-format series after the numeric conversion above.
ser_long

Unnamed: 0,subject_id,country_id,estimates_start,notes,year,value
0,NGDP_R,512AFG,2020,Source: National Statistics Office Latest actu...,1980,
1,NGDP_RPCH,512AFG,2020,"See notes for: Gross domestic product, consta...",1980,
2,NGDP,512AFG,2020,Source: National Statistics Office Latest actu...,1980,
3,NGDPD,512AFG,2020,"See notes for: Gross domestic product, curren...",1980,
4,PPPGDP,512AFG,2020,"See notes for: Gross domestic product, curren...",1980,
...,...,...,...,...,...,...
413947,GGXWDG,698ZWE,2019,Source: Ministry of Finance or Treasury Latest...,2027,67806.720
413948,GGXWDG_NGDP,698ZWE,2019,See notes for: General government gross debt ...,2027,51.395
413949,NGDP_FY,698ZWE,2019,Source: Ministry of Finance or Treasury Latest...,2027,131931.520
413950,BCA,698ZWE,2020,Source: Reserve Bank of Zimbabwe and Ministry ...,2027,0.201


Maximum string length per column (for sizing VARCHAR columns)

In [42]:
# Print the longest string representation found in each column of `subjects`.
# Series.str.len() is vectorised and replaces DataFrame.applymap, deprecated in pandas 2.1.
for c in subjects.columns:
    longest = subjects[c].astype(str).str.len().max()
    print(f'{c} - {longest}')

subject_id - 12
descriptor - 82
notes - 1313
units - 50
scale - 8


In [44]:
# Print the longest string representation found in each column of `ser_long`.
# Series.str.len() is vectorised and replaces DataFrame.applymap, deprecated in pandas 2.1.
for c in ser_long.columns:
    longest = ser_long[c].astype(str).str.len().max()
    print(f'{c} - {longest}')

subject_id - 12
country_id - 6
estimates_start - 6
notes - 3264
year - 4
value - 24


In [43]:
# Print the longest string representation found in each column of `countries`.
# Series.str.len() is vectorised and replaces DataFrame.applymap, deprecated in pandas 2.1.
for c in countries.columns:
    longest = countries[c].astype(str).str.len().max()
    print(f'{c} - {longest}')

country_id - 6
country - 32


In [81]:
# Export the long series table; the row index is written as the 'id' column.
ser_long.to_csv(path_or_buf='csv_files/series.csv', index_label='id', index=True)

In [47]:
# Export the country lookup table without the pandas index.
countries.to_csv(path_or_buf='csv_files/countries.csv', index=False)

In [48]:
# Export the subject lookup table without the pandas index.
subjects.to_csv(path_or_buf='csv_files/subjects.csv', index=False)