In [1]:
import os

import altair as alt
import pandas as pd

In [2]:
# Load the metadata staging table.
# The location can be overridden with the ECO_METADATA_CSV environment
# variable so the notebook is not tied to one machine's home directory;
# when the variable is unset the original local path is used.
METADATA_CSV = os.environ.get(
    'ECO_METADATA_CSV',
    '/Users/joshhellings/Documents/ECO/api/metadata-staging.csv',
)
df = pd.read_csv(METADATA_CSV)

In [3]:
# Number of rows in the staging table
df.shape[0]

211

In [4]:
# Print how many distinct values the country and series columns hold,
# then show how often each series code occurs.
for label, column in (('Unique Countries: ', 'country'), ('Unique Series: ', 'ecoSeries')):
    print(label, df[column].nunique())
df['ecoSeries'].value_counts()

Unique Countries:  24
Unique Series:  27


infl     23
unem     21
popu     20
impo     19
expo     19
grow     18
empl     14
debt     12
gdpp     11
gdpa     10
prod      7
inac      6
wage      6
empd      6
ineq      5
gdp       2
part      2
inter     1
conf      1
petro     1
capit     1
by10      1
undr      1
house     1
fert      1
esta      1
umpd      1
Name: ecoSeries, dtype: int64

In [24]:
# Bar chart of how many rows each series code has, bars sorted by
# descending count.
series_axis = alt.Axis(labelAngle=-30, labelOffset=5)
count_axis = alt.Axis(labelFontSize=11, zindex=1, gridOpacity=0.3)

alt.Chart(df).mark_bar().encode(
    x=alt.X('ecoSeries:N', sort='-y', axis=series_axis),
    y=alt.Y('count(ecoSeries):Q', title='Country Count', axis=count_axis),
)

In [6]:
# Which country has no inflation (`infl`) series?
# Start from the set of every country code and remove the codes that
# appear on an `infl` row; whatever is left lacks the series.

has_infl = df['ecoSeries'] == 'infl'
infl_countries = df.loc[has_infl, 'country'].unique()

all_countries = df['country'].unique()

set(all_countries).difference(infl_countries)

{'bra'}

In [7]:
# Glossary for some of the less common series codes:
# `by10`: 10-year bond yield
# `capit`: Capital Formation, gross fixed (Iran only)
# `petro`: Petrochemical Industry Output (Iran only)
# `conf`: Consumer Confidence (Mexico only)
# `esta`: Real estate investment (China only)
# `inter`: Interest rate, base rate (Mexico only)

# Show the metadata row(s) for the `inter` series
df.loc[df['ecoSeries'].eq('inter')]

Unnamed: 0,country_full,country,ecoSeries,ecoAPI,localAPI,localSeries,localDataset,Title,SubTitle,API_base,...,description,units,date,field,multiplier,date_keys,value_keys,filter,orient,use_cache
133,Mexico,mex,inter,https://api.economicsobservatory.com/mex/inter,https://en.www.inegi.org.mx/app/api/indicadore...,182022,BIE,Interest rate,"Base rate, % | Source: INEGI through ECO API",https://en.www.inegi.org.mx/app/api/indicadore...,...,Central Bank Interest Rates,%,%,,1,,,,,0


In [8]:
# Description text attached to the rows whose series code is `gdp`
df.loc[df['ecoSeries'] == 'gdp', 'description'].values

array([nan,
       'Seasonally adjusted Gross Domestic Product at current prices (EUR mn)Ã”Ã¸Î©'],
      dtype=object)

In [9]:
# `gdpa`: constant 2015 US$ -- preview the first two matching rows
df.loc[df['ecoSeries'].eq('gdpa')].iloc[:2]

Unnamed: 0,country_full,country,ecoSeries,ecoAPI,localAPI,localSeries,localDataset,Title,SubTitle,API_base,...,description,units,date,field,multiplier,date_keys,value_keys,filter,orient,use_cache
6,Bangladesh,bgd,gdpa,https://api.economicsobservatory.com/bgd/gdpa,https://raw.githubusercontent.com/EconomicsObs...,,,GDP,2015 USD | Source: BBS (WB) through ECO API,https://raw.githubusercontent.com/EconomicsObs...,...,GDP (2015 US$),constant 2015 US$,,,1,,,,,1
38,DR Congo,cod,gdpa,https://api.economicsobservatory.com/cod/gdpa,https://raw.githubusercontent.com/EconomicsObs...,,,GDP,2015 USD | Source: World Bank through ECO API,https://raw.githubusercontent.com/EconomicsObs...,...,GDP (2015 US$),constant 2015 US$,,,1,,,,,1


In [12]:
# Number of distinct series codes per country, largest first
df_count = (
    df.groupby('country')['ecoSeries']
    .nunique()
    .sort_values(ascending=False)
)
df_count

country
gbr    12
idn    12
ind    10
jpn    10
usa    10
can    10
tur    10
cod    10
tha    10
eth    10
pak    10
phl     9
vnm     9
bgd     9
fra     9
egy     9
nga     8
deu     8
mex     7
rus     7
chn     7
irn     6
bra     5
aus     4
Name: ecoSeries, dtype: int64

In [17]:
# Show the per-country counts as a one-column DataFrame
df_count.to_frame()

Unnamed: 0_level_0,ecoSeries
country,Unnamed: 1_level_1
gbr,12
idn,12
ind,10
jpn,10
usa,10
can,10
tur,10
cod,10
tha,10
eth,10


In [16]:
# Write the per-country series counts to a CSV (path is relative to the
# current working directory)
df_count.to_csv(path_or_buf='series-count.csv')

In [11]:
# Count the non-null `ecoSeries` values on rows whose `use_cache`
# flag compares equal to True
cached_rows = df['use_cache'].eq(True)
df.loc[cached_rows, 'ecoSeries'].count()

156