### HDR: Mutual attributes and countries

In [1]:
import numpy as np
import os
import pandas as pd

from pprint import pprint

#### Set input/output folders

In [2]:
# Folder the yearly CSV files are read from.
data_in  = '../data/HDR_1c_aggregate_attr_by_year'
# Folder the resulting CSV files are written to.
data_out = '../data/HDR_mutual_attributes_and_countries'

# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the folder already exists, so no separate existence check (and
# no check-then-create race) is needed.
os.makedirs(data_out, exist_ok=True)

#### Export mutual attributes and countries

In [3]:
# Find the attributes (columns) and countries (index labels) that occur in
# every yearly CSV, then export both as sorted single-column CSV files.
years = np.arange(2010, 2016 + 1)
print(years, end='\n\n')

countries  = set()
attributes = set()

# Loaded frames, keyed by year.
dfs = dict()

for position, year in enumerate(years):
    dfs[year] = pd.read_csv(f'{data_in}/{year}.csv', index_col='Country')

    year_attributes = dfs[year].columns.values
    year_countries  = dfs[year].index.values

    if position == 0:
        # Seed the running sets from the first year only. Testing for an
        # empty set instead would re-seed them whenever the intersection
        # legitimately became empty, silently producing a wrong result.
        attributes.update(year_attributes)
        countries.update(year_countries)
    else:
        # Keep only the labels that are also present in this year.
        attributes.intersection_update(year_attributes)
        countries.intersection_update(year_countries)

    print(f'# of attributes after {year}:', len(attributes))
    print(f'# of countries  after {year}:', len(countries), end='\n\n')

# index=False is the documented way to omit the row index from the output.
pd.Series(list(attributes)).sort_values() \
  .to_csv(f'{data_out}/attributes_{years[0]}_{years[-1]}.csv', index=False)
pd.Series(list(countries)).sort_values() \
  .to_csv(f'{data_out}/countries_{years[0]}_{years[-1]}.csv', index=False)

[2010 2011 2012 2013 2014 2015 2016]

# of attributes after 2010: 115
# of countries  after 2010: 195

# of attributes after 2011: 107
# of countries  after 2011: 195

# of attributes after 2012: 107
# of countries  after 2012: 195

# of attributes after 2013: 107
# of countries  after 2013: 195

# of attributes after 2014: 107
# of countries  after 2014: 195

# of attributes after 2015: 105
# of countries  after 2015: 195

# of attributes after 2016: 98
# of countries  after 2016: 195

