In [1]:
import numpy as np
import pandas as pd
import pickle
import io

from scipy.stats import rankdata

## Constantes

In [2]:
# Same constants as in notebook 1_...
# ANIO and VENTANA are embedded in the names of the serialized files loaded below,
# and the output CSV is labelled with the range ANIO-VENTANA+1 .. ANIO.
ANIO = 2018
VENTANA = 5
# Directory holding the input CSV lists and the exported results.
DATASETS_DIR = './datasets/'

## Lee datasets

In [3]:
# Load the green product list and expose its HS92 codes as a NumPy string array.
# NOTE(review): if the CSV stores the codes as integers, astype(str) cannot restore
# leading zeros -- confirm the result matches the code format used in `products`.
green = (
    pd.read_csv(DATASETS_DIR + 'green_subset_RPOL_list.csv')
    .rename(columns={'Green HS92 codes': 'HS92'})
)
green_codes = green['HS92'].to_numpy().astype(str)
green_codes.shape, green_codes.dtype

((293,), dtype('<U21'))

In [4]:
# Load the renewable-energy product list and expose its HS92 codes as a NumPy string array.
# NOTE(review): if the CSV stores the codes as integers, astype(str) cannot restore
# leading zeros -- confirm the result matches the code format used in `products`.
renewables = (
    pd.read_csv(DATASETS_DIR + 'RE_subset_RPOL_list.csv')
    .rename(columns={'RE HS92 codes': 'HS92'})
)
renew_codes = renewables['HS92'].to_numpy().astype(str)
renew_codes.shape, renew_codes.dtype

((57,), dtype('<U21'))

In [5]:
# Display the column names of both product lists (after renaming the code column to 'HS92').
green.columns, renewables.columns

(Index(['HS92', 'HS92 description'], dtype='object'),
 Index(['HS92', 'HS92 description'], dtype='object'))

In [6]:
# Load the pickled `locations` object for this ANIO/VENTANA.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# files that were generated locally by a trusted notebook.
locations_pkl = f'serializables/locations_{ANIO}_{VENTANA}.pkl'
with open(locations_pkl, 'rb') as fh:
    locations = pickle.load(fh)

# Country list; the column that arrives named '0' holds the country name.
paises = pd.read_csv(DATASETS_DIR + "lista_paises_iso3.csv").rename(columns={'0': 'Name'})

In [7]:
# Load the pickled `products` object for this ANIO/VENTANA (trusted local file).
products_pkl = f'serializables/products_{ANIO}_{VENTANA}.pkl'
with open(products_pkl, 'rb') as fh:
    products = pickle.load(fh)

In [8]:
# Load the pickled `proximity` object for this ANIO/VENTANA (trusted local file).
proximity_pkl = f'serializables/proximity_{ANIO}_{VENTANA}.pkl'
with open(proximity_pkl, 'rb') as fh:
    proximity = pickle.load(fh)

In [9]:
# Load the pickled `proximity_c` object for this ANIO/VENTANA (trusted local file).
proximity_c_pkl = f'serializables/proximity_c_{ANIO}_{VENTANA}.pkl'
with open(proximity_c_pkl, 'rb') as fh:
    proximity_c = pickle.load(fh)

In [10]:
# Load the pickled density object for this ANIO/VENTANA (trusted local file).
density_pkl = f'serializables/density_{ANIO}_{VENTANA}.pkl'
with open(density_pkl, 'rb') as fh:
    density_cp = pickle.load(fh)

In [11]:
# Load the pickled `export_values` object for this ANIO/VENTANA (trusted local file).
export_values_pkl = f'serializables/export_values_{ANIO}_{VENTANA}.pkl'
with open(export_values_pkl, 'rb') as fh:
    export_values = pickle.load(fh)

In [12]:
# Load the pickled `Mcp` matrix for this ANIO/VENTANA (trusted local file).
Mcp_pkl = f'serializables/Mcp_{ANIO}_{VENTANA}.pkl'
with open(Mcp_pkl, 'rb') as fh:
    Mcp = pickle.load(fh)

In [13]:
# Load the pickled `eci` values for this ANIO/VENTANA (trusted local file).
eci_pkl = f'serializables/eci_{ANIO}_{VENTANA}.pkl'
with open(eci_pkl, 'rb') as fh:
    eci = pickle.load(fh)

In [14]:
# Load the pickled `pci` values for this ANIO/VENTANA (trusted local file).
pci_pkl = f'serializables/pci_{ANIO}_{VENTANA}.pkl'
with open(pci_pkl, 'rb') as fh:
    pci = pickle.load(fh)

In [15]:
# Load the pickled `RCA` object for this ANIO/VENTANA (trusted local file).
RCA_pkl = f'serializables/RCA_{ANIO}_{VENTANA}.pkl'
with open(RCA_pkl, 'rb') as fh:
    RCA = pickle.load(fh)

### Toma índices de productos verdes y de energías renovables. También toma índices de países regionales y desarrollados

In [16]:
# Map each green HS92 code to its position in `products`.
# np.searchsorted only returns insertion points: it assumes `products` is sorted and
# does not check that a code is actually present. An absent code would silently be
# mapped to a neighbouring product (or to len(products)), so every match is verified.
products_arr = np.asarray(products)
green_idx = np.searchsorted(products_arr, green_codes)
green_found = green_idx < len(products_arr)
green_found[green_found] = products_arr[green_idx[green_found]] == green_codes[green_found]
if not green_found.all():
    raise ValueError(
        f'{int((~green_found).sum())} green HS92 codes were not found in products, '
        f'e.g. {green_codes[~green_found][:10].tolist()}'
    )
green_idx.shape, green_idx.dtype

((293,), dtype('int64'))

In [17]:
# Map each renewable-energy HS92 code to its position in `products`.
# np.searchsorted only returns insertion points: it assumes `products` is sorted and
# does not check that a code is actually present. An absent code would silently be
# mapped to a neighbouring product (or to len(products)), so every match is verified.
products_arr = np.asarray(products)
renew_idx = np.searchsorted(products_arr, renew_codes)
renew_found = renew_idx < len(products_arr)
renew_found[renew_found] = products_arr[renew_idx[renew_found]] == renew_codes[renew_found]
if not renew_found.all():
    raise ValueError(
        f'{int((~renew_found).sum())} renewable HS92 codes were not found in products, '
        f'e.g. {renew_codes[~renew_found][:10].tolist()}'
    )
renew_idx.shape, renew_idx.dtype

((57,), dtype('int64'))

In [18]:
# ISO3 code -> display name for the "regional" comparison group.
regional_names = {
    'BRA': 'Brasil',
    'MEX': 'México',
    'COL': 'Colombia',
    'ARG': 'Argentina',
    'PER': 'Perú',
    'VEN': 'Venezuela',
    'CHL': 'Chile',
    'ECU': 'Ecuador',
    'BOL': 'Bolivia',
    'PRY': 'Paraguay',
    'URY': 'Uruguay',
}
# Iterating a dict yields its keys in insertion order.
regional_codes = list(regional_names)
regional_codes

['BRA', 'MEX', 'COL', 'ARG', 'PER', 'VEN', 'CHL', 'ECU', 'BOL', 'PRY', 'URY']

"Regional" se refiere a los países de América del Sur más México.

In [19]:
# ISO3 codes of the comparison group that this notebook labels "developed".
developed_codes = [
    'CHN', 'ITA', 'ESP', 'FRA', 'DEU',
    'TUR', 'POL', 'AUT', 'PRT', 'IND',
    'USA', 'JPN', 'CZE', 'DNK', 'GBR',
]

## Cálculo GCI

In [20]:
# PCI values of the green products only (selected by their positions in green_idx).
pci_g = pci[green_idx]

In [21]:
# Mealy's paper asks for all green PCI values to be normalised to the [0, 1] range.
# Use the array's own reductions rather than the Python builtins min()/max(), which
# iterate element by element in the interpreter and only work on 1-D input.
pci_g_min, pci_g_max = pci_g.min(), pci_g.max()
pci_g = (pci_g - pci_g_min) / (pci_g_max - pci_g_min)

In [22]:
# GCI (unstandardised): for each row of Mcp, sum the normalised green PCI values
# weighted by that row's entries in the green-product columns.
green_presence = Mcp[:, green_idx]
gci = np.matmul(green_presence, pci_g)

In [23]:
# Standardise GCI: subtract its mean and divide by its standard deviation.
gci_mean = gci.mean()
gci_std = gci.std()
gci = (gci - gci_mean) / gci_std

In [24]:
# Sanity check: gci has one value per row of Mcp.
gci.shape

(122,)

## Cálculo GCP

In [25]:
# Complement of Mcp on the green-product columns: 1 where Mcp is 0, and vice versa.
green_shape = (len(locations), len(green_idx))
non_RCA = (np.ones(green_shape, dtype=np.int64) - Mcp[:, green_idx]).astype(float)
# Per-row total of the complement; used as the GCP denominator.
denominator = non_RCA.sum(axis=1)

In [26]:
# Re-derive the raw green PCI values (pci_g was normalised in place in the GCI section).
pci_g = pci[green_idx]

In [27]:
# Normalise the green PCI values to the [0, 1] range.
# Use the array's own reductions rather than the Python builtins min()/max(), which
# iterate element by element in the interpreter and only work on 1-D input.
pci_g_min, pci_g_max = pci_g.min(), pci_g.max()
pci_g = (pci_g - pci_g_min) / (pci_g_max - pci_g_min)

In [28]:
# GCP numerator: density on the green columns, masked by the Mcp complement,
# then weighted by the normalised green PCI and summed per row.
masked_density = non_RCA * density_cp[:, green_idx]
numerator = np.matmul(masked_density, pci_g)
numerator.shape, numerator.dtype

((122,), dtype('float64'))

In [29]:
# GCP (unstandardised): PCI-weighted density sum divided by the per-row count of the Mcp complement.
# NOTE(review): assuming Mcp is a 0/1 matrix, denominator is 0 for a row whose green
# entries are all 1, which would produce inf/nan here -- confirm this cannot happen.
gcp = numerator / denominator

In [30]:
# Standardise GCP: subtract its mean and divide by its standard deviation.
gcp_mean = gcp.mean()
gcp_std = gcp.std()
gcp = (gcp - gcp_mean) / gcp_std

## Cálculo del Complexity Outlook Index (COI)

In [31]:
# Distances (see The Atlas of Economic Complexity, Hausmann et al. 2014,
# technical box 5.4, page 62):
#   distance_cp[c, p] = sum_q (1 - Mcp[c, q]) * proximity[q, p] / sum_q proximity[p, q]

non_RCA_all = np.ones((len(locations), len(products)), dtype=np.int64) - Mcp
non_RCA_all = non_RCA_all.astype(float)

# Row sums of the proximity matrix, one per product. Dividing the (location x product)
# matrix by this 1-D vector rescales column i by proximity[i, :].sum() in a single
# broadcasted operation instead of a Python loop over every product.
proximity_totals = np.asarray(proximity).sum(axis=1)
distance_cp = np.matmul(non_RCA_all, proximity) / proximity_totals

In [32]:
# Complement of the distance matrix: 1 - distance_cp, element-wise.
full_shape = (len(locations), len(products))
inv_distance_cp = np.ones(full_shape, dtype=float) - distance_cp

In [33]:
# COI (unstandardised): PCI summed per row, weighted by (1 - distance) on the
# entries where the Mcp complement is non-zero.
outlook_weights = non_RCA_all * inv_distance_cp
coi = np.matmul(outlook_weights, pci)

In [34]:
# Standardise COI: subtract its mean and divide by its standard deviation.
coi_mean = coi.mean()
coi_std = coi.std()
coi = (coi - coi_mean) / coi_std

## Crea DataFrame para índices de países

In [35]:
# Assemble the per-country table: one row per entry of `locations`, one column per index.
df_countries = pd.DataFrame({
    'Country_code': locations,
    'ECI': eci,
    'GCI': gci,
    'GCP': gcp,
    'COI': coi
})

# Add a world-ranking column for each index (1 = highest value).
# Series.rank computes the rank per row directly, so the result does not depend on the
# current row order and the frame is not re-sorted in place once per index.
# method='first' gives tied values distinct consecutive ranks; na_option='bottom'
# places missing values last, as a descending sort would.
for idx in ['ECI', 'GCI', 'GCP', 'COI']:
    df_countries[idx + '_rank'] = (
        df_countries[idx]
        .rank(method='first', ascending=False, na_option='bottom')
        .astype(int)
    )

In [36]:
# Add the country name by looking up each ISO3 code in `paises`.
# One vectorised lookup replaces the row-by-row scan of both frames. When an ISO3 code
# is repeated in `paises` the first entry wins, and a code that is absent from `paises`
# fails with an explicit message instead of an opaque IndexError.
name_by_iso3 = paises.drop_duplicates(subset='ISO3').set_index('ISO3')['Name']
unknown_codes = df_countries.loc[
    ~df_countries['Country_code'].isin(name_by_iso3.index), 'Country_code'
]
if not unknown_codes.empty:
    raise ValueError(f'Country codes missing from paises.ISO3: {unknown_codes.tolist()}')
df_countries['Country'] = df_countries['Country_code'].map(name_by_iso3)

In [37]:
# Sort the table in descending order of the column named by `ordena_por`.
ordena_por = 'GCI'
df_countries = df_countries.sort_values(by=ordena_por, ascending=False)

In [38]:
# Keep only the reporting columns, with each index next to its rank.
column_order = [
    'Country', 'Country_code',
    'ECI', 'ECI_rank',
    'GCI', 'GCI_rank',
    'GCP', 'GCP_rank',
    'COI', 'COI_rank',
]
df_countries = df_countries.loc[:, column_order].copy()

In [39]:
# Export the table; the file name carries the first and last year of the window.
# The DataFrame index is written as the first (unnamed) column, which is the to_csv default.
countries_csv = DATASETS_DIR + f'df_countries_{ANIO-VENTANA+1}_{ANIO}.csv'
df_countries.to_csv(countries_csv)

In [40]:
# Display the row for Argentina.
is_arg = df_countries['Country_code'] == 'ARG'
df_countries.loc[is_arg]

Unnamed: 0,Country,Country_code,ECI,ECI_rank,GCI,GCI_rank,GCP,GCP_rank,COI,COI_rank
3,Argentina,ARG,0.197288,49,-0.593356,73,-0.403258,68,0.310957,42
