# main.ipynb

## modules

### common

In [1]:
import os
import shutil
import json
from uuid import uuid4 # nos servirá para darle un códgio único para cada iteración del notebook
from pathlib import Path # para crear rutas de navegación.

### requirements

In [2]:
import requests # contacto con APIs
import pendulum # para el tipo de datos datatime y más funciones con fechas
import pandas as pd
import fire

## 1. pendulum

In [8]:
# > variables
# Time-zone name handed to pendulum when the current date is computed.
# The `: str` annotation only documents that the value is a string.
region_city: str = 'America/Lima'

In [10]:
# pendulum is a library for working with dates and times in Python.
# Here it returns today's date in the configured time zone as a date string.
c_date = pendulum.now(region_city).to_date_string()

# The time-zone name lives in `region_city`; the previous `rc` was not defined
# in any cell, so this line raised NameError on a fresh kernel.
print(c_date, '---', region_city)

2022-09-07 --- America/Lima


## 2. requests

In [5]:
# > variables
headers = {'Content-Type': 'application/json'}  # header sent along with the request
serie = 'PM04902AA'  # code of the series to download
# The series code is embedded in the URL path; the trailing 2020/2022 segments
# presumably delimit the requested years (verify against the API documentation).
api_url = f'https://estadisticas.bcrp.gob.pe/estadisticas/series/api/{serie}/json/2020/2022/'

In [15]:
# > request + try-except
# Download the series and parse the JSON body into `api_data`.
# Any failure (network, HTTP status, invalid JSON) prints a short notice and is re-raised.
try:
    response = requests.get(url=api_url, headers=headers, timeout=60)
    # Turn 4xx/5xx answers into an exception instead of parsing an error body as data.
    response.raise_for_status()
    response.encoding = 'utf-8'  # decode the body as UTF-8
    api_data = json.loads(response.text)
except Exception:
    print('[INFO] something went wrong...')
    raise

In [19]:
# Display the parsed API response as a Python object.
api_data

{'config': {'title': 'Ingreso nacional disponible (millones S/ 2007)',
  'series': [{'name': 'Ingreso nacional disponible (millones S/ 2007) - Renta de Factores',
    'dec': '0'}]},
 'periods': [{'name': '2020', 'values': ['-14748.8775165815']},
  {'name': '2021', 'values': ['-46045.7306272063']}]}

In [18]:
# Serialize the payload back to JSON with 2-space indentation so it is easier to read.
pretty_payload = json.dumps(api_data, indent=2)
print(pretty_payload)

{
  "config": {
    "title": "Ingreso nacional disponible (millones S/ 2007)",
    "series": [
      {
        "name": "Ingreso nacional disponible (millones S/ 2007) - Renta de Factores",
        "dec": "0"
      }
    ]
  },
  "periods": [
    {
      "name": "2020",
      "values": [
        "-14748.8775165815"
      ]
    },
    {
      "name": "2021",
      "values": [
        "-46045.7306272063"
      ]
    }
  ]
}


## 3. pandas: dataframes

In [29]:
# > variables
record_path = 'periods'  # key of the payload that holds the list of records

# > pandas from json
# Build the dataframe with one row per entry found under `record_path`.
df = pd.json_normalize(data=api_data, record_path=record_path)

df.head()

Unnamed: 0,name,values
0,2020,[-14748.8775165815]
1,2021,[-46045.7306272063]


In [21]:
# > variables: mapping from the original column names to the new ones
columnas = dict(name='YEAR', values=serie.upper())
columnas


{'name': 'YEAR', 'values': 'PM04902AA'}

In [30]:
# > pandas rename column
# Apply the `columnas` mapping to the column labels.
df = df.rename(columnas, axis='columns')

df.head()

Unnamed: 0,YEAR,PM04902AA
0,2020,[-14748.8775165815]
1,2021,[-46045.7306272063]


In [31]:
# > pandas format
# Each cell of the series column holds a one-element list of strings,
# so take the first element and convert it to a float.
# The column is looked up with `serie.upper()`, the same key used when the
# column was renamed, so the lookup still works if `serie` is written in lower case.
value_col = serie.upper()
df[value_col] = df[value_col].str[0].astype('float')

df.head()

Unnamed: 0,YEAR,PM04902AA
0,2020,-14748.877517
1,2021,-46045.730627


## 4. pandas: exporting data

In [None]:
# > variables
s = serie  # series code used for the API request
exec_uuid = str(uuid4())  # unique identifier for this execution
curr_date = c_date  # date string generated earlier in the notebook

# Layout: ./data/current/<date>/<execution id>/<series>.csv
save_path = f'./data/current/{curr_date}/{exec_uuid}/{s}.csv'

print(save_path)

In [None]:
# > split save path
# Drop the last path component (the file name) to get the containing folder.
save_folder = save_path.rsplit('/', 1)[0]

print(save_folder)

In [None]:
# > make save directory if not exists
# parents=True also creates missing intermediate folders;
# exist_ok=True avoids an error when the folder is already there.
target_dir = Path(save_folder)
target_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# > export data from dataframe to csv
# Write a deep copy of the dataframe as a ';'-separated file, without the index column.
dfc = df.copy(deep=True)
csv_options = {'sep': ';', 'encoding': 'iso-8859-1', 'index': False}
dfc.to_csv(save_path, **csv_options)

## 5. main scenario

### 5.1. multiple solicitudes

In [None]:
# > fixed
# Download every series in `series`, turn each response into a two-column
# dataframe (YEAR, <series code>) and save it as its own CSV file under
# ./data/current/<date>/<execution id>/.
series = ['PM04901AA', 'PM04902AA', 'PM04903AA', 'PM04904AA', 'PM04905AA', 'PM04906AA', 'PM04907AA']
exec_uuid = str(uuid4())  # one identifier shared by every file written in this run

# > loop-invariant settings (evaluated once instead of on every iteration)
headers = {'Content-Type': 'application/json'}
record_path = 'periods'
curr_date = c_date
save_folder = f'./data/current/{curr_date}/{exec_uuid}'

for serie in series:
    # > variables
    api_url = f'https://estadisticas.bcrp.gob.pe/estadisticas/series/api/{serie}/json/2020/2022/'

    # > request + try-except
    try:
        response = requests.get(url=api_url, headers=headers, timeout=60)
        # Turn 4xx/5xx answers into an exception instead of parsing an error body as data.
        response.raise_for_status()
        response.encoding = 'utf-8'
        api_data = json.loads(response.text)
    except Exception:
        print('[INFO] something went wrong...')
        raise

    # > pandas from json
    # The value column is addressed with the same upper-cased key it is renamed to.
    value_col = serie.upper()
    columnas = {'name': 'YEAR', 'values': value_col}
    df = pd.json_normalize(api_data, record_path=record_path)
    df = df.rename(columns=columnas)
    # Each value is wrapped in a list: unwrap the first element and convert it to float.
    df[value_col] = df[value_col].str[0].astype('float')

    # > make save directory if not exists
    Path(save_folder).mkdir(parents=True, exist_ok=True)

    # > export data from dataframe to csv
    s = serie
    save_path = f'{save_folder}/{s}.csv'
    df.to_csv(save_path, sep=';', encoding='iso-8859-1', index=False)

### 5.2. dataframe auxiliar

In [None]:
# > variables
curr_date = c_date
year = curr_date.partition('-')[0]  # text before the first '-' of the date string
# Folder of the most recently written CSV: its save path without the file name.
read_path = save_path.rsplit('/', 1)[0]

In [None]:
# > dataframe dummy
# Scaffold table: one row per year from 1940 up to `year` (inclusive), indexed
# by YEAR, with the load date repeated on every row.
first_year, last_year = 1940, int(year)
tdf = (
    pd.DataFrame({'YEAR': range(first_year, last_year + 1), 'LOAD_DATE': curr_date})
    .set_index('YEAR')
)

tdf.head()

In [None]:
# > items to join
# Print the name of every entry found in the folder that will be read.
files = os.listdir(read_path)
for f in files:
    print(f)

In [None]:
# > complete table
# Join every per-series CSV onto the scaffold table by YEAR.
# os.listdir returns entries in arbitrary order and includes whatever else is
# in the folder, so the names are sorted (stable column order between runs)
# and only '.csv' files are read.
files = sorted(name for name in os.listdir(read_path) if name.endswith('.csv'))
for f in files:
    df = pd.read_csv(f'{read_path}/{f}', sep=';', encoding='iso-8859-1')
    df = df.set_index('YEAR')
    tdf = tdf.join(df)

In [None]:
# > nulls introduced by the scaffold table
# Count the missing values of each column, then preview the table.
null_counts = tdf.isna().sum()
print(null_counts)
tdf.head()

In [None]:
# > reset index + drop nulls
# Move YEAR back to a regular column, then drop every row that has a missing
# value in any column from position 2 onward (the columns after the first two).
tdf = tdf.reset_index()
value_columns = tdf.columns[2:]
tdf = tdf.dropna(subset=value_columns)

In [None]:
# > output
# Count the missing values of each column again, then preview the table.
remaining_nulls = tdf.isna().sum()
print(remaining_nulls)
tdf.head()

### 5.3. exportar datos

In [None]:
# > variables
encoding = 'iso-8859-1'  # text encoding of the exported file
sep = '|'  # field separator of the exported file
export_path = f'./data/output/{curr_date}/output_{exec_uuid}.csv'
save_folder = export_path.rsplit('/', 1)[0]  # export path without the file name

In [None]:
# > export full table
Path(save_folder).mkdir(parents=True, exist_ok=True)
# Export the consolidated table `tdf`. `df` only holds the last CSV read while
# building that table, with YEAR moved to the index, so writing `df` with
# index=False exported a single series and lost the YEAR column.
tdf.to_csv(export_path, sep=sep, encoding=encoding, index=False)

In [None]:
# > move current to historic
# The destination is the same path with 'current' replaced by 'historic'.
historic_path = read_path.replace('current', 'historic')
current_path = read_path
shutil.move(src=current_path, dst=historic_path)