# main.ipynb

## modules

### common

In [1]:
import os
import shutil
import json
from uuid import uuid4
from pathlib import Path

### requirements

In [2]:
import requests
import pendulum
import pandas as pd
import fire

## 1. pendulum

In [None]:
# > variables
# IANA time-zone name; handed to pendulum.now() below to compute the run date
region_city: str = 'America/Lima'

In [None]:
# > current date, as a date string, in the configured time zone
rc = region_city
c_date = pendulum.now(rc).to_date_string()

print(c_date, '---', rc)

## 2. requests

In [None]:
# > variables
# code of the statistical serie to download
serie = 'PM04902AA'
# period bounds placed in the request URL (kept as named values so the range
# can be changed without editing the URL itself)
start_year, end_year = 2020, 2022
base_url = 'https://estadisticas.bcrp.gob.pe/estadisticas/series/api'
# final shape: <base_url>/<serie>/json/<start_year>/<end_year>/
api_url = f'{base_url}/{serie}/json/{start_year}/{end_year}/'
headers = {'Content-Type': 'application/json'}

In [None]:
# > request + try-except
# Any failure (network error, HTTP error status, malformed JSON) is announced
# with a short message and then re-raised, so the run stops at this cell.
try:
    response = requests.get(url=api_url, headers=headers, timeout=60)
    # fail on 4xx/5xx here instead of trying to JSON-decode an error page
    response.raise_for_status()
    # decode the body as UTF-8 before parsing it
    response.encoding = 'utf-8'
    api_data = json.loads(response.text)
except Exception:
    print('[INFO] something went wrong...')
    raise

In [None]:
# > pretty-print the decoded API payload (2-space indentation)
pretty_payload = json.dumps(api_data, indent=2)
print(pretty_payload)

## 3. pandas: dataframes

In [None]:
# > variables
# path inside the payload to the list of records that become dataframe rows
record_path = 'periods'
# raw column name -> output column name (value column is named after the serie)
columnas = dict(name='YEAR', values=serie.upper())

In [None]:
# > flatten the records found under `record_path` into a dataframe
df = pd.json_normalize(data=api_data, record_path=record_path)

df.head()

In [None]:
# > rename the raw columns according to the `columnas` mapping
df = df.rename(mapper=columnas, axis='columns')

df.head()

In [None]:
# > pandas format
# The value column was renamed to `serie.upper()`, so address it by that same
# name (indexing with the raw `serie` raises KeyError for a lower-case code).
value_col = serie.upper()
# `.str[0]` keeps the first element of each cell, which is then cast to float
# (presumably each cell is a one-item list of values - confirm against payload)
df[value_col] = df[value_col].str[0].astype('float')

df.head()

## 4. pandas: exporting data

In [None]:
# > variables
exec_uuid = str(uuid4())  # random identifier for this execution
curr_date, s = c_date, serie

# layout: ./data/current/<date>/<execution id>/<serie>.csv
save_path = f'./data/current/{curr_date}/{exec_uuid}/{s}.csv'

print(save_path)

In [None]:
# > folder part of the save path: everything before the last '/'
save_folder = save_path.rpartition('/')[0]

print(save_folder)

In [None]:
# > create the save folder together with any missing parents;
#   an already existing folder is accepted
target_dir = Path(save_folder)
target_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# > write an independent copy of the dataframe as a ';'-separated CSV
dfc = df.copy()  # deep copy (the default for DataFrame.copy)
dfc.to_csv(path_or_buf=save_path, index=False, sep=';', encoding='iso-8859-1')

## 5. main scenario

### 5.1. multiple requests

In [None]:
# > fixed
# Download every serie in `series`, turn each payload into a two-column table
# (YEAR, <SERIE>) and store it as ./data/current/<date>/<execution id>/<serie>.csv
series = ['PM04901AA', 'PM04902AA', 'PM04903AA', 'PM04904AA', 'PM04905AA', 'PM04906AA', 'PM04907AA']
exec_uuid = str(uuid4())

# > loop-invariant settings (identical for every serie, so computed once)
headers = {'Content-Type': 'application/json'}
record_path = 'periods'
curr_date = c_date
save_folder = f'./data/current/{curr_date}/{exec_uuid}'

# > make save directory if not exists (all series of this run share it)
Path(save_folder).mkdir(parents=True, exist_ok=True)

for serie in series:
    # > variables
    api_url = f'https://estadisticas.bcrp.gob.pe/estadisticas/series/api/{serie}/json/2020/2022/'

    # > request + try-except
    # any failure is announced and re-raised, which aborts the whole loop
    try:
        response = requests.get(url=api_url, headers=headers, timeout=60)
        # fail on 4xx/5xx here instead of trying to JSON-decode an error page
        response.raise_for_status()
        response.encoding = 'utf-8'
        api_data = json.loads(response.text)
    except Exception:
        print('[INFO] something went wrong...')
        raise

    # > variables
    # the value column is renamed to the upper-cased code; reuse that exact
    # name below so the lookup cannot drift from the rename
    value_col = serie.upper()
    columnas = {'name': 'YEAR', 'values': value_col}

    # > pandas from json
    df = pd.json_normalize(api_data, record_path=record_path)
    df = df.rename(columns=columnas)
    # keep the first element of each cell and cast it to float
    df[value_col] = df[value_col].str[0].astype('float')

    # > export data from dataframe to csv
    # `save_path` stays a plain string: the next section derives its read
    # folder from the last value assigned here
    save_path = f'{save_folder}/{serie}.csv'
    df.to_csv(save_path, sep=';', encoding='iso-8859-1', index=False)

### 5.2. dataframe auxiliar

In [None]:
# > variables
curr_date = c_date
year = curr_date.partition('-')[0]  # text before the first '-' of the date
read_path = save_path.rpartition('/')[0]  # folder part of the last save path

In [None]:
# > scaffold table: one row per year from 1940 through `year`, indexed by YEAR,
#   with the load date repeated on every row
last_year = int(year)
tdf = pd.DataFrame(
    {'YEAR': range(1940, last_year + 1), 'LOAD_DATE': curr_date}
).set_index('YEAR')

tdf.head()

In [None]:
# > items to join: show every entry found in the read folder
files = os.listdir(read_path)
for entry in files:
    print(entry)

In [None]:
# > complete table
# Join only the exported CSV files, in sorted order: os.listdir() returns
# entries in arbitrary order (the column order would vary between runs) and
# the folder may contain stray non-CSV entries that should not be parsed.
files = sorted(name for name in os.listdir(read_path) if name.endswith('.csv'))
for f in files:
    df = pd.read_csv(f'{read_path}/{f}', sep=';', encoding='iso-8859-1')
    df = df.set_index('YEAR')
    # left join on the YEAR index: years absent from the file become NaN
    tdf = tdf.join(df)

In [None]:
# > nulls caused by the scaffold table: null count per column, then a preview
null_counts = tdf.isna().sum()
print(null_counts)
tdf.head()

In [None]:
# > reset index + drop nulls
tdf = tdf.reset_index()
# every column after the first two; a null in any of them drops the row
serie_columns = tdf.columns[2:]
tdf = tdf.dropna(subset=serie_columns)

In [None]:
# > output: remaining null count per column, then a preview
remaining_nulls = tdf.isna().sum()
print(remaining_nulls)
tdf.head()

### 5.3. exportar datos

In [None]:
# > variables
# CSV dialect used for the final export
sep, encoding = '|', 'iso-8859-1'
export_path = f'./data/output/{curr_date}/output_{exec_uuid}.csv'
save_folder = export_path.rpartition('/')[0]  # folder part of export_path

In [None]:
# > export full table
Path(save_folder).mkdir(parents=True, exist_ok=True)
# write the consolidated table `tdf`; `df` only holds the last per-serie CSV
# read while joining (with YEAR moved into its index, so index=False would
# even drop the year column)
tdf.to_csv(export_path, sep=sep, encoding=encoding, index=False)

In [None]:
# > move current to historic: the destination is the read folder's path with
#   'current' replaced by 'historic'
current_path, historic_path = read_path, read_path.replace('current', 'historic')
shutil.move(src=current_path, dst=historic_path)