# Notebook Magic

In [1]:
%matplotlib inline
%load_ext autoreload

## Imports

In [2]:
import os
import yaml
import pandas as pd
import s3fs
import matplotlib.pyplot as plt

from iefp import data
from iefp import utils

## Load CSVs from S3

In [3]:
# Parse the credentials YAML file. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open for the lifetime of
# the kernel.
# NOTE(review): yaml.FullLoader is kept to preserve behavior; if the file only
# holds plain mappings/strings, yaml.safe_load would be sufficient -- confirm.
with open("../conf/local/credentials.yml") as credentials_file:
    aws_cred = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [4]:
# S3 filesystem handle authenticated with the key pair stored under the
# "dssg" entry of the loaded credentials.
s3 = s3fs.S3FileSystem(
    key=aws_cred["dssg"]["aws_access_key_id"],
    secret=aws_cred["dssg"]["aws_secret_access_key"],
)

In [5]:
# URL template for a single file in the bucket; "{}" is filled with a file name.
s3_path_template = "s3://iefp-unemployment/SIGAE/{}"

# CSV files to load, one entry per table.
table_names = [
    "ESTGESTOR_EMP_OFERTAS.csv",
    "ESTGESTOR_EMP_PEDIDOS.csv",
    "ESTGESTOR_MOV_APRESENTADOS.csv",
    "ESTGESTOR_MOV_CONTROLADOS.csv",
    "ESTGESTOR_MOV_CONVOCADOS.csv",
    "ESTGESTOR_MOV_INTERVENCOES.csv",
    "ESTGESTOR_MOV_OCUPACOES.csv",
    "ESTGESTOR_MOV_REC_INTERNACIONAL.csv",
    "ESTGESTOR_MOV_RMG.csv",
    "ESTGESTOR_MOV_UTENTES.csv",
    "ESTGESTOR_RESP_OFERTAS.csv",
    "ESTUSER_GDE_OFERTAS.csv",
    "ESTUSER_GDE_UTENTES.csv",
]

In [6]:
# Read every listed CSV into a DataFrame, keyed by the file name without its
# ".csv" extension.
tables = {}

for name in table_names:
    # os.path.splitext removes only the extension. str.strip(".csv") strips any
    # leading/trailing '.', 'c', 's' or 'v' characters, which would mangle a
    # name that starts or ends with one of those letters.
    table_key = os.path.splitext(name)[0]
    # NOTE(review): read_csv receives only the URL string here; the `s3`
    # filesystem handle is not passed in -- confirm which credentials are
    # actually used for these reads.
    tables[table_key] = pd.read_csv(s3_path_template.format(name))

## Translate Column Names

In [7]:
# Load the data dictionary. The context manager closes the file handle once
# parsing is done instead of leaving it open for the lifetime of the kernel.
with open("../references/SIGAE_dict.yaml") as dict_file:
    column_dict = yaml.load(dict_file, Loader=yaml.FullLoader)

In [8]:
# Pass every loaded frame, together with the "SIGAE_DICT" entry of the data
# dictionary, to data.translate_columns (presumably renames the columns --
# confirm against iefp.data).
dfs_translated = data.translate_columns(
    tables.values(),
    column_dict["SIGAE_DICT"],
)
# Rebuild the mapping keyed by the raw file names (".csv" suffix included).
# This pairs by position, so it relies on tables.values() being in the same
# order as table_names.
tables = {
    file_name: frame
    for file_name, frame in zip(table_names, dfs_translated)
}

## Output Sample Data

In [16]:
# Print a transposed three-row sample of every table, restricted to the
# columns that contain no missing values.
for name, df in tables.items():
    # dropna over columns removes every column that has at least one NaN.
    df = df.dropna(axis="columns")
    # os.path.splitext removes only the extension; str.strip(".csv") would
    # strip any leading/trailing '.', 'c', 's' or 'v' characters instead.
    table_label = os.path.splitext(name)[0]
    print("\nTable name: {}".format(table_label))
    # Transpose so each column name becomes a row and wide tables stay readable.
    print(df.head(3).T)


Table name: ESTGESTOR_EMP_OFERTAS
                                      0                1                2
YEAR_MONTH_DATE_STAMP            200904           200904           200904
TYPE_ENTRY                           71               81               61
CENTER                              334              334              334
CNP_JOB_CURRENT                  828205           522005           713605
IND_CNP                              23               15               18
TIME                                  C                C                C
NATURE_EMPLOYMENT                     T                P                T
SALARY                              700              450              450
COMMUNICATION_DATE       3/20/2009 0:00    4/1/2009 0:00    3/5/2009 0:00
COMMUNICATION_MODE                    E                E                F
NUM_JOBS                              1                1                2
DATE_VALIDITY           12/31/2009 0:00  12/31/2009 0:00  12/31/2009 0:00
CAN