# Notebook Magic

In [1]:
%matplotlib inline
%load_ext autoreload

## Imports

In [2]:
import os
import yaml
import pandas as pd
import s3fs
import matplotlib.pyplot as plt

from iefp import data
from iefp import utils

## Load CSVs from S3

In [3]:
# Parse the local credentials file into a dict. The context manager closes
# the file handle as soon as parsing is done instead of leaving it open for
# the lifetime of the kernel.
with open("../conf/local/credentials.yml") as credentials_file:
    aws_cred = yaml.load(credentials_file, Loader=yaml.FullLoader)

In [4]:
# S3 filesystem handle configured with the access key and secret stored under
# the 'dssg' entry of aws_cred.
s3 = s3fs.S3FileSystem(
    key=aws_cred['dssg']['aws_access_key_id'],
    secret=aws_cred['dssg']['aws_secret_access_key'],
)

In [5]:
# File names of the SIGAE CSV exports to load, in load order.
table_names = [
    "ESTGESTOR_EMP_OFERTAS.csv",
    "ESTGESTOR_EMP_PEDIDOS.csv",
    "ESTGESTOR_MOV_APRESENTADOS.csv",
    "ESTGESTOR_MOV_CONTROLADOS.csv",
    "ESTGESTOR_MOV_CONVOCADOS.csv",
    "ESTGESTOR_MOV_INTERVENCOES.csv",
    "ESTGESTOR_MOV_OCUPACOES.csv",
    "ESTGESTOR_MOV_REC_INTERNACIONAL.csv",
    "ESTGESTOR_MOV_RMG.csv",
    "ESTGESTOR_MOV_UTENTES.csv",
    "ESTGESTOR_RESP_OFERTAS.csv",
    "ESTUSER_GDE_OFERTAS.csv",
    "ESTUSER_GDE_UTENTES.csv",
]

# S3 URL template; the placeholder is filled with one entry of table_names.
s3_path_template = "s3://iefp-unemployment/SIGAE/{}"

In [6]:
# Read each CSV listed in table_names from S3 into a DataFrame, keyed by the
# file name without its ".csv" extension.
# NOTE(review): pd.read_csv receives the s3:// URL directly, so the `s3`
# filesystem object built from aws_cred is not passed to it here — confirm
# the intended credentials are actually the ones being used.
tables = dict()

for name in table_names:
    # os.path.splitext drops only the trailing extension. str.strip(".csv")
    # would instead strip any run of the characters ".", "c", "s", "v" from
    # BOTH ends of the name, mangling names that start or end with them.
    table_key = os.path.splitext(name)[0]
    tables[table_key] = pd.read_csv(s3_path_template.format(name))

## Translate Column Names

In [7]:
# Load data dictionary. The context manager closes the file handle once the
# YAML has been parsed instead of leaving it open.
with open('../references/SIGAE_dict.yaml') as dict_file:
    column_dict = yaml.load(dict_file, Loader=yaml.FullLoader)

In [8]:
# Pass every loaded frame through data.translate_columns together with the
# "SIGAE_DICT" entry of the data dictionary, then rebuild `tables` from the
# results.
# NOTE(review): the rebuilt dict is keyed by the raw entries of table_names
# (including the ".csv" suffix), whereas the load cell keyed `tables` by the
# name without the suffix — confirm which key format later cells expect.
dfs_translated = data.translate_columns(
    tables.values(),
    column_dict["SIGAE_DICT"],
)
tables = {
    file_name: translated_df
    for file_name, translated_df in zip(table_names, dfs_translated)
}

## Output Sample Data

In [10]:
# Preview the translated tables; the recorded output lists each column name
# followed by a few sample values.
# NOTE(review): the meaning of the second argument ("hello") is not visible
# from this notebook — confirm against iefp.utils.preview_all_data.
# NOTE(review): the output shows record-level sample values; review it before
# sharing or committing the notebook.
utils.preview_all_data(tables, "hello")

YEAR_MONTH_DATE_STAMP: 200904, 200904, 200904

TYPE_ENTRY: 71, 81, 61

CENTER: 334, 334, 334

CNP_JOB_CURRENT: 828205, 522005, 713605

IND_CNP: 23, 15, 18

TIME: C, C, C

NATURE_EMPLOYMENT: T, P, T

NUM_MONTHS: 12.0, 3.0, 3.0

ID_MINIMUM_AGE: 25.0, 25.0, 18.0

ID_MAXIMUM_AGE: 55.0, 55.0, 45.0

MINIMUM_QUALIFICATIONS: 9, 9, 9

MAXIMUM_QUALIFICATIONS: 12, 12, LC

SALARY: 700.0, 450.0, 450.0

TYPE_SALARY: M, M, M

OWN_TRANSPORT: S, N, N

CONDUCT_LETTER: 

PROFESSIONAL_TRAINING: N, N, N

FRENCH: 

ENGLISH: S, S, S

GERMAN: 

SPAIN: 

OTHER_LANGUAGE: O, O

RECRIUTMENT_AREA: 

PARISH_WORK_PLACE: 141001.0, 141912.0, 141001.0

COMMUNICATION_DATE: 3/20/2009 0:00, 4/1/2009 0:00, 3/5/2009 0:00

COMMUNICATION_MODE: E, E, F

NUM_JOBS: 1, 1, 2

DATE_VALIDITY: 12/31/2009 0:00, 12/31/2009 0:00, 12/31/2009 0:00

PARISH_RESIDENCE_1: 141001.0, 141912.0, 141001.0

PARISH_RESIDENCE_2: 50505.0, 81602.0

PARISH_RESIDENCE_3: 

CANCELLATION_DATE: 4/21/2009 15:55, 4/22/2009 15:55, 4/23/2009 15:55

CANCELLATION_

CAE_ENTITY_V2: 

OFFER_ORIGIN: N, N, N

COUNTRY_EURES: 

PROFESSIONAL_DOCUMENTATION: N, N, N

QUALIFICATION: 3.0, 3.0, 3.0

PREVIOUS_PPE: N, N, N

CNP_PREVIOUS: 412200.0, 412200.0, 712205.0

SUBSIDY: 33.0, 23.0, 7.0

PLACE_OF_WORK: C, C, C

TYPE_EXTERNAL_APPLICATION: EU, EUEX

TYPE_DESIRED_CONTRACT: P, P, P

DESIRED_EMPLOYMENT_TYPE: C, C, C

PROFESSIONAL_TRAINING: S, S, N

QUALIFICATION_AREA: 6950.0, 10271.0, 620.0

TYPE_DOC_ID: BI, BI, BI

TIME_EXPERIENCE_LAST_CNP: 3.0, 324.0, 300.0

TIME_EXPERIENCE: 3.0, 324.0, 300.0

CENTER_NP: 3.0, 1.0, 1.0

STATE: ACT, ACT, ACT

DIRECT_PRESENTATION: S, S, S

CENTER_OFA: 448, 448, 448

RSI: N, N, N

UNEMPLOYMENT_REASON: CC, AE, CC

BIWEEKLY_PRESENTATION: N, S, S

PAE: N, S, S

EST_TOP: 4306.0, 3122.0, 2800.0

UNI_GRAD_YEAR: 2003.0, 2005.0, 2008.0

OPERATOR: 7145, 7145, 7145

PREVIOUS_EMPLOYMENT: 432.0, 46381.0, 41200.0

CAE_ENTITY: 

FOLLOW_UP: 

CAE_PRIORITY: 

TYPE_OCCUPATION: 

UNEMPLOYMENT_DATE: 

EMPLOYMENT_RECRUITMENT_SUPPORT_2012: 

ID_PRESE

F_C_MOD_FORM: 700.0, 700.0, 700.0

F_START_DATE: 11/2/2006 0:00, 11/2/2006 0:00, 11/2/2006 0:00

F_DATE_END: 9/12/2007 0:00, 9/12/2007 0:00, 9/12/2007 0:00

F_C_COURSE: 521481A, 521481A, 521481A

F_D_MOD_FORM: APRENDIZAGEM, APRENDIZAGEM, APRENDIZAGEM

REASON_ENC: 

F_SITUATION: PR, PR, PR

AREA_OF_TRAINING: 521.0, 521.0, 521.0

F_GIVE_FORM: METALURGIA E METALOMECÂNICA, METALURGIA E METALOMECÂNICA, METALURGIA E METALOMECÂNICA

REVALIDATION: N, N, N

F_VACANCIES: 20.0, 20.0, 20.0

F_OCCUPIED_VACANCIES: 0.0, 0.0, 0.0

REASON_REFUSED: 

UNEMPLOYMENT_REASON: 

BIWEEKLY_PRESENTATION: 

PAE: 

EST_TOP: 

UNI_GRAD_YEAR: 

OPERATOR: 

PREVIOUS_EMPLOYMENT: 43992.0, 43992.0, 43992.0

REQ_RSI_CODE: 

F_CNO_NPC: 

F_CNO_SUB_ENT: 

TYPE_OCCUPATION: 

UNEMPLOYMENT_DATE: 

ID_INTERVENTION_FORWARD: 

SUBSIDY_DATE: 

ITO_ID: 

ID_INTERVENTION_FORWARD_SOURCE: 

INTERVENTION_ROUTE_CODE: 

DEPENDENT_DESCENDANTS: 

SUBSIDY_SPOUSE: 

JOB_POSITION: 

TIME_PARTIAL: 

ID_SPOUSE: 

SPOUSE_CATEGORY: 

STATUS_SPOU

MAXIMUM_QUALIFICATION: 12, 12, 12

LEGAL_NATIONALITY: PT, PT, PT

PUBLISHING_LEVEL: 0, 0, 1

LAST_RELEASE_DATE: 12/6/2012 7:18, 12/11/2012 13:48, 8/7/2013 16:41

BDE_PARISH: 50201, 50201, 131710

BDE_CAE: 96030, 14131, 78200

BDE_NR_PERSONAL_SERVICE: 2.0, 371.0, 160.0

RECRIUTMENT_AREA: 

CENTER_CTN_NP: 

CENTER_CTN: 

PROFESSIONAL_DOCUMENTATION: NR, NR, N

SUBSIDY_MEAL: SA, SA, SA

SUBSIDY_MEAL_VALUE: 7.4, 3.0, 3.0

SUBSIDY_SHIFT: 

SUBSIDY_SHIFT_VALUE: 0.0, 0.0, 0.0

SUBSIDY_TRANSPORT: 

SUBSIDY_TRANSPORT_VALUE: 0.0, 0.0, 0.0

ORIGIN: N, N, N

TIME_EXPERIENCE_MIN: 3.0, 6.0, 0.0

TIME_EXPERIENCE_MAX: 36.0

TYPE_OFFER_LSE: FEC, FEC, FEC

ACCEPT_BEC: 

REASON_FOR_SUSPENSION: SA, SA, SA

SUSPENSION_DATE: 12/7/2012 7:24, 12/11/2012 16:45, 4/7/2014 18:36

VALID_INTERNET_PRESENCE: N, N, S

PE_2012: S, S

STRATEGIC_INTEREST: 

CAE_ETT: 96030.0, 78200.0, 33200.0

PARISH_: 50201, 50201, 131710

PUBLIC_COMPETITION: N

EMPLOYMENT_HOURS: D, D, Q

EDENCIA_EET_CODE: S, N, S

TSU_REDUCTION: N, N

IN