# Ingest Higher Educational Institutions for fact_employability

### Config

In [1]:
import sys
import configparser
config = configparser.ConfigParser()
config.read_file(open('../../../settings.ini'))

sys.path.insert(0, config.get('PATHS','libs_path'))
engine_path = config.get('DATABASE','engine_path')

### Main params

In [2]:
remote_path = 'http://pacha.datawheel.us/educacion/empleabilidad/'
local_path = '../data/'

### Imports

In [3]:
import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

import json
import pandas as pd
from sqlalchemy import create_engine

In [4]:
df = download_file(remote_path+'ids/',local_path,'listado_instituciones.csv')
df = df.rename(columns={'institucion':'higher_educational_institution','institucion_id':'higher_educational_institution_id'});
df = df.astype({'higher_educational_institution_id':'int'})
list(df)

Already downloaded. Using: ../data/listado_instituciones.csv
Encoding: utf-8


['higher_educational_institution', 'higher_educational_institution_id']

### Gruoping by institutions type

In [5]:
df['higher_educational_institution_group_id'] = 1
df['higher_educational_institution_group_name'] = 'Universities'
df['higher_educational_institution_group_name_es'] = 'Universidades'

df.loc[df['higher_educational_institution'].str.startswith('IP'), 'higher_educational_institution_group_id'] = 2
df.loc[df['higher_educational_institution'].str.startswith('IP'), 'higher_educational_institution_group_name'] = 'Professional Institute'
df.loc[df['higher_educational_institution'].str.startswith('IP'), 'educational_institution_group_name_es'] = 'Instituto Profesional'


df.loc[df['higher_educational_institution'].str.startswith('CFT'), 'higher_educational_institution_group_id'] = 3
df.loc[df['higher_educational_institution'].str.startswith('CFT'), 'higher_educational_institution_group_name'] = 'Technical Training Center'
df.loc[df['higher_educational_institution'].str.startswith('CFT'), 'higher_educational_institution_group_name_es'] = 'Centros de Formación Técnica'
    
list(df)

['higher_educational_institution',
 'higher_educational_institution_id',
 'higher_educational_institution_group_id',
 'higher_educational_institution_group_name',
 'higher_educational_institution_group_name_es',
 'educational_institution_group_name_es']

### Ingest

In [6]:
engine = create_engine(engine_path)
db = postgres.PostgresDriver(engine)
db.to_sql(df, 'education', 'dim_higher_educational_institutions')

DROP TABLE IF EXISTS education.dim_higher_educational_institutions;
CREATE TABLE "education"."dim_higher_educational_institutions" (
"higher_educational_institution" TEXT,
  "higher_educational_institution_id" INTEGER,
  "higher_educational_institution_group_id" INTEGER,
  "higher_educational_institution_group_name" TEXT,
  "higher_educational_institution_group_name_es" TEXT,
  "educational_institution_group_name_es" TEXT
)
COPY "education"."dim_higher_educational_institutions" ("higher_educational_institution","higher_educational_institution_id","higher_educational_institution_group_id","higher_educational_institution_group_name","higher_educational_institution_group_name_es","educational_institution_group_name_es") FROM STDIN WITH CSV HEADER DELIMITER ',';
