# Import & Load Abandonment Data

### Configuration

In [1]:
import sys
import configparser
# Load the shared project settings. read_file() is used (rather than
# ConfigParser.read()) so that a missing settings.ini raises immediately
# instead of being silently skipped; the `with` block guarantees the file
# handle is closed once parsing is done.
config = configparser.ConfigParser()
with open('../../../settings.ini') as settings_file:
    config.read_file(settings_file)

# Make the project's helper modules importable by putting their directory
# first on the module search path.
sys.path.insert(0, config.get('PATHS','libs_path'))
# Connection string later handed to sqlalchemy.create_engine().
engine_path = config.get('DATABASE','engine_path')

### Main Parameters

In [2]:
# Locations handed to the download/load helpers further down.
# remote_path is left empty in this notebook; local_path is the directory the
# CSV files are read from.
remote_path = ''                 # first argument of download_file()
local_path = '../data_final/'    # prefix for every CSV opened below

In [3]:
import json

import pandas as pd
from sqlalchemy import create_engine, text

import postgres #from local file postgres.py
from commons import inline_table_xml, download_file, download_zip_file, extract_zip_file #from local file commons.py

### Load File

In [4]:
# Read the abandonment-rate CSV, keep only the fact-table columns (in this
# order) and cast every one of them to integer.
fact_columns = ['comuna_id', 'year', 'education_id', 'promotion_id', 'total']
df = (
    pd.read_csv(local_path + 'mds_abandonment_rate.csv', delimiter=",")
    .loc[:, fact_columns]
    .astype(dict.fromkeys(fact_columns, 'int'))
)
# Show the resulting column names as the cell output.
df.columns.tolist()

['comuna_id', 'year', 'education_id', 'promotion_id', 'total']

### Ingest

In [5]:
# Open a SQLAlchemy engine on the connection string read from settings.ini.
engine = create_engine(engine_path)
# Wrap the engine in the project's Postgres helper (local file postgres.py).
db = postgres.PostgresDriver(engine)
# Write df to childhood.fact_abandonment. According to the SQL echoed in this
# cell's output, the helper drops the table if it exists, recreates it with
# INTEGER columns and bulk-loads the rows with COPY ... FROM STDIN.
# NOTE(review): because the table is dropped, indexes created on a previous
# run presumably disappear with it and must be recreated afterwards.
db.to_sql(df, 'childhood', 'fact_abandonment')

DROP TABLE IF EXISTS childhood.fact_abandonment;
CREATE TABLE "childhood"."fact_abandonment" (
"comuna_id" INTEGER,
  "year" INTEGER,
  "education_id" INTEGER,
  "promotion_id" INTEGER,
  "total" INTEGER
)
COPY "childhood"."fact_abandonment" ("comuna_id","year","education_id","promotion_id","total") FROM STDIN WITH CSV HEADER DELIMITER ',';


### Add Indices to Foreign Keys

In [6]:
# Index the comuna_id and year columns of the freshly loaded fact table.
#
# * engine.begin() opens a connection, runs both statements in a single
#   transaction and commits on success (rolls back on error). It replaces
#   engine.execute(<str>), which is deprecated in SQLAlchemy 1.4 and removed
#   in 2.0; text() is the supported way to run literal SQL on both versions.
# * IF NOT EXISTS (PostgreSQL 9.5+) lets this cell be re-run on its own
#   without failing because the index is already there.
with engine.begin() as connection:
    connection.execute(text("""
        CREATE INDEX IF NOT EXISTS fact_abandonment_comuna_id_index
        ON childhood.fact_abandonment (comuna_id)
    """))
    connection.execute(text("""
        CREATE INDEX IF NOT EXISTS fact_abandonment_year_index
        ON childhood.fact_abandonment ("year")
    """))

<sqlalchemy.engine.result.ResultProxy at 0x10f16dc88>

### Relation Tables

In [11]:
# Fetch the promotion lookup file via the commons.py helper; the logged output
# shows it reuses the copy already present under local_path.
d2 = download_file(remote_path, local_path, 'promotion.csv')
# Print the lookup as an <InlineTable> XML fragment with alias "prom_id".
# NOTE(review): in the printed XML the Numeric "id" column holds the English
# names (e.g. "Promoted") and both "description" and "es_description" hold the
# Spanish names. The column arguments may be shifted by one -- confirm against
# the inline_table_xml signature and the CSV's id column.
print(inline_table_xml(d2, 'prom_id', 'name_en', 'name_es'))

Already downloaded. Using: ../data_final/promotion.csv

<InlineTable alias="prom_id">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">Promoted</Value>
      <Value column="description">Aprobados</Value>
      <Value column="es_description">Aprobados</Value>
    </Row>
  <Row>
      <Value column="id">Repeated</Value>
      <Value column="description">Reprobados</Value>
      <Value column="es_description">Reprobados</Value>
    </Row>
  <Row>
      <Value column="id">Abandoned</Value>
      <Value column="description">Abandonos</Value>
      <Value column="es_description">Abandonos</Value>
    </Row>
  </Rows>
</InlineTable>
    


In [12]:
# Fetch the education-level lookup file via the commons.py helper; the logged
# output shows it reuses the copy already present under local_path.
d3 = download_file(remote_path, local_path, 'education.csv')
# Print the lookup as an <InlineTable> XML fragment with alias "edu_id".
# NOTE(review): in the printed XML the Numeric "id" column holds the English
# names (e.g. "Elementary School") and both "description" and "es_description"
# hold the Spanish names. The column arguments may be shifted by one -- confirm
# against the inline_table_xml signature and the CSV's id column.
print(inline_table_xml(d3, 'edu_id', 'name_en', 'name_es'))

Already downloaded. Using: ../data_final/education.csv

<InlineTable alias="edu_id">
  <ColumnDefs>
    <ColumnDef name="id" type="Numeric" />
    <ColumnDef name="description" type="String" />
    <ColumnDef name="es_description" type="String" />
  </ColumnDefs>
  <Rows>
    <Row>
      <Value column="id">Elementary School</Value>
      <Value column="description">Enseñanza Básica</Value>
      <Value column="es_description">Enseñanza Básica</Value>
    </Row>
  <Row>
      <Value column="id">High School</Value>
      <Value column="description">Enseñanza Media</Value>
      <Value column="es_description">Enseñanza Media</Value>
    </Row>
  </Rows>
</InlineTable>
    
