In [75]:
!pip install psycopg2

Defaulting to user installation because normal site-packages is not writeable


In [76]:
import psycopg2
import io

In [77]:
import os
os.environ['PYTHONIOENCODING'] = 'utf-8'

In [78]:
import locale
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

'en_US.UTF-8'

In [79]:
conn = psycopg2.connect(
    dbname="postgres",
    user="postgres",
    password="Lucas123",
    host="localhost",
    port="5432"
)

In [80]:
cur = conn.cursor()

In [81]:
with open('utils/build_script.sql', 'r') as file:
    sql_script = file.read()

cur.execute(sql_script)

Creating auxiliar tables to populate the database

In [82]:
try:
    cur.execute("DROP TABLE IF EXISTS MATRICULAS")
    cur.execute("""
        CREATE TABLE MATRICULAS (
            AN_LETIVO                     numeric,
            CD_UNIDADE_EDUCACAO           numeric,
            NOME_DISTRITO                 text,
            CD_SETOR                      numeric,
            TIPO_ESCOLA                   text,
            NOME_ESCOLA                   text,
            DRE                           text,
            CD_INEP_ESCOLA                numeric,
            CD_TURNO                      numeric,
            DESC_TURNO                    text,
            CD_SERIE                      numeric,
            DESC_SERIE                    text,
            MODALIDADE                    text,
            NOME_TURMA                    text,
            DESC_ETAPA_ENSINO             text,
            DESC_CICLO_ENSINO             text,
            DESC_TIPO_TURMA               text,
            CD_ALUNO_SME                  numeric,
            NASC_ALUNO                    timestamp,
            CD_SEXO                       text,
            DESC_RACA_COR                 text,
            DESC_PAIS_NASC                text,
            NEE                           text,
            CD_MAT                        numeric,
            DT_IN_MAT                     timestamp,
            DT_FIM_MAT                    timestamp,
            SITUACAO_MAT                  text,
            DATA_SIT                      timestamp
        )
    """)
    conn.commit()
except Exception as e:
    print("failed to create table:", e)
    conn.rollback()


In [83]:
copy_command = """
COPY MATRICULAS (
    AN_LETIVO,
    CD_UNIDADE_EDUCACAO,
    NOME_DISTRITO,
    CD_SETOR,
    TIPO_ESCOLA,
    NOME_ESCOLA,
    DRE,
    CD_INEP_ESCOLA,
    CD_TURNO,
    DESC_TURNO,
    CD_SERIE,
    DESC_SERIE,
    MODALIDADE,
    NOME_TURMA,
    DESC_ETAPA_ENSINO,
    DESC_CICLO_ENSINO,
    DESC_TIPO_TURMA,
    CD_ALUNO_SME,
    NASC_ALUNO,
    CD_SEXO,
    DESC_RACA_COR,
    DESC_PAIS_NASC,
    NEE,
    CD_MAT,
    DT_IN_MAT,
    DT_FIM_MAT,
    SITUACAO_MAT,
    DATA_SIT
)
FROM STDIN WITH (
    FORMAT CSV,
    HEADER,
    DELIMITER ';',
    NULL ''
);
"""

try:

    for file in os.listdir('datasets'):
        if file.endswith('.csv') and 'Microdados' in file:
            with open(os.path.join('datasets', file), 'r', encoding='utf-8-sig') as f:
                class CommaToDot(io.TextIOBase):
                    def __init__(self, f):  self.f = f
                    def read(self, size=-1):
                        chunk = self.f.read(size)
                        return chunk.replace(',', '.') if chunk else ''

                fixer = CommaToDot(f)
                cur.copy_expert(copy_command, fixer)
                conn.commit()

except Exception as e:
    print("failed to populate table:", e)
    conn.rollback()

In [84]:
series = cur.execute("SELECT desc_serie FROM MATRICULAS")
print(series)

None


In [85]:
try:
    cur.execute("""
    DROP TABLE IF EXISTS TURMAS;

    CREATE TABLE TURMAS (
        DRE               TEXT,
        CODINEP           NUMERIC,
        TIPOESC           TEXT,
        NOMESC            TEXT,
        SUBPREF           TEXT,
        DISTRITO          TEXT,
        CODAMB            NUMERIC,
        DESCAMB           TEXT,
        CAPREAL           NUMERIC,
        METRAGEM          NUMERIC,
        MODAL             TEXT,
        REDE              TEXT,
        CODSERIE          NUMERIC,
        DESCSERIE         TEXT,
        TURNO             TEXT,
        TURMA             TEXT,
        VAGOFER           NUMERIC,
        MATRIC            NUMERIC
    )
    """)
    conn.commit()

except Exception as e:
    print("failed to create table:", e)
    conn.rollback()

In [86]:
copy_command = """
COPY TURMAS (
    DRE,
    CODINEP,
    TIPOESC,
    NOMESC,
    SUBPREF,
    DISTRITO,
    CODAMB,
    DESCAMB,
    CAPREAL,
    METRAGEM,
    MODAL,
    REDE,
    CODSERIE,
    DESCSERIE,
    TURNO,
    TURMA,
    VAGOFER,
    MATRIC
)
FROM STDIN WITH (
    FORMAT CSV,
    HEADER,
    DELIMITER ';',
    NULL ''
);
"""
try:
    with open('datasets/turmas_processed.csv', 'r', encoding='utf-8-sig') as f:
        cur.copy_expert(copy_command, f)
        conn.commit()

except Exception as e:
    print("failed to populate table:", e)
    conn.rollback()


In [87]:
try:
    cur.execute("""
    DROP TABLE IF EXISTS PARCERIAS;

    CREATE TABLE PARCERIAS (
        NUM_PROTOCOLO       NUMERIC,
        DRE                 TEXT,
        OSC_PARCEIRA        TEXT,
        CNPJ                TEXT,
        CODIGO_ESCOLA       NUMERIC,
        UNIDADE_EDUCACIONAL TEXT,
        VALOR_MENSAL        NUMERIC,
        VERBA_LOCACAO       NUMERIC,
        VALOR_MENSAL_IPTU   NUMERIC,
        DATA_INICIO         TIMESTAMP,
        DATA_TERMINO        TIMESTAMP
    )
    """)
    conn.commit()

except Exception as e:
    print("failed to create table:", e)
    conn.rollback()

In [88]:
copy_command = """
COPY PARCERIAS (
    NUM_PROTOCOLO,
    DRE,
    OSC_PARCEIRA,
    CNPJ,
    CODIGO_ESCOLA,
    UNIDADE_EDUCACIONAL,
    VALOR_MENSAL,
    VERBA_LOCACAO,
    VALOR_MENSAL_IPTU,
    DATA_INICIO,
    DATA_TERMINO
)
FROM STDIN WITH (
    FORMAT CSV,
    HEADER,
    DELIMITER ';',
    NULL ''
);
"""
try:
    with open('datasets/parcerias_processed.csv', 'r', encoding='utf-8-sig') as f:
        class CommaToDot(io.TextIOBase):
                    def __init__(self, f):  self.f = f
                    def read(self, size=-1):
                        chunk = self.f.read(size)
                        return chunk.replace(',', '.') if chunk else ''

        fixer = CommaToDot(f)
        cur.copy_expert(copy_command, fixer)
        conn.commit()

except Exception as e:
    print("failed to populate table:", e)
    conn.rollback()

Populating actual database with the data from the auxiliar tables

In [89]:
# DISTRITO
try:
    cur.execute("""
        INSERT INTO Distrito (nome_distrito, dre, subpref)
        SELECT DISTINCT distrito, dre, subpref
        FROM Turmas
        ON CONFLICT (nome_distrito) DO NOTHING
                """)
    conn.commit()

except Exception as e:
    print("failed to populate table DISTRITO:", e)
    conn.rollback()

# TURNO
try:
    cur.execute("""
        INSERT INTO Turno (cod_turno, descricao_turno)
        SELECT DISTINCT cd_turno, desc_turno
        FROM Matriculas
        ON CONFLICT (cod_turno) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table TURNO:", e)
    conn.rollback()

# SERIE
try:
    cur.execute("""
        INSERT INTO Serie (cod_serie, descricao_serie, modalidade)
        SELECT DISTINCT cd_serie, desc_serie, modalidade
        FROM Matriculas
        ON CONFLICT (cod_serie) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table SERIE:", e)
    conn.rollback()

# MATERIA
try:
    cur.execute("""
        INSERT INTO Materia (cd_materia, data_inicio, data_final)
        SELECT DISTINCT cd_mat, dt_in_mat, dt_fim_mat
        FROM Matriculas
                WHERE cd_mat IS NOT NULL
        ON CONFLICT (cd_materia) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table MATERIA:", e)
    conn.rollback()


In [90]:

# OSC
try:
    cur.execute("""
        INSERT INTO OSC (cnpj, nome)
        SELECT DISTINCT cnpj, osc_parceira
        FROM Parcerias
        ON CONFLICT (cnpj) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table OSC:", e)
    conn.rollback()

# PARCERIA
try:
    cur.execute("""
        INSERT INTO Parceria (protocolo, osc_cnpj, valor_mensal, verba_locacao, valor_mensal_iptu, data_inicio, data_termino)
        SELECT DISTINCT num_protocolo, cnpj, valor_mensal, verba_locacao, valor_mensal_iptu, data_inicio, data_termino
        FROM Parcerias
        ON CONFLICT (protocolo) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table PARCERIA:", e)
    conn.rollback()


In [91]:
# ESCOLA
try:
    cur.execute("""
        INSERT INTO Escola (cod_inep, nome_distrito, protocolo, tipo_esc, nome_esc, rede)
        SELECT DISTINCT
        t.codinep,
        t.distrito,
        p.num_protocolo,
        t.tipoesc,
        t.nomesc,
        t.rede
        FROM Turmas t
        LEFT JOIN Parcerias p
        ON p.unidade_educacional ILIKE '%' || t.nomesc || '%'
            WHERE t.nomesc IS NOT NULL
        ON CONFLICT (nome_esc) DO NOTHING;
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table ESCOLA:", e)
    conn.rollback()



In [92]:

# AMBIENTE
try:
    cur.execute("""
        INSERT INTO Ambiente (cod_amb, nome_esc, desc_amb, capacidade, metragem)
        SELECT DISTINCT codamb, nomesc, descamb, capreal, metragem
        FROM Turmas
        INNER JOIN Matriculas ON Turmas.nomesc = Matriculas.nome_escola
                WHERE metragem IS NOT NULL AND nomesc IS NOT NULL AND capreal IS NOT NULL
        ON CONFLICT (cod_amb) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table AMBIENTE:", e)
    conn.rollback()


In [93]:

# TURMA
try:
    cur.execute("""
        INSERT INTO Turma (nome_turma, nome_esc, desc_turno, desc_serie, cod_turno, cod_serie, matriculados, vagas, tipo_turma, etapa_ensino, ciclo_ensino)
        SELECT DISTINCT 
                t.turma, t.nomesc, m.desc_turno, m.desc_serie, m.cd_turno, m.cd_serie,t.matric, t.vagofer, m.desc_tipo_turma, m.desc_etapa_ensino, m.desc_ciclo_ensino
        FROM Turmas t
        INNER JOIN Matriculas m
            ON m.nome_turma = t.turma AND t.nomesc = m.nome_escola AND t.turno = m.desc_turno AND t.descserie = m.desc_serie
        WHERE t.turma IS NOT NULL AND m.desc_turno IS NOT NULL AND m.desc_serie IS NOT NULL AND m.cd_turno IS NOT NULL AND m.cd_serie IS NOT NULL
        ON CONFLICT (nome_turma, nome_esc, desc_turno, desc_serie) DO NOTHING
    """)
    conn.commit()

except Exception as e:
    print("failed to populate table TURMA:", e)
    conn.rollback()



In [94]:

# ALUNO
try:
    cur.execute("""
        INSERT INTO Aluno (id_aluno, nome_turma, nome_esc, desc_turno, desc_serie, nee, raca_cor, sexo, pais_nasc, nascimento)
        SELECT DISTINCT cd_aluno_sme, nome_turma, nomesc, turno, descserie, nee, desc_raca_cor, cd_sexo, desc_pais_nasc, nasc_aluno
        FROM Matriculas m
        INNER JOIN Turmas t ON m.nome_turma = t.turma AND m.nome_escola = t.nomesc AND m.desc_turno = t.turno AND m.desc_serie = t.descserie
        WHERE m.cd_aluno_sme IS NOT NULL AND t.turma IS NOT NULL AND m.nome_escola IS NOT NULL AND t.turno IS NOT NULL AND t.descserie IS NOT NULL
        ON CONFLICT (id_aluno, nome_turma, nome_esc, desc_turno, desc_serie) DO NOTHING
    """)
    conn.commit()
except Exception as e:
    print("failed to populate table ALUNO:", e)
    conn.rollback()


In [None]:

# SITUACAO
try:
    cur.execute("""
        INSERT INTO Situacao (id_situacao, id_aluno, nome_turma, nome_esc, desc_turno, desc_serie, cd_materia, desc_situacao, data_coleta)
        SELECT
        ROW_NUMBER() OVER (ORDER BY cd_aluno_sme, cd_mat) - 1 AS id_situacao,
        sub.cd_aluno_sme,
        sub.nome_turma,
        sub.nomesc,
        sub.turno,
        sub.descserie,
        sub.cd_mat,
        sub.situacao_mat,
        sub.data_sit
        FROM (
        SELECT DISTINCT
            cd_aluno_sme,
            nome_turma,
            nomesc,
            turno,
            descserie,
            cd_mat,
            situacao_mat,
            data_sit
        FROM Matriculas m
        INNER JOIN Turmas t ON m.nome_turma = t.turma AND m.nome_escola = t.nomesc AND m.desc_turno = t.turno AND m.desc_serie = t.descserie
		WHERE m.cd_aluno_sme IS NOT NULL AND t.turma IS NOT NULL AND m.nome_escola IS NOT NULL AND t.turno IS NOT NULL AND t.descserie IS NOT NULL AND m.cd_mat IS NOT NULL
        ) AS sub
        ON CONFLICT (id_situacao) DO NOTHING;
    """)
    conn.commit()
except Exception as e:
    print("failed to populate table SITUACAO:", e)
    conn.rollback()


failed to populate table SITUACAO: insert or update on table "situacao" violates foreign key constraint "fk_situacao_aluno"
DETAIL:  Key (id_aluno, nome_turma, nome_esc, desc_turno, desc_serie)=(3555220000000, 3A, HERBERT DE SOUZA - BETINHO, Noite, EJA COMPL I) is not present in table "aluno".



Deleting auxiliar tables

In [96]:
try:
    cur.execute("""
        DROP TABLE IF EXISTS matriculas;
        DROP TABLE IF EXISTS turmas;
        DROP TABLE IF EXISTS parcerias;
    """)
    conn.commit()
except Exception as e:
    print("failed to delete auxiliar tables", e)
    conn.rollback()

In [97]:
conn.commit()
cur.close()
conn.close()