In [None]:
import logging
import os
import pandas as pd
import psycopg2

In [None]:
def open_connection():
    """Open a PostgreSQL connection and a cursor on it.

    Connection settings are read from the standard libpq environment
    variables (``PGHOST``, ``PGDATABASE``, ``PGUSER``, ``PGPASSWORD``). When a
    variable is unset, the previous local-development value is used, so the
    notebook keeps working unchanged on a default local setup while real
    credentials no longer need to be written into the notebook.

    Returns:
        tuple: ``(conn, cur)`` - the open connection and a cursor created
        from it. Release both with ``close_connection(conn, cur)``.

    Raises:
        psycopg2.Error: If the connection or the cursor cannot be created.
    """
    conn = psycopg2.connect(
        host=os.environ.get('PGHOST', 'localhost'),
        database=os.environ.get('PGDATABASE', 'queroponto'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
    )

    try:
        cur = conn.cursor()
    except Exception:
        # Do not leak the freshly opened connection if no cursor can be made.
        conn.close()
        raise

    return conn, cur

In [None]:
def close_connection(conn, cur):
    """Close the cursor and then the connection.

    Args:
        conn: Object with a ``close()`` method (the database connection).
        cur: Object with a ``close()`` method (the cursor opened on ``conn``).

    The connection is closed even when closing the cursor raises; in that
    case the cursor's exception still propagates to the caller.
    """
    try:
        cur.close()
    finally:
        conn.close()

In [None]:
def insert_data(conn, cur, table, id_sector, data):
    """Insert one row into ``public.<table>`` and commit it.

    Args:
        conn: Open database connection; committed on success and rolled back
            on failure.
        cur: Cursor belonging to ``conn``.
        table: Name of the target table in the ``public`` schema. Identifiers
            cannot be bound as query parameters, so this is interpolated into
            the statement as-is and must come from trusted code only.
        id_sector: Value stored in the ``id_setor_censitario`` column.
        data: Sequence of ``[column_name, value]`` pairs. ``None`` and NaN
            values are stored as SQL NULL. May be empty, in which case only
            ``id_setor_censitario`` is inserted.

    Raises:
        psycopg2.Error: Any database error other than ``IntegrityError`` is
            re-raised after the transaction has been rolled back.

    An ``IntegrityError`` is rolled back and logged through the
    module-level ``logger`` instead of being raised.
    """
    columns = ['id_setor_censitario']
    params = [id_sector]

    for item in data:
        # Quote the column name; double any embedded quote, and escape '%'
        # so the driver does not mistake it for a parameter placeholder.
        name = str(item[0]).replace('"', '""').replace('%', '%%')
        columns.append(f'"{name}"')

        value = item[1]
        # NaN is the only value that differs from itself; send it as NULL.
        params.append(None if value is None or value != value else value)

    column_list = ', '.join(columns)
    placeholders = ', '.join(['%s'] * len(params))
    query = f"INSERT INTO public.{table} ({column_list}) VALUES ({placeholders});"

    try:
        # Values travel separately from the SQL text, so they are escaped by
        # the driver rather than spliced into the statement.
        cur.execute(query, params)
    except psycopg2.IntegrityError as e:
        conn.rollback()
        logger.error(f"Failed: {e}")
    except psycopg2.Error:
        # Leave the connection usable for the next statement, then surface
        # the error exactly as before.
        conn.rollback()
        raise
    else:
        conn.commit()

---

In [None]:
# Configure the root logger to print timestamped INFO+ messages to stderr.
# The handler is tagged with a name and reused when this cell is re-run, so
# repeated execution does not stack handlers and duplicate every log line.
LOG_HANDLER_NAME = 'queroponto-notebook'

logger = logging.getLogger()

handler = next(
    (h for h in logger.handlers if h.get_name() == LOG_HANDLER_NAME),
    None,
)
if handler is None:
    handler = logging.StreamHandler()
    handler.set_name(LOG_HANDLER_NAME)
    logger.addHandler(handler)

formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
handler.setFormatter(formatter)
logger.setLevel(logging.INFO)

In [None]:
# Input selection: uncomment exactly ONE of the assignments below before
# running the following cells. Every line is currently commented out, so
# `file` is undefined until one is enabled, and the read_csv cell that
# follows needs it.
# file = '../arquivos/csv/AC.csv'
# file = '../arquivos/csv/AL.csv'
# file = '../arquivos/csv/AM.csv'
# file = '../arquivos/csv/AP.csv'
# file = '../arquivos/csv/BA.csv'
# file = '../arquivos/csv/CE.csv'
# file = '../arquivos/csv/DF.csv'
# file = '../arquivos/csv/ES.csv'
# file = '../arquivos/csv/GO.csv'
# file = '../arquivos/csv/MA.csv'
# file = '../arquivos/csv/MG.csv'
# file = '../arquivos/csv/MS.csv'
# file = '../arquivos/csv/MT.csv'
# file = '../arquivos/csv/PA.csv'
# file = '../arquivos/csv/PB.csv'
# file = '../arquivos/csv/PE.csv'
# file = '../arquivos/csv/PI.csv'
# file = '../arquivos/csv/PR.csv'
# file = '../arquivos/csv/RJ.csv'
# file = '../arquivos/csv/RN.csv'
# file = '../arquivos/csv/RO.csv'
# file = '../arquivos/csv/RR.csv'
# file = '../arquivos/csv/RS.csv'
# file = '../arquivos/csv/SE.csv'
# file = '../arquivos/csv/SP.csv'
# file = '../arquivos/csv/TO.csv'
# -------------------------------
# file = '../arquivos/csv/SC.csv'

In [None]:
# Load the selected CSV into a DataFrame. low_memory=False makes pandas read
# the file in one pass instead of inferring column types chunk by chunk.
df = pd.read_csv(
    file,
    low_memory=False,
)

In [None]:
# Log the CSV's file name (last path component) and how many rows were loaded.
logger.info(f"{file.split('/')[-1]} with {len(df)} rows.")

In [None]:
# Convert every column to a numeric dtype; any cell that cannot be parsed as
# a number becomes NaN instead of raising.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
# Mapping from destination column name to the CSV columns that are summed
# to produce it. Kept as a list of single-key dicts because the insert loop
# iterates it with `for col in cols: for key, values in col.items()`.
#
# Each source column must appear exactly once: a repeated name would be
# added twice to its group's total.
#
# NOTE(review): "00_04" is the only group that is not a run of five
# consecutive variables - verify its first variable against the data
# dictionary of the source files.
cols = [
    {"00_04": ["P11_V024", "P11_V025", "P11_V026", "P11_V027", "P11_V028",
               "P11_V029", "P11_V030", "P11_V031", "P11_V032", "P11_V033",
               "P11_V034", "P11_V035", "P11_V036", "P11_V037", "P11_V038"]},
    {"05_09": ["P11_V039", "P11_V040", "P11_V041", "P11_V042", "P11_V043"]},
    {"10_14": ["P11_V044", "P11_V045", "P11_V046", "P11_V047", "P11_V048"]},
    {"15_19": ["P11_V049", "P11_V050", "P11_V051", "P11_V052", "P11_V053"]},
    {"20_24": ["P11_V054", "P11_V055", "P11_V056", "P11_V057", "P11_V058"]},
    {"25_29": ["P11_V059", "P11_V060", "P11_V061", "P11_V062", "P11_V063"]},
    {"30_34": ["P11_V064", "P11_V065", "P11_V066", "P11_V067", "P11_V068"]},
    {"35_39": ["P11_V069", "P11_V070", "P11_V071", "P11_V072", "P11_V073"]},
    {"40_44": ["P11_V074", "P11_V075", "P11_V076", "P11_V077", "P11_V078"]},
    {"45_49": ["P11_V079", "P11_V080", "P11_V081", "P11_V082", "P11_V083"]},
    {"50_54": ["P11_V084", "P11_V085", "P11_V086", "P11_V087", "P11_V088"]},
    {"55_59": ["P11_V089", "P11_V090", "P11_V091", "P11_V092", "P11_V093"]},
    {"60_64": ["P11_V094", "P11_V095", "P11_V096", "P11_V097", "P11_V098"]},
    {"65_69": ["P11_V099", "P11_V100", "P11_V101", "P11_V102", "P11_V103"]},
    {"70_74": ["P11_V104", "P11_V105", "P11_V106", "P11_V107", "P11_V108"]},
    {"75_79": ["P11_V109", "P11_V110", "P11_V111", "P11_V112", "P11_V113"]},
    {"80_84": ["P11_V114", "P11_V115", "P11_V116", "P11_V117", "P11_V118"]},
    {"85_89": ["P11_V119", "P11_V120", "P11_V121", "P11_V122", "P11_V123"]},
    {"90_94": ["P11_V124", "P11_V125", "P11_V126", "P11_V127", "P11_V128"]},
    {"95_99": ["P11_V129", "P11_V130", "P11_V131", "P11_V132", "P11_V133"]},
    {"100": ["P11_V134"]}
]

In [None]:
# Open the database connection and cursor that the insert loop below uses;
# both are released by the close_connection(conn, cur) cell at the end.
conn, cur = open_connection()

In [None]:
%%time
# For every CSV row, sum the source columns of each group in `cols` and
# insert the resulting totals as one database row keyed by the row's Cod_str.
for index, row in df.iterrows():
    data = [
        [group_name, sum([float(row[source]) for source in sources])]
        for group in cols
        for group_name, sources in group.items()
    ]

    # logger.info(f"Inserting row {index}...")
    insert_data(conn, cur, 'populacao_residente_mas', int(row['Cod_str']), data)

In [None]:
# Release the cursor and connection opened before the insert loop. If the
# loop cell raised, run this cell manually so the connection is not left open.
close_connection(conn, cur)