In [None]:
import logging
import os
import pandas as pd
import psycopg2

In [None]:
def open_connection():
    """Open a PostgreSQL connection and create a cursor on it.

    Connection settings are taken from the libpq-style environment variables
    ``PGHOST``, ``PGDATABASE``, ``PGUSER`` and ``PGPASSWORD`` so credentials do
    not have to live in the notebook. Any variable that is unset falls back to
    the previous hard-coded development value, so existing setups keep working
    without changes.

    Returns:
        tuple: ``(conn, cur)`` - the open connection and a cursor created from
        it. The caller is responsible for closing both.

    Raises:
        Exceptions raised by ``psycopg2.connect`` or ``conn.cursor`` propagate
        to the caller.
    """
    conn = psycopg2.connect(
        host=os.environ.get('PGHOST', 'localhost'),
        database=os.environ.get('PGDATABASE', 'queroponto'),
        user=os.environ.get('PGUSER', 'postgres'),
        password=os.environ.get('PGPASSWORD', 'postgres'),
    )

    try:
        cur = conn.cursor()
    except Exception:
        # Do not leak the freshly opened connection if no cursor can be created.
        conn.close()
        raise

    return conn, cur

In [None]:
def close_connection(conn, cur):
    """Close the cursor, then the connection.

    The connection is closed even when closing the cursor raises, so a failing
    cursor cannot leave the database session open. Any exception from either
    ``close`` call still propagates to the caller.

    Args:
        conn: Connection object exposing ``close()``.
        cur: Cursor object exposing ``close()``.
    """
    try:
        cur.close()
    finally:
        conn.close()

In [None]:
def insert_data(conn, cur, id_sector, data):
    """Insert one row into ``public.responsavel_renda`` and commit it.

    Values are sent as bound query parameters instead of being formatted into
    the SQL text, so quoting and NULL handling are left to the driver.

    Args:
        conn: Open database connection; committed on success, rolled back on
            failure.
        cur: Cursor used to execute the statement.
        id_sector: Value stored in the ``id_setor_censitario`` column.
        data: Iterable of ``[column_name, value]`` pairs. ``NaN`` and ``None``
            values are stored as SQL ``NULL``.

    Raises:
        psycopg2.Error: any database error other than ``IntegrityError`` is
            re-raised after the transaction has been rolled back.
    """
    pairs = list(data)

    # Column names cannot be bound as parameters, so they are still
    # interpolated; callers must pass trusted identifiers only.
    columns = ['id_setor_censitario'] + [item[0] for item in pairs]

    # NaN is the only value that compares unequal to itself.
    params = [id_sector] + [
        None if item[1] is None or item[1] != item[1] else item[1]
        for item in pairs
    ]
    placeholders = ', '.join(['%s'] * len(params))

    query = f"INSERT INTO public.responsavel_renda ({', '.join(columns)}) VALUES ({placeholders});"

    try:
        cur.execute(query, params)
    except psycopg2.IntegrityError as e:
        # Best-effort insert: constraint violations are skipped, not fatal.
        # Logged at DEBUG so the default INFO output stays quiet.
        conn.rollback()
        logging.getLogger().debug("Insert skipped for sector %s: %s", id_sector, e)
    except psycopg2.Error:
        # Leave the connection usable for the next statement before re-raising.
        conn.rollback()
        raise
    else:
        conn.commit()

---

In [None]:
# Configure the root logger to print timestamped INFO-level messages.
logger = logging.getLogger()

# Re-running this cell used to stack one more StreamHandler per run, which
# printed every message several times. Drop the handler installed by a
# previous run (identified by name) before adding a fresh one.
for stale in [h for h in logger.handlers if h.get_name() == 'queroponto-notebook']:
    logger.removeHandler(stale)

handler = logging.StreamHandler()
handler.set_name('queroponto-notebook')
formatter = logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)

In [None]:
# Input CSV; the relative path is resolved against the current working directory.
file = '/'.join(['../../../files/csv', 'AC.csv']) # 12

In [None]:
# Target column name -> source CSV columns whose values are summed per row
# (see the row loop further down). Kept as a list of single-key dicts because
# the cells below iterate it in that shape.
cols = [
    {target: list(sources)}
    for target, sources in (
        ("classe_a", ("RR_V075",)),
        ("classe_b", ("RR_V073", "RR_V074")),
        ("classe_c", ("RR_V072",)),
        ("classe_d", ("RR_V070", "RR_V071")),
        ("classe_e", ("RR_V067", "RR_V068", "RR_V069")),
    )
]

In [None]:
# Columns to read from the CSV: the 'Cod_str' key column first, followed by
# every source column listed in `cols`, in declaration order.
base_cols = ['Cod_str'] + [
    source
    for mapping in cols
    for sources in mapping.values()
    for source in sources
]

In [None]:
%%time
# Read only the columns listed in `base_cols` from the CSV.
df = pd.read_csv(
    file,
    usecols=base_cols,
    sep=',',
    low_memory=False,
)

In [None]:
# Report which file was loaded (last path component) and how many rows it has.
csv_name = file.split('/')[-1]
row_count = len(df)
logger.info(f"{csv_name} with {row_count} rows.")

In [None]:
%%time
# Convert every column to a numeric dtype; values that cannot be parsed become NaN.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
# Open the database connection and cursor via open_connection(); both are
# released by the close_connection() call at the end of the notebook.
conn, cur = open_connection()

In [None]:
%%time
# For each CSV row, build [target_column, summed_value] pairs by adding up the
# source columns declared for that target in `cols`.
for _, row in df.iterrows():
    data = [
        [target, sum([float(row[source]) for source in sources])]
        for mapping in cols
        for target, sources in mapping.items()
    ]

    # insert_data(conn, cur, int(row['Cod_str']), data)

In [None]:
# Close the cursor and connection opened earlier in the notebook.
close_connection(conn, cur)