In [16]:
# Process the CVM inf_diario file and store the resulting data in the database.

In [97]:
from CkanDataset import CKANDataSet


In [1]:
import os
import re
from urllib.parse import quote_plus, urlsplit

import requests

In [2]:
# Database connection settings.
# Each value is read from the corresponding standard libpq environment
# variable when it is set, so real credentials do not have to be committed
# with the notebook; the literals below are only local fallbacks.
param_dic = {
    "host"      : os.environ.get("PGHOST", "localhost"),
    "database"  : os.environ.get("PGDATABASE", "postgres"),
    "user"      : os.environ.get("PGUSER", "postgres"),
    "password"  : os.environ.get("PGPASSWORD", "postgres"),
}

In [56]:
def delete_local_file(file_path: str) -> bool:
    """
    Remove a file from the local filesystem.

    Args:
        file_path: Path of the file to delete.

    Returns:
        True when the file was deleted, False when there was no such file.

    Raises:
        OSError: If the path exists but cannot be removed (for example it is
            a directory or permissions are missing). The original exception
            is propagated unchanged so its errno and subclass are preserved.
    """
    try:
        # Attempt the removal directly instead of checking for existence
        # first: this avoids a race between the check and the delete.
        os.remove(file_path)
    except FileNotFoundError:
        return False

    print("File deleted with sucess")
    return True

In [33]:
def format_doc(doc):
    '''
        Normalise a CPF/CNPJ document number to a digits-only string.

        Every non-digit character is stripped and the result is left-padded
        with zeros to 14 characters. Note that this padding is applied to any
        input, so an 11-digit CPF also comes back 14 characters long.

        Args:
            doc: Document number as a string (with or without punctuation) or
                as a number; missing values (None / NaN / pd.NA) are accepted.

        Returns:
            The 14-character digits-only string, or None for a missing value.
    '''
    if pd.isna(doc):
        return None
    # A numeric column can reach this function as float (e.g. 123.0); drop the
    # fractional part so the ".0" does not add a bogus trailing digit.
    if isinstance(doc, float) and doc.is_integer():
        doc = int(doc)
    # str() makes the regex safe for non-string input, which re.sub rejects.
    digits = re.sub(r"[^0-9]", "", str(doc))
    return digits.zfill(14)

In [89]:
# Download settings for the 201803 daily-report file:
# local staging directory, local file name and remote source URL.
temp_dir = "tmp"
file_name = "inf_diario_fi_201803.csv"
url = "http://dados.cvm.gov.br/dados/FI/DOC/INF_DIARIO/DADOS/inf_diario_fi_201803.csv"

In [90]:
def download_file(
    url: str,
    filename: str,
    temp_dir: str,
    timeout: float = 60.0,
    chunk_size: int = 64 * 1024,
) -> dict:
    """
    Download a file from ``url`` and save it as ``temp_dir/filename``.

    Args:
        url: Address of the remote file.
        filename: Name to give the file locally.
        temp_dir: Directory that receives the file; created when missing.
        timeout: Seconds to wait for the server before giving up.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        A dict with:
            "file_path": path of the saved file,
            "file_name": the ``filename`` argument,
            "date_ref": the digits found in the last path segment of ``url``
                (e.g. "201803" for ".../inf_diario_fi_201803.csv").

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
        OSError: If the directory or file cannot be written.
    """
    if temp_dir:
        os.makedirs(temp_dir, exist_ok=True)
    temp_file = f"{temp_dir}/{filename}"

    # The context manager releases the streamed connection even on failure.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly on HTTP errors instead of silently returning None.
        response.raise_for_status()
        with open(temp_file, "wb") as new_file:
            for partial in response.iter_content(chunk_size=chunk_size):
                new_file.write(partial)

    # Only the file-name part of the URL carries the reference date; digits in
    # the host, port or query string must not leak into it.
    remote_name = urlsplit(url).path.rsplit("/", 1)[-1]
    return {
        "file_path": temp_file,
        "file_name": filename,
        "date_ref": re.sub(r"[^0-9]", "", remote_name),
    }
        
# Fetch the CSV into the staging directory and print the metadata returned.
response = download_file(url=url, filename=file_name, temp_dir=temp_dir)

print(response)

{'file_path': 'tmp/inf_diario_fi_201803.csv', 'file_name': 'inf_diario_fi_201803.csv', 'date_ref': '201803'}


In [91]:
import pandas as pd

def open_file(file: str):
    """Load a semicolon-separated, Latin-1 encoded CSV file into a DataFrame."""
    read_options = {"sep": ";", "encoding": "latin-1"}
    return pd.read_csv(file, **read_options)

In [92]:
# Display floats with a precision of 5, then load the downloaded CSV with pandas.
pd.set_option('display.precision', 5)
df = open_file(response.get('file_path'))

In [93]:
# Clean up the DataFrame:
# - 1 Keep only the selected funds (filter to be validated with Lucas)
# - 1 Drop empty data (to be validated with Lucas; not implemented in this cell)
# - 2 Format the fund document number (CNPJ)

# Funds (by CNPJ, as written in the source file) that are kept.
target_fund_docs = [
    "03.168.062/0001-03",
    "10.237.480/0001-62",
    "04.515.848/0001-04",
    "30.921.203/0001-81",
    "30.910.553/0001-42",
    "32.102.131/0001-76",
    "30.910.199/0001-56",
    "37.368.116/0001-98",
    "06.041.290/0001-06",
    "17.414.721/0001-40",
    "35.927.302/0001-94",
    "31.353.579/0001-08",
    "03.879.361/0001-48",
]

# isin() replaces the chain of ==/| comparisons. The explicit copy detaches
# the filtered rows from the loaded frame, so the column assignment below
# writes to an independent object and does not raise SettingWithCopyWarning.
df = df.loc[df["CNPJ_FUNDO"].isin(target_fund_docs)].copy()

# Format documents
df["CNPJ_FUNDO"] = df["CNPJ_FUNDO"].apply(format_doc)

# Rename columns
rename_map = {
    "CNPJ_FUNDO": "fund_doc",
    "DT_COMPTC": "competency_date",
    "VL_TOTAL": "amount",
    "VL_QUOTA": "share_value",
    "VL_PATRIM_LIQ": "equity_value",
    "NR_COTST": "quotaholder_number",
}
df = df.rename(columns=rename_map)

# Columns to keep
cols_keep = [
    "fund_doc",
    "competency_date",
    "amount",
    "share_value",
    "equity_value",
    "quotaholder_number",
]
# Copy again so later cells can add columns to this subset without warnings.
df = df[cols_keep].copy()

In [94]:
# Add the file's reference date (the 'date_ref' value returned by
# download_file) as a column on every row.
# assign() builds a new frame instead of writing into what may be a subset of
# an earlier DataFrame, which avoids SettingWithCopyWarning.
df = df.assign(data_ref=response.get('date_ref'))

In [95]:
from sqlalchemy import create_engine
# SQLAlchemy connection URL built from param_dic.
# User and password are URL-escaped so characters such as "@", "/" or ":" in
# the credentials cannot corrupt the URL. The port can be overridden with an
# optional "port" key in param_dic and otherwise stays at 54325.
connect = "postgresql+psycopg2://%s:%s@%s:%s/%s" % (
    quote_plus(param_dic['user']),
    quote_plus(param_dic['password']),
    param_dic['host'],
    param_dic.get('port', 54325),
    param_dic['database'],
)
def save_to_db(df):
    """
    Append the rows of ``df`` to the table ``cvm_ckan.fi_inf_diario``.

    A SQLAlchemy engine is created from the module-level ``connect`` URL for
    the duration of the call and disposed afterwards, so repeated calls do
    not accumulate open connection pools.

    Args:
        df: DataFrame whose rows are appended; its index is not written.

    Raises:
        Any exception raised by SQLAlchemy or ``DataFrame.to_sql`` is
        propagated after the engine has been disposed.
    """
    engine = create_engine(connect)
    try:
        df.to_sql(
            'fi_inf_diario',
            con=engine,
            schema='cvm_ckan',
            index=False,
            if_exists='append'
        )
    finally:
        # Release the pooled connections even when the insert fails.
        engine.dispose()
    print("=== to_sql() done with sqlalchemy ===")

In [96]:
# Persist the prepared rows, then remove the downloaded CSV from disk.
save_to_db(df)
downloaded_path = response.get('file_path')
delete_local_file(downloaded_path)

=== to_sql() done with sqlalchemy ===
File deleted with sucess


True