# In [13]:
import csv
import psycopg2
from psycopg2 import sql
import chardet
import pandas as pd
from sqlalchemy import create_engine, text

def detect_file_encoding(csv_file):
    """Guess the text encoding of ``csv_file`` from its leading bytes.

    Opens the file in binary mode, reads at most the first 800000 bytes,
    passes that sample to ``chardet.detect`` and prints the reported
    encoding.

    Args:
        csv_file: Path of the file to inspect.

    Returns:
        The value stored under the ``'encoding'`` key of chardet's result.
    """
    sample_size = 800000
    with open(csv_file, 'rb') as handle:
        sample = handle.read(sample_size)

    guessed = chardet.detect(sample)['encoding']
    print(f"Codificação detectada: {guessed}")
    return guessed

def _parse_flag(value):
    """Convert one flag cell to ``True``/``False``; ``None`` if missing or unrecognized.

    Accepts the textual forms ``'0'``/``'1'``/``'true'``/``'false'`` (any case,
    surrounding whitespace ignored) as well as numeric or boolean cells equal
    to 0 or 1, because pandas may parse a flag column as str, int, float or
    bool depending on the file contents.
    """
    if isinstance(value, str):
        token = value.strip().lower()
        if token == "true":
            return True
        if token == "false":
            return False
        try:
            value = float(token)
        except ValueError:
            return None
    if value is None or pd.isna(value):
        return None
    if value == 1:
        return True
    if value == 0:
        return False
    return None


def create_table_from_csv(csv_file, table_name, db_config):
    """Load a ``;``-delimited CSV into a PostgreSQL table and an Excel file.

    Steps, in order: detect the file encoding, read the CSV with pandas,
    issue ``CREATE TABLE IF NOT EXISTS`` using the column types in the
    mapping below (unmapped columns become ``TEXT``), replace the literal
    string ``'None'`` with ``None``, convert the two flag columns to
    booleans, append the rows with ``DataFrame.to_sql`` and finally write
    the same frame to ``<table_name>.xlsx``.

    Args:
        csv_file: Path of the CSV file to import.
        table_name: Name of the destination table; also used as the stem of
            the Excel output file.
        db_config: Mapping with the keys ``user``, ``password``, ``host``,
            ``port`` and ``dbname``.

    Returns:
        None. Any exception raised after the encoding detection is caught
        and printed rather than propagated; errors raised by the encoding
        detection itself (e.g. a missing file) do propagate.
    """
    from urllib.parse import quote

    encoding = detect_file_encoding(csv_file)

    engine = None
    try:
        print("Conectando ao banco de dados PostgreSQL...")
        # Percent-encode the credentials so characters such as '@', '/' or ':'
        # in the user name or password cannot corrupt the connection URL.
        user = quote(str(db_config['user']), safe='')
        password = quote(str(db_config['password']), safe='')
        engine = create_engine(
            f"postgresql://{user}:{password}@{db_config['host']}:{db_config['port']}/{db_config['dbname']}"
        )

        df = pd.read_csv(csv_file, delimiter=';', encoding=encoding, quotechar='"', na_values=[''], keep_default_na=False)
        print("Amostra dos dados:\n", df.head())

        # Map CSV column names to PostgreSQL column types
        type_mapping = {
            "ChEMBL ID": "VARCHAR(20)",
            "Name": "VARCHAR(255)",
            "Synonyms": "TEXT",
            "Type": "VARCHAR(50)",
            "Max Phase": "DECIMAL(2,1)",
            "Molecular Weight": "FLOAT",
            "Targets": "INT",
            "Bioactivities": "INT",
            "AlogP": "FLOAT",
            "Polar Surface Area": "FLOAT",
            "HBA": "INT",
            "HBD": "INT",
            "#RO5 Violations": "INT",
            "#Rotatable Bonds": "INT",
            "Passes Ro3": "CHAR(1)",
            "QED Weighted": "FLOAT",
            "CX Acidic pKa": "FLOAT",
            "CX Basic pKa": "FLOAT",
            "CX LogP": "FLOAT",
            "CX LogD": "FLOAT",
            "Aromatic Rings": "INT",
            "Structure Type": "VARCHAR(10)",
            "Inorganic Flag": "BOOLEAN",
            "Heavy Atoms": "INT",
            "HBA (Lipinski)": "INT",
            "HBD (Lipinski)": "INT",
            "#RO5 Violations (Lipinski)": "INT",
            "Molecular Weight (Monoisotopic)": "FLOAT",
            "Np Likeness Score": "FLOAT",
            "Molecular Species": "VARCHAR(20)",
            "Molecular Formula": "VARCHAR(50)",
            "Smiles": "TEXT",
            "Inchi Key": "VARCHAR(27)",
            "Inchi": "TEXT",
            "Withdrawn Flag": "BOOLEAN",
            "Orphan": "INT",
        }

        # Build the CREATE TABLE statement. The table name is quoted with the
        # dialect's own identifier rules so the DDL targets the same table
        # that to_sql() writes to below; embedded double quotes in column
        # names are doubled so they cannot terminate the identifier early.
        quoted_table = engine.dialect.identifier_preparer.quote(table_name)
        column_defs = []
        for col in df.columns:
            quoted_col = '"' + str(col).replace('"', '""') + '"'
            column_defs.append(f'{quoted_col} {type_mapping.get(col, "TEXT")}')
        create_table_query = (
            f"CREATE TABLE IF NOT EXISTS {quoted_table} (\n"
            + ",\n".join(column_defs)
            + "\n);"
        )

        # engine.begin() commits when the block exits normally; a plain
        # connect() block leaves the DDL uncommitted on SQLAlchemy 2.x.
        with engine.begin() as conn:
            conn.execute(text(create_table_query))
        print(f"Tabela {table_name} criada com sucesso!")

        # Turn literal 'None' strings into Python None. DataFrame.applymap is
        # deprecated in favour of DataFrame.map; fall back for older pandas.
        elementwise = df.map if hasattr(df, "map") else df.applymap
        df = elementwise(lambda cell: None if cell == 'None' else cell)

        # Convert the flag columns to booleans regardless of whether pandas
        # parsed them as strings, integers, floats or booleans.
        bool_columns = ['Inorganic Flag', 'Withdrawn Flag']
        for col in bool_columns:
            if col in df.columns:
                df[col] = df[col].map(_parse_flag)

        # Insert the rows with pandas' to_sql
        df.to_sql(table_name, engine, if_exists='append', index=False, method='multi', chunksize=1000)
        print(f"Dados inseridos com sucesso na tabela {table_name}!")

        # Save the same data to an Excel spreadsheet
        output_file = f"{table_name}.xlsx"
        df.to_excel(output_file, index=False)
        print(f"Dados salvos em {output_file}!")

    except Exception as error:
        print("Erro ao processar os dados:", error)
    finally:
        # Release pooled connections even when the import failed part-way.
        if engine is not None:
            engine.dispose()

# Database connection settings
db_config = dict(
    dbname="dtn",
    user="postgres",
    password="password",
    host="localhost",
    port="54321",
)

# Script usage: input CSV file and destination table
csv_file, table_name = 'leishmania.csv', 'doenca_leishmaniose'

try:
    create_table_from_csv(csv_file, table_name, db_config)
except Exception as exc:
    print(f"Ocorreu um erro ao executar o script: {exc}")

Codificação detectada: ascii
Conectando ao banco de dados PostgreSQL...
Amostra dos dados:
        ChEMBL ID                   Name  \
0  CHEMBL2109460             SCH-708980   
1  CHEMBL2079699  SODIUM STIBOGLUCONATE   
2  CHEMBL1330792             ACETARSONE   
3       CHEMBL55            PENTAMIDINE   
4   CHEMBL290960             NIFURTIMOX   

                                            Synonyms            Type  \
0                                         SCH-708980        Antibody   
1  ANTIMONY SODIUM GLUCONATE|PENTOSTAM|SODIUM STI...  Small molecule   
2     ACETARSOL|ACETARSONE|GYNOPLIX|NSC-13160|S.V.C.  Small molecule   
3  GNF-Pf-3680|MB 800 FREE BASE|MB 800 [AS ISETHI...  Small molecule   
4  BAY 2502|BAY A2502|BAY-2502|BAY-A2502|BAYER 25...  Small molecule   

   Max Phase  Molecular Weight  Targets  Bioactivities AlogP  \
0        2.0               NaN      NaN            NaN   NaN   
1        4.0            907.88     13.0          181.0  None   
2        4.0            

  df = df.applymap(lambda x: None if x == 'None' else x)
