In [5]:
import pandas as pd # import pandas for file manipulation, read and write, plus other functions
import sqlite3 # import SQLite3
import os # import os to check if directory and file exist
from openpyxl import Workbook # import Workbook to create files

# Render floats with two decimals in displayed frames (display only; stored values are unchanged).
pd.set_option('display.float_format', '{:.2f}'.format)

In [6]:
# Load the registered stock operations and order them chronologically.
# NOTE(review): the workbook path is absolute and machine-specific; consider moving it
# to a config constant so the notebook can run elsewhere.
# NOTE(review): sort_values uses its default (non-stable) algorithm, so rows that share
# the same operation date are not guaranteed to keep their spreadsheet order.
assets_df = (
    pd.read_excel(r"C:\Users\guilh\OneDrive\Investimentos\Operações na Bolsa.xlsx")
    .sort_values(by='Data da Operação', ascending=True)
    .reset_index(drop=True)
)

# Working copy for the database, without the two total columns.
db_df = assets_df.drop(columns=['Valor nominal total', 'Valor total com Taxas'])

# Keep only the calendar date of each operation (Python date objects, no time part).
db_df['Data da Operação'] = pd.to_datetime(db_df['Data da Operação']).dt.date

# Positions 7-12 are the fee/tax columns (liquidation_fee .. irrf after the rename below);
# missing values there become 0 to avoid math errors.
fee_columns = db_df.columns[7:13]
db_df[fee_columns] = db_df[fee_columns].fillna(0)

# Replace the spreadsheet headers with English snake_case names (assigned by position).
db_df.columns = [
    'timestamp',
    'date',
    'asset_type',
    'ticker',
    'operation_type',
    'quantity',
    'unit_price',
    'liquidation_fee',
    'emoluments_fee',
    'taxes',
    'other_fees',
    'operational_fee',
    'irrf',
]

# Empty id column — presumably so the database assigns the key on insert; confirm against the table schema.
db_df['id'] = None

db_df.head(5)  # preview the first rows

Unnamed: 0,timestamp,date,asset_type,ticker,operation_type,quantity,unit_price,liquidation_fee,emoluments_fee,taxes,other_fees,operational_fee,irrf,id
0,2023-12-30 12:39:37.608,2023-11-06,Ação,BBSE3,Compra,31,31.5,0.19,0.04,0.52,0.29,4.9,0.0,
1,2023-12-30 12:41:33.760,2023-11-06,Ação,BBDC4,Compra,67,14.72,0.19,0.04,0.52,0.29,4.9,0.0,
2,2023-12-30 12:44:19.580,2023-11-06,Ação,BRAP4,Compra,20,24.1,0.19,0.04,0.52,0.29,4.9,0.0,
3,2023-12-30 12:46:25.333,2023-11-06,Ação,BBAS3,Compra,19,50.2,0.19,0.04,0.52,0.29,4.9,0.0,
4,2023-12-30 12:46:25.333,2023-11-06,Ação,GGBR4,Compra,43,22.91,0.19,0.04,0.52,0.29,4.9,0.0,


In [7]:
# Portuguese -> English vocabulary for the operation type column.
ops_type_pt = ['Compra', 'Venda', 'Bonificação', 'Subscrição']
ops_type_en = ['Purchase', 'Sale', 'Prize', 'Subscription']
ops_type_df = pd.DataFrame({'Portuguese': ops_type_pt, 'English': ops_type_en})

# Portuguese -> English vocabulary for the asset type column.
asset_type_pt = ['Ação', 'FII', 'BDR']
asset_type_en = ['Stock', 'Real Estate', 'BDR']
asset_type_df = pd.DataFrame({'Portuguese': asset_type_pt, 'English': asset_type_en})

# Build Series lookups (index = Portuguese term, value = English term) and apply them;
# values that are not in a lookup are left as they are.
operation_translation = ops_type_df.set_index('Portuguese')['English']
asset_translation = asset_type_df.set_index('Portuguese')['English']

db_df['operation_type'] = db_df['operation_type'].replace(operation_translation)
db_df['asset_type'] = db_df['asset_type'].replace(asset_translation)

# Reorder the columns: id first, timestamp last.
final_column_order = [
    'id',
    'asset_type',
    'operation_type',
    'ticker',
    'date',
    'unit_price',
    'quantity',
    'liquidation_fee',
    'emoluments_fee',
    'taxes',
    'operational_fee',
    'other_fees',
    'irrf',
    'timestamp',
]
db_df = db_df[final_column_order]

db_df.tail(5)  # preview the last rows to verify the changes

Unnamed: 0,id,asset_type,operation_type,ticker,date,unit_price,quantity,liquidation_fee,emoluments_fee,taxes,operational_fee,other_fees,irrf,timestamp
100,,Real Estate,Purchase,VRTA11,2024-12-23,73.6,7,0.14,0.02,0.0,0.0,0.0,0.0,2024-12-24 11:25:24.929
101,,Real Estate,Purchase,IRDM11,2024-12-23,59.74,9,0.14,0.03,0.0,0.0,0.0,0.0,2024-12-24 11:26:46.432
102,,Real Estate,Purchase,LVBI11,2024-12-23,94.89,5,0.13,0.02,0.0,0.0,0.0,0.0,2024-12-24 11:23:37.250
103,,Stock,Purchase,EGIE3,2024-12-23,36.66,14,0.13,0.02,0.0,0.0,0.0,0.0,2024-12-24 11:24:48.023
104,,Real Estate,Purchase,RECR11,2024-12-23,67.88,8,0.14,0.03,0.0,0.0,0.0,0.0,2024-12-24 11:27:20.105


In [8]:
# Persist the prepared operations into the 'operations' table of the SQLite database.
DB_PATH = 'database/operations.db'

# sqlite3.connect() creates a missing database file but not a missing folder,
# so make sure the parent directory exists before connecting.
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)

conn = sqlite3.connect(DB_PATH)
try:
    # NOTE(review): if_exists='append' adds rows to whatever the table already holds,
    # so re-running this cell inserts the same operations again.
    db_df.to_sql('operations', conn, if_exists='append', index=False)
finally:
    # Always release the database handle, even when the upload fails.
    conn.close()