In [1]:
import os

from llm.llm.chatgpt import ChatGPT
from setup_db import create_chroma_db, connect_to_db, execute_query, DBHUB
from llm_test import MappingTable, find_suitable_column, reasoning_text2SQL, CoT_reasoning

In [2]:
# Instantiate the project's ChatGPT wrapper; this `llm` handle is what gets
# passed to reasoning_text2SQL further down.
llm = ChatGPT()

In [3]:
# Database connection settings.
#
# Values are read from the standard libpq environment variables so that
# credentials are never stored in (or committed with) the notebook. The
# non-secret settings fall back to the local development defaults; the
# password deliberately has no built-in default — export PGPASSWORD before
# starting the kernel.
db_name = os.environ.get('PGDATABASE', 'test_db')
user = os.environ.get('PGUSER', 'postgres')
password = os.environ.get('PGPASSWORD', '')
port = os.environ.get('PGPORT', '5433')  # kept as a string, as before
host = os.environ.get('PGHOST', 'localhost')

In [4]:
# Open the database connection from the settings defined in the previous cell.
# The helper logs the database name and user (see the recorded output).
conn = connect_to_db(db_name, user, password, host, port)


Connecting to database test_db, postgres...


In [5]:
def _open_chroma_store(collection_name, data_dir='data'):
    """Call create_chroma_db for one collection.

    Every collection used in this notebook is persisted in a directory named
    after the collection itself, so the persist path is derived as
    ``<data_dir>/<collection_name>`` instead of being spelled out a second time.

    Args:
        collection_name: Name of the Chroma collection.
        data_dir: Parent directory holding the per-collection directories.

    Returns:
        Whatever create_chroma_db returns for that collection/directory pair.
    """
    return create_chroma_db(collection_name, f'{data_dir}/{collection_name}')


# One store per lookup table; all four are handed to DBHUB in the next cell.
bank_vector_store = _open_chroma_store('category_bank_chroma')
none_bank_vector_store = _open_chroma_store('category_non_bank_chroma')
vector_db_company = _open_chroma_store('company_name_chroma')
vector_db_sql = _open_chroma_store('sql_query')

In [6]:
# Bundle the database connection and the four vector stores into a single
# DBHUB handle; this is the `db` object used by the text-to-SQL call and by
# db.query() later in the notebook.
db = DBHUB(conn, bank_vector_store, none_bank_vector_store, vector_db_company, vector_db_sql)

In [13]:
# Natural-language question handed to reasoning_text2SQL in the next cell.
prompt = "Calculate ROA, ROE of the bank VIB in financial year 2023"

In [14]:
# Ask the LLM to turn `prompt` into SQL. With verbose=True the recorded output
# shows token usage, the column-selection response and the matched category codes.
# NOTE(review): the meaning of the positional 5 is not visible from this
# notebook (presumably a top-k or retry limit) — confirm and pass it by keyword.
sql = reasoning_text2SQL(llm, prompt, db, 5, verbose=True)

CompletionUsage(completion_tokens=40, prompt_tokens=166, total_tokens=206, prompt_tokens_details={'cached_tokens': 0}, completion_tokens_details={'reasoning_tokens': 0})
Find suitable column response: 
```json
{
    "bank_column_name": [
        "Net Income",
        "Total Assets",
        "Shareholders' Equity"
    ],
    "non_bank_column_name": []
}
```
Bank column:    category_code                                         en_caption
0         IS_003                                Net interest income
1         IS_006          Net profit/(loss) from service activities
2         IS_012            Net profit/(loss) from other activities
3         IS_021                                   Profit after tax
4         IS_023                           Basic earnings per share
5         BS_220                                       Fixed assets
6         BS_250                                       Other assets
7         BS_254                                       Other assets
8         BS_300

In [15]:
# Inspect the raw return value: per the recorded output, a list of dicts with
# 'language' and 'code' keys.
sql

[{'language': 'sql',
  'code': "WITH net_profit AS (\n    SELECT data AS net_profit, stock_code\n    FROM bank_financial_report\n    WHERE stock_code = 'VIB'\n      AND year = 2023\n      AND quarter = 0\n      AND category_code = 'IS_021'\n),\n\ntotal_assets AS (\n    SELECT data AS total_assets, stock_code\n    FROM bank_financial_report\n    WHERE stock_code = 'VIB'\n      AND year = 2023\n      AND quarter = 0\n      AND category_code = 'BS_300'\n),\n\nshareholders_equity AS (\n    SELECT data AS shareholders_equity, stock_code\n    FROM bank_financial_report\n    WHERE stock_code = 'VIB'\n      AND year = 2023\n      AND quarter = 0\n      AND category_code = 'BS_500'\n)\n\nSELECT \n    np.stock_code,\n    np.net_profit,\n    ta.total_assets,\n    se.shareholders_equity,\n    (np.net_profit / ta.total_assets) AS ROA,\n    (np.net_profit / se.shareholders_equity) AS ROE\nFROM net_profit np\nJOIN total_assets ta ON np.stock_code = ta.stock_code\nJOIN shareholders_equity se ON np.sto

In [16]:
# print() (rather than a bare expression) so the embedded newlines render;
# the last entry of the list is the query that gets executed in the next cell.
print(sql[-1]['code'])

WITH net_profit AS (
    SELECT data AS net_profit, stock_code
    FROM bank_financial_report
    WHERE stock_code = 'VIB'
      AND year = 2023
      AND quarter = 0
      AND category_code = 'IS_021'
),

total_assets AS (
    SELECT data AS total_assets, stock_code
    FROM bank_financial_report
    WHERE stock_code = 'VIB'
      AND year = 2023
      AND quarter = 0
      AND category_code = 'BS_300'
),

shareholders_equity AS (
    SELECT data AS shareholders_equity, stock_code
    FROM bank_financial_report
    WHERE stock_code = 'VIB'
      AND year = 2023
      AND quarter = 0
      AND category_code = 'BS_500'
)

SELECT 
    np.stock_code,
    np.net_profit,
    ta.total_assets,
    se.shareholders_equity,
    (np.net_profit / ta.total_assets) AS ROA,
    (np.net_profit / se.shareholders_equity) AS ROE
FROM net_profit np
JOIN total_assets ta ON np.stock_code = ta.stock_code
JOIN shareholders_equity se ON np.stock_code = se.stock_code;



In [17]:
# Execute the generated query and show the result as a DataFrame.
# NOTE(review): this runs LLM-generated SQL verbatim — presumably acceptable on
# a local test database, but confirm the connection is read-only (or validate
# the statement) before pointing this at real data.
db.query(sql[-1]['code'], return_type='dataframe')

Unnamed: 0,stock_code,net_profit,total_assets,shareholders_equity,roa,roe
0,VIB,8563053,409881373,25369708,0.0208915397577727,0.3375306093392955


In [18]:
# Sanity-check retrieval on the bank category store: fetch the 5 closest
# captions to the query 'Asset'.
# NOTE(review): the recorded top-5 contains no total-assets caption, even though
# the generated SQL above relies on BS_300 for total assets — worth checking how
# that code gets retrieved for broader queries.
res = bank_vector_store.similarity_search('Asset', 5)
res

[Document(metadata={'code': 'BS_254', 'lang': 'en'}, page_content='Other assets'),
 Document(metadata={'code': 'BS_250', 'lang': 'en'}, page_content='Other assets'),
 Document(metadata={'code': 'BS_220', 'lang': 'en'}, page_content='Fixed assets'),
 Document(metadata={'code': 'BS_440', 'lang': 'en'}, page_content='Asset revaluation difference'),
 Document(metadata={'code': 'BS_416', 'lang': 'en'}, page_content='Other capital')]