In [1]:
import os

from llm.llm.chatgpt import ChatGPT
from setup_db import create_chroma_db, connect_to_db, execute_query, DB
from llm_test import MappingTable, find_suitable_column, reasoning_text2SQL, CoT_reasoning

In [2]:
# LLM client, built with ChatGPT's default constructor arguments. It is passed
# as the first argument to find_suitable_column and reasoning_text2SQL below.
llm = ChatGPT()

In [3]:
# Connection settings for the database used by this notebook (the defaults
# look like a local PostgreSQL instance — confirm for your setup).
#
# Every value can be overridden through the standard libpq environment
# variables, so real credentials never have to be written into the notebook.
# The fallbacks are the original local-development values, which keeps
# "Restart Kernel -> Run All" working unchanged when nothing is exported.
db_name = os.environ.get('PGDATABASE', 'test_db')
user = os.environ.get('PGUSER', 'postgres')
# NOTE(review): the fallback below is a throwaway local-dev password only;
# export PGPASSWORD instead of editing this line for any shared database.
password = os.environ.get('PGPASSWORD', '12345678')
# The port stays a string, exactly as in the original cell.
port = os.environ.get('PGPORT', '5433')
host = os.environ.get('PGHOST', 'localhost')

In [4]:
# Database handle built from the settings above. Note the positional order:
# host comes before port. The recorded output shows a "Connecting to
# database ..." message at construction time, so DB presumably connects
# eagerly — TODO confirm in setup_db.
db = DB(db_name, user, password, host, port)


Connecting to database test_db, postgres...


In [5]:
# Vector store for the bank categories: collection name + on-disk directory.
collection_chromadb, persist_directory = (
    'category_bank_chroma',
    'data/category_bank_chroma',
)
bank_vector_store = create_chroma_db(collection_chromadb, persist_directory)

# Vector store for the non-bank categories. The two setting names are rebound
# here, so after this cell they hold the non-bank values.
collection_chromadb, persist_directory = (
    'category_non_bank_chroma',
    'data/category_non_bank_chroma',
)
none_bank_vector_store = create_chroma_db(collection_chromadb, persist_directory)

In [6]:
# Bundle both category vector stores together with the database handle.
mapping_table = MappingTable(
    vector_db_bank=bank_vector_store,
    vector_db_non_bank=none_bank_vector_store,
    db=db,
)

In [15]:
# Natural-language question that the following cells pass to the LLM helpers.
prompt = "Calculate ROA, ROE of the bank VIB in Q2 2024"

In [16]:
# Ask the LLM which financial-statement columns the prompt needs. The result
# is not assigned; it is only displayed as the cell output, which in the
# recorded run is a dict with 'bank_column_name' and 'non_bank_column_name'.
find_suitable_column(llm, prompt)

CompletionUsage(completion_tokens=240, prompt_tokens=154, total_tokens=394, completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0))
To calculate the Return on Assets (ROA) and Return on Equity (ROE) for a bank such as VIB, we need to focus on specific columns from the financial statement that pertain to the bank's assets and equity.

1. **Return on Assets (ROA)** is calculated using the formula:
   \[
   ROA = \frac{Net \ Income}{Total \ Assets}
   \]
   - For this, we need the "Net Income" and "Total Assets" columns.

2. **Return on Equity (ROE)** is calculated using the formula:
   \[
   ROE = \frac{Net \ Income}{Total \ Equity}
   \]
   - For this, we need the "Net Income" and "Total Equity" columns.

In a bank's financial statement, the relevant columns would typically be labeled as follows:

- "Net Income"
- "Total Assets"
- "Total Equity"

Based on this analysis, the suitable columns for the bank's financial statements are:

```json
{
    "bank_column_name": ["Net 

{'bank_column_name': ['Net Income', 'Total Assets', 'Total Equity'],
 'non_bank_column_name': []}

In [17]:
# Generate SQL for the prompt. mapping_table.search_return_df is passed as a
# reference (it is not called here), presumably so the helper can look up
# matching categories itself. The result is indexed as sql[0]['code'] in the
# next cell.
# NOTE(review): the trailing 5 is an unexplained magic number — presumably the
# number of candidates to retrieve; confirm against reasoning_text2SQL.
sql = reasoning_text2SQL(llm, prompt, mapping_table.search_return_df, 5)

CompletionUsage(completion_tokens=189, prompt_tokens=154, total_tokens=343, completion_tokens_details=CompletionTokensDetails(reasoning_tokens=0))
To calculate the Return on Assets (ROA) and Return on Equity (ROE) for a bank, the relevant columns in the financial statements are as follows:

- For ROA, you would typically use:
  - **Net Income**: This represents the profit of the bank after all expenses and taxes.
  - **Average Total Assets**: This is the average value of all the assets owned by the bank during the period.

- For ROE, you would typically use:
  - **Net Income**: Same as above.
  - **Average Shareholder’s Equity**: This is the average equity available to the shareholders during the period.

Hence, the suggested column names for a bank's financial statements would be:

```json
{
    "bank_column_name": [
        "Net Income",
        "Average Total Assets",
        "Average Shareholder’s Equity"
    ],
    "non_bank_column_name": []
}
```
CompletionUsage(completion_tokens

In [18]:
# Show the 'code' field of the first entry in `sql` as plain text.
print(sql[0]['code'])

WITH net_income AS (
    SELECT bfs.data AS net_income, bfs.stock_code as stock_code, bfs.quarter as quarter, bfs.year as year
    FROM bank_financial_report bfs
    JOIN map_category_code_bank mc 
      ON bfs.category_code = mc.category_code
    WHERE bfs.stock_code = 'VIB'
      AND bfs.year = 2024
      AND bfs.quarter = 2
      AND mc.en_caption = 'Profit after Tax'
),
total_assets AS (
    SELECT bfs.data AS total_assets, bfs.stock_code as stock_code, bfs.quarter as quarter, bfs.year as year
    FROM bank_financial_report bfs
    JOIN map_category_code_bank mc 
      ON bfs.category_code = mc.category_code
    WHERE bfs.stock_code = 'VIB'
      AND bfs.year = 2024
      AND bfs.quarter = 2
      AND mc.en_caption = 'Total Assets'
),
equity AS (
    SELECT bfs.data AS equity, bfs.stock_code as stock_code, bfs.quarter as quarter, bfs.year as year
    FROM bank_financial_report bfs
    JOIN map_category_code_bank mc 
      ON bfs.category_code = mc.category_code
    WHERE bfs.stock_

In [19]:
# Run the generated SQL through the database handle and request the result as
# a dataframe; the value is displayed as the cell output (not stored).
db.query(sql[0]['code'], return_type='dataframe')

Unnamed: 0,stock_code,year,quarter,net_income,total_assets,equity,roa,roe
0,VIB,2024,2,1682.962,430962.08,38350.429,0.003905,0.043884


In [12]:
# Sanity check of the bank vector store: search for 'Asset'. The recorded
# output lists five documents, so the second argument is presumably the
# number of results to return — TODO confirm.
# NOTE(review): this cell's execution count (12) is lower than the cells above
# it (15-19), i.e. it was run out of order; re-run the notebook top to bottom
# to confirm it still works on a fresh kernel.
res = bank_vector_store.similarity_search('Asset', 5)
res

[Document(page_content='Other Assets', metadata={'code': 'BS_250', 'lang': 'en'}),
 Document(page_content='Fixed Assets', metadata={'code': 'BS_220', 'lang': 'en'}),
 Document(page_content='Total Assets', metadata={'code': 'BS_300', 'lang': 'en'}),
 Document(page_content='Asset Revaluation Differences', metadata={'code': 'BS_440', 'lang': 'en'}),
 Document(page_content='Equity', metadata={'code': 'BS_500', 'lang': 'en'})]