In [45]:
import pandas as pd
import numpy as np 

In [46]:
universal_report = pd.read_parquet(r'D:\python\financial statement prj\chatbot_financial_statement\data\financial_statement_v3.parquet')
universal_report

Unnamed: 0,data,stock_code,quarter,year,date_added,category_code
0,1.931497e+07,BID,0,2015,2015-12-30,IS_150
1,2.339361e+07,BID,0,2016,2016-12-30,IS_150
2,3.095533e+07,BID,0,2017,2017-12-30,IS_150
3,3.472085e+07,BID,0,2018,2018-12-30,IS_150
4,3.597781e+07,BID,0,2019,2019-12-30,IS_150
...,...,...,...,...,...,...
829034,-4.082407e+04,YEG,3,2023,2023-09-30,IS_057
829035,-8.856448e+04,YEG,4,2023,2023-12-30,IS_057
829036,-7.546336e+04,YEG,1,2024,2024-03-30,IS_057
829037,-8.250625e+04,YEG,2,2024,2024-06-30,IS_057


In [None]:
import pandas as pd

def calculate_4NQ(df):

    df_sorted = df.sort_values(by=['stock_code', 'category_code', 'year', 'quarter']).reset_index(drop=True)

    df_sorted['period'] = df_sorted['year'] * 10 + df_sorted['quarter']

    df_lookup = df_sorted[['stock_code', 'category_code', 'year', 'quarter', 'data', 'period']].copy()

    df_lookup['lookback1'] = df_lookup['period']
    df_lookup['lookback2'] = df_lookup['period'] - 1
    df_lookup['lookback3'] = df_lookup['period'] - 2
    df_lookup['lookback4'] = df_lookup['period'] - 3

    df_lookup.loc[df_lookup['quarter'] == 3, 'lookback4'] = (df_lookup['year'] - 1) * 10 + 4
    df_lookup.loc[df_lookup['quarter'] == 2, 'lookback3'] = (df_lookup['year'] - 1) * 10 + 4
    df_lookup.loc[df_lookup['quarter'] == 2, 'lookback4'] = (df_lookup['year'] - 1) * 10 + 3
    df_lookup.loc[df_lookup['quarter'] == 1, 'lookback2'] = (df_lookup['year'] - 1) * 10 + 4
    df_lookup.loc[df_lookup['quarter'] == 1, 'lookback3'] = (df_lookup['year'] - 1) * 10 + 3
    df_lookup.loc[df_lookup['quarter'] == 1, 'lookback4'] = (df_lookup['year'] - 1) * 10 + 2

    df_long = df_lookup.melt(
        id_vars=['stock_code', 'category_code', 'data', 'period'],
        value_vars=['lookback1', 'lookback2', 'lookback3', 'lookback4'],
        var_name='lookback_type',
        value_name='lookup_period'
    )

    df_merged = df_long.merge(
        df_sorted[['stock_code', 'category_code', 'period', 'data']],
        left_on=['stock_code', 'category_code', 'lookup_period'],
        right_on=['stock_code', 'category_code', 'period'],
        how='left',
        suffixes=('', '_past')
    )

    df_final = df_merged.groupby(['stock_code', 'category_code', 'period'])['data_past'].sum().reset_index()

    df_final['year'] = df_final['period'] // 10
    df_final['quarter'] = df_final['period'] % 10

    df_final.rename(columns={'data_past': 'data'}, inplace=True)

    return df_final

is_report = universal_report[universal_report['category_code'].str.startswith('IS')]
cf_report = universal_report[universal_report['category_code'].str.startswith('CF')]

is_report = is_report[is_report['quarter'] != 0]
cf_report = cf_report[cf_report['quarter'] != 0]

is_report_4NQ = calculate_4NQ(is_report)
cf_report_4NQ = calculate_4NQ(cf_report)

report_4NQ = pd.concat([is_report_4NQ, cf_report_4NQ], ignore_index=True)

report_4NQ['category_code'] = report_4NQ['category_code'] + '_4NQ'

report_4NQ['original_category_code'] = report_4NQ['category_code'].str.replace('_4NQ', '', regex=True)

report_4NQ = report_4NQ.merge(
    universal_report[['stock_code', 'year', 'quarter', 'category_code', 'date_added']],
    left_on=['stock_code', 'year', 'quarter', 'original_category_code'],
    right_on=['stock_code', 'year', 'quarter', 'category_code'],
    how='left',
    suffixes=('', '_original')
)

report_4NQ.drop(columns=['category_code_original', 'original_category_code'], inplace=True)

missing_cols = [col for col in universal_report.columns if col not in report_4NQ.columns]
for col in missing_cols:
    report_4NQ[col] = None  

report_4NQ = report_4NQ[universal_report.columns]

report_4NQ


Unnamed: 0,data,stock_code,quarter,year,date_added,category_code
0,9.927316e+05,ABB,1,2015,2015-03-30,IS_020_4NQ
1,2.059845e+06,ABB,2,2015,2015-06-30,IS_020_4NQ
2,3.062800e+06,ABB,3,2015,2015-09-30,IS_020_4NQ
3,4.093853e+06,ABB,4,2015,2015-12-30,IS_020_4NQ
4,4.144439e+06,ABB,1,2016,2016-03-30,IS_020_4NQ
...,...,...,...,...,...,...
252498,5.483616e+04,YEG,3,2023,2023-09-30,CF_130_4NQ
252499,3.213779e+04,YEG,4,2023,2023-12-30,CF_130_4NQ
252500,2.752085e+04,YEG,1,2024,2024-03-30,CF_130_4NQ
252501,2.727661e+04,YEG,2,2024,2024-06-30,CF_130_4NQ


In [48]:
result = report_4NQ[(report_4NQ['category_code'] == 'IS_020_4NQ') & 
                    (report_4NQ['stock_code'] == 'VIC') & 
                    
                    (report_4NQ['year'] == 2023)]
print(result)

              data stock_code  quarter  year date_added category_code
90311  122257932.0        VIC        1  2023 2023-03-30    IS_020_4NQ
90312  156168812.0        VIC        2  2023 2023-06-30    IS_020_4NQ
90313  175374362.0        VIC        3  2023 2023-09-30    IS_020_4NQ
90314  161634280.0        VIC        4  2023 2023-12-30    IS_020_4NQ


In [49]:
result_test = universal_report[(universal_report['category_code'] == 'IS_020') & 
                    (universal_report['stock_code'] == 'VIC') & 
                    (universal_report['quarter'].isin([1, 2, 3, 4])) & 
                    (universal_report['year'].isin([2023, 2024]))]
print(result_test)

              data stock_code  quarter  year date_added category_code
523007  38963375.0        VIC        1  2023 2023-03-30        IS_020
523008  47295227.0        VIC        2  2023 2023-06-30        IS_020
523009  47947906.0        VIC        3  2023 2023-09-30        IS_020
523010  27427772.0        VIC        4  2023 2023-12-30        IS_020
523011  21738502.0        VIC        1  2024 2024-03-30        IS_020
523012  42327114.0        VIC        2  2024 2024-06-30        IS_020
523013  62850017.0        VIC        3  2024 2024-09-30        IS_020


In [38]:
numbers = [38963375.0 , 47295227.0, 47947906.0,  27427772.0]
total_sum = sum(numbers)
print(total_sum)

161634280.0


In [50]:
combined_report = pd.concat([universal_report, report_4NQ], ignore_index=True)
combined_report

Unnamed: 0,data,stock_code,quarter,year,date_added,category_code
0,1.931497e+07,BID,0,2015,2015-12-30,IS_150
1,2.339361e+07,BID,0,2016,2016-12-30,IS_150
2,3.095533e+07,BID,0,2017,2017-12-30,IS_150
3,3.472085e+07,BID,0,2018,2018-12-30,IS_150
4,3.597781e+07,BID,0,2019,2019-12-30,IS_150
...,...,...,...,...,...,...
1081298,5.483616e+04,YEG,3,2023,2023-09-30,CF_130_4NQ
1081299,3.213779e+04,YEG,4,2023,2023-12-30,CF_130_4NQ
1081300,2.752085e+04,YEG,1,2024,2024-03-30,CF_130_4NQ
1081301,2.727661e+04,YEG,2,2024,2024-06-30,CF_130_4NQ


# add in map

In [39]:
map_universal = pd.read_csv(r'D:\python\financial statement prj\chatbot_financial_statement\csv\v3\map_category_code_universal.csv')
map_universal
                            

Unnamed: 0,Corp,corp_code,Securities,sec_code,Bank,bank_code,en_caption,category_code,parent_code
0,A. CURRENT ASSETS,BS_100,A. CURRENT ASSETS,BS_100,,,(Balance sheet) A. CURRENT ASSETS,BS_100,BS_100
1,,,I. Financial Assets,BS_110,,,(Balance sheet) (Securities) Financial Assets,BS_101,BS_101
2,I. Cash and cash equivalents,BS_110,1. Cash and cash equivalents,BS_111,,,(Balance sheet) Cash and cash equivalents,BS_110,BS_110
3,Cash,BS_111,1.1. Cash,BS_111.1,"Cash, precious metals, gemstones",BS_110,(Balance sheet) Cash,BS_111,BS_110
4,,,,,Deposits at the Central Bank,BS_120,(Balance sheet) (Bank) Deposits at the Central...,BS_112,BS_110
...,...,...,...,...,...,...,...,...,...
340,Lưu chuyển tiền thuần từ hoạt động tài chính,CF_040,Lưu chuyển tiền tệ từ hoạt động tài chính,CF_080,III.Lưu chuyển tiền thuần từ hoạt động tài chính,CF_041,(Cash flow) Net cash flow from financing activ...,CF_090,CF_090
341,Lưu chuyển tiền thuần trong kỳ (50 = 20+30+40),CF_050,LƯU CHUYỂN TIỀN THUẦN TRONG KỲ,CF_090,IV.Lưu chuyển tiền thuần trong kỳ,CF_042,(Cash flow) Net cash flow during the period,CF_100,CF_100
342,Tiền và tương đương tiền đầu kỳ,CF_060,TIỀN VÀ TƯƠNG ĐƯƠNG TIỀN ĐẦU KỲ,CF_101,V. Tiền và các khoản tương đương tiền tại thời...,CF_043,(Cash flow) Cash and cash equivalents at the b...,CF_110,CF_110
343,Ảnh hưởng của thay đổi tỷ giá hối đoái quy đổi...,CF_061,,,VI. Điều chỉnh ảnh hưởng của thay đổi tỷ giá,CF_044,(Cash flow) Effects of exchange rate changes o...,CF_120,CF_120


In [43]:
def add_4NQ_records(map_universal):
 
    filtered_df = map_universal[map_universal['category_code'].str.startswith(('IS_', 'CF_'), na=False)].copy()

    filtered_df['category_code'] = filtered_df['category_code'] + '_4NQ'
    filtered_df['corp_code'] = filtered_df['corp_code'].astype(str) + '_4NQ'
    filtered_df['sec_code'] = filtered_df['sec_code'].astype(str) + '_4NQ'
    filtered_df['bank_code'] = filtered_df['bank_code'].astype(str) + '_4NQ'

    # Append ' (4 nearest quarters)' suffix to relevant name columns (handling NaN values)
    filtered_df['en_caption'] = filtered_df['en_caption'].astype(str) + ' (4 nearest quarters)'
    filtered_df['Corp'] = filtered_df['Corp'].astype(str) + ' (4 nearest quarters)'
    filtered_df['Securities'] = filtered_df['Securities'].astype(str) + ' (4 nearest quarters)'
    filtered_df['Bank'] = filtered_df['Bank'].astype(str) + ' (4 nearest quarters)'

    # Handle NaN cases (if some columns had missing values)
    filtered_df.replace({'nan (4 nearest quarters)': None}, inplace=True)
    filtered_df.replace({'nan_4NQ': None}, inplace=True)

    # Append the new records to the original table
    map_universal_extended = pd.concat([map_universal, filtered_df], ignore_index=True)

    return map_universal_extended

# Apply the transformation
map_universal_extended = add_4NQ_records(map_universal)
map_universal_extended

Unnamed: 0,Corp,corp_code,Securities,sec_code,Bank,bank_code,en_caption,category_code,parent_code
0,A. CURRENT ASSETS,BS_100,A. CURRENT ASSETS,BS_100,,,(Balance sheet) A. CURRENT ASSETS,BS_100,BS_100
1,,,I. Financial Assets,BS_110,,,(Balance sheet) (Securities) Financial Assets,BS_101,BS_101
2,I. Cash and cash equivalents,BS_110,1. Cash and cash equivalents,BS_111,,,(Balance sheet) Cash and cash equivalents,BS_110,BS_110
3,Cash,BS_111,1.1. Cash,BS_111.1,"Cash, precious metals, gemstones",BS_110,(Balance sheet) Cash,BS_111,BS_110
4,,,,,Deposits at the Central Bank,BS_120,(Balance sheet) (Bank) Deposits at the Central...,BS_112,BS_110
...,...,...,...,...,...,...,...,...,...
496,Lưu chuyển tiền thuần từ hoạt động tài chính (...,CF_040_4NQ,Lưu chuyển tiền tệ từ hoạt động tài chính (4 n...,CF_080_4NQ,III.Lưu chuyển tiền thuần từ hoạt động tài chí...,CF_041_4NQ,(Cash flow) Net cash flow from financing activ...,CF_090_4NQ,CF_090
497,Lưu chuyển tiền thuần trong kỳ (50 = 20+30+40)...,CF_050_4NQ,LƯU CHUYỂN TIỀN THUẦN TRONG KỲ (4 nearest quar...,CF_090_4NQ,IV.Lưu chuyển tiền thuần trong kỳ (4 nearest q...,CF_042_4NQ,(Cash flow) Net cash flow during the period (4...,CF_100_4NQ,CF_100
498,Tiền và tương đương tiền đầu kỳ (4 nearest qua...,CF_060_4NQ,TIỀN VÀ TƯƠNG ĐƯƠNG TIỀN ĐẦU KỲ (4 nearest qua...,CF_101_4NQ,V. Tiền và các khoản tương đương tiền tại thời...,CF_043_4NQ,(Cash flow) Cash and cash equivalents at the b...,CF_110_4NQ,CF_110
499,Ảnh hưởng của thay đổi tỷ giá hối đoái quy đổi...,CF_061_4NQ,,,VI. Điều chỉnh ảnh hưởng của thay đổi tỷ giá (...,CF_044_4NQ,(Cash flow) Effects of exchange rate changes o...,CF_120_4NQ,CF_120
