# `ROIC-Preprocessing_ver2.ipynb`

-   ROIC ÂàÜÊûê„ÅÆÂâçÂá¶ÁêÜÁî® notebook
-   Ë°å„ÅÜ„Åì„Å®
    1. ÂèñÂæó„Åó„Åü„Éá„Éº„Çø„Çí„Éá„Éº„Çø„Éô„Éº„Çπ„Å´ËêΩ„Å®„ÅóËæº„ÇÄ
    2. „Éô„É≥„ÉÅ„Éû„Éº„ÇØ„ÅÆ„Éó„É©„Ç§„Çπ„Å®„É™„Çø„Éº„É≥„Éá„Éº„ÇøÁî®ÊÑè
    3. „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó


In [2]:
%load_ext autoreload
%autoreload 2

import datetime
import gc
import itertools
import os
import sqlite3
import sys
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import numpy as np
import pandas as pd
import yaml
from dotenv import load_dotenv
from tqdm import tqdm

warnings.simplefilter("ignore")
load_dotenv()

UNIVERSE_CODE = "MSXJPN_AD"
BLOOMBERG_UNIVERSE_TICKER = "MXKO Index"

QUANTS_DIR = Path(os.environ.get("QUANTS_DIR"))  # type: ignore
FACTSET_ROOT_DIR = Path(os.environ.get("FACTSET_ROOT_DIR"))  # type: ignore
FACTSET_FINANCIALS_DIR = Path(os.environ.get("FACTSET_FINANCIALS_DIR"))  # type: ignore
FACTSET_INDEX_CONSTITUENTS_DIR = Path(os.environ.get("FACTSET_INDEX_CONSTITUENTS_DIR"))  # type: ignore
INDEX_DIR = FACTSET_FINANCIALS_DIR / UNIVERSE_CODE
BPM_ROOT_DIR = Path(os.environ.get("BPM_ROOT_DIR"))  # type: ignore
BLOOMBERG_ROOT_DIR = Path(os.environ.get("BLOOMBERG_ROOT_DIR"))  # type: ignore
BLOOMBERG_DATA_DIR = Path(os.environ.get("BLOOMBERG_DATA_DIR"))  # type: ignore

sys.path.insert(0, str(QUANTS_DIR))
import src.calculate_performance_metrics as performance_metrics_utils
import src.database_utils as db_utils
import src.ROIC_make_data_files_ver2 as roic_utils

from src import bloomberg_utils, factset_utils

financials_db_path = INDEX_DIR / "Financials_and_Price.db"
factset_index_db_path = FACTSET_INDEX_CONSTITUENTS_DIR / "Index_Constituents.db"
bloomberg_index_db_path = BLOOMBERG_ROOT_DIR / "Index_Price_and_Returns.db"
bloomberg_valuation_db_path = BLOOMBERG_ROOT_DIR / "Valuation.db"
bpm_db_path = BPM_ROOT_DIR / "Index_Constituents.db"

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 0. ÁèæÂú®„ÅÆ„Éá„Éº„Çø„Éô„Éº„Çπ„ÉÅ„Çß„ÉÉ„ÇØ


In [None]:
tables = sorted(db_utils.get_table_names(financials_db_path))
display(tables)

with sqlite3.connect(financials_db_path) as conn:
    df_tables = pd.read_sql("SELECT * FROM FF_ASSETS", con=conn, parse_dates=["date"])
    display(df_tables)

## 1. BPM „Å® Factset „Åã„Çâ„ÉÄ„Ç¶„É≥„É≠„Éº„Éâ„Åó„Åü„Éá„Éº„Çø„Çí sqlite3 „Å´‰øùÂ≠ò

-   „Ç§„É≥„Éá„ÉÉ„ÇØ„ÇπÂà•„Å´„ÉÜ„Éº„Éñ„É´„Çí‰ΩúÊàê„Åô„Çã
-   ÂÖÉ„Éá„Éº„Çø„ÅØ"Index_Constituents_with_Factset_code-compressed-\*.paruqet" -> ÂúßÁ∏Æ„Åó„Å¶ÈÄÅ‰ø°„Åó„Åü
-   BPM „Åã„ÇâÂèñÂæó„Åó„ÅüÊßãÊàêÊØî„ÇÑÈäòÊüÑ ID „Å™„Å©„ÅÆ„Éá„Éº„Çø„Å®„ÄÅFactset „Åß„ÉÄ„Ç¶„É≥„É≠„Éº„Éâ„Åó„Åü seol, cusip, isin, code_jp „Å´„Åù„Çå„Åû„ÇåÂØæÂøú„Åô„Çã P_SYMBOL „Åä„Çà„Å≥ FG_COMPANY_NAME „ÇíÊ†ºÁ¥ç„Åó„Åü„Éá„Éº„Çø„ÄÇ


In [None]:
compressed_files = list(
    FACTSET_INDEX_CONSTITUENTS_DIR.glob(
        "Index_Constituents_with_Factset_code-compressed-*.parquet"
    )
)

dfs = [pd.read_parquet(f) for f in compressed_files]
df = (
    pd.concat(dfs)
    .assign(
        date=lambda x: pd.to_datetime(x["date"]),
        SEDOL=lambda x: x["SEDOL"].astype(str),
    )
    .replace("N/A", np.nan)
)
df[["Holdings", "Weight (%)", "Mkt Value"]] = df[
    ["Holdings", "Weight (%)", "Mkt Value"]
].astype(float)

head_cols = ["Universe", "Universe_code_BPM", "date"]
other_cols = [col for col in df.columns if col not in head_cols]
df = df.reindex(columns=head_cols + other_cols).sort_values(
    ["Universe", "date", "Name"], ignore_index=True
)

for universe_code in df["Universe_code_BPM"].unique():
    df_slice = df.loc[df["Universe_code_BPM"] == universe_code].reset_index(drop=True)
    factset_utils.store_to_database(
        df=df_slice,
        db_path=factset_index_db_path,
        table_name=universe_code,
        unique_cols=["date", "Name", "Asset ID"],
    )

table_names = db_utils.get_table_names(db_path=factset_index_db_path)
display(table_names)

Êó¢Â≠ò„ÅÆ 47072 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ92 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSSUD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 319851 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ1182 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSASD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 344002 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ1234 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSACAPFAD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 247340 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ1054 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSAPFXJ_AD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 223596 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ1002 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSCAFXJAD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 788150 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ2511 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> MSAWIF_AD: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 1188

['MSSUD',
 'MSASD',
 'MSACAPFAD',
 'MSAPFXJ_AD',
 'MSCAFXJAD',
 'MSAWIF_AD',
 'MSCIHKGD',
 'MSCIINDD',
 'MSFIDND',
 'MSXJPN_AD',
 'MSFKORD',
 'MSFMALD',
 'MSFPHID',
 'MSCISIND',
 'MSFTAID',
 'MSFTHAD']

In [None]:
with sqlite3.connect(factset_index_db_path) as conn:
    df = pd.read_sql(
        f"SELECT * FROM `{UNIVERSE_CODE}`", parse_dates=["date"], con=conn
    ).drop_duplicates()
    df["P_SYMBOL_missing"] = df["P_SYMBOL"].isna()
    display(df)

    g = df.groupby(["date", "P_SYMBOL_missing"])["Weight (%)"].agg(["count", "sum"])
    display(g)

Unnamed: 0,Universe,Universe_code_BPM,date,Name,Bloomberg Ticker,BloombergID,Asset ID,Asset ID Type,SEDOL,Country,...,P_SYMBOL_CUSIP,ISIN,FG_COMPANY_NAME_ISIN,P_SYMBOL_ISIN,CODE_JP,FG_COMPANY_NAME_CODE_JP,P_SYMBOL_CODE_JP,P_SYMBOL,FG_COMPANY_NAME,P_SYMBOL_missing
0,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,21ST CENTURY FOX,,,AUSBIN2,BARRAID,662075,AUS,...,,,,,,,,FOXLV-AU,Twenty-First Century Fox Inc. Class A CDI,False
1,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,21ST CENTURY FOX,,,AUSBIN1,BARRAID,688692,AUS,...,,,,,,,,FOX-AU,Twenty-First Century Fox Inc. Class B CDI,False
2,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,3I GROUP PLC,,,UKIENL1,BARRAID,0888693,GBR,...,TGOPF-US,GB0008886938,„Çπ„É™„Éº„Ç¢„Ç§„Éª„Ç∞„É´„Éº„Éó,III-GB,,,,III-GB,„Çπ„É™„Éº„Ç¢„Ç§„Éª„Ç∞„É´„Éº„Éó,False
3,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,3M CO,,,USAJ8P1,BARRAID,2595708,USA,...,MMM-US,US6040591058,3M„Ç´„É≥„Éë„Éã„Éº,MMM-US,,,,MMM-US,3M„Ç´„É≥„Éë„Éã„Éº,False
4,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,ABB LTD,,,SWIAAN1,BARRAID,5661190,CHE,...,,CH0003846620,ABB,ABBN-CH,,,,ABBN-CH,ABB,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400859,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZIMMER BIOMET HOLDINGS INC,,,USA4JT1,BARRAID,2783815,USA,...,ZBH-US,US98956P1021,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,ZBH-US,,,,ZBH-US,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,False
400860,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZOETIS INC,,,USBANZ1,BARRAID,B95WG16,USA,...,ZTS-US,US98978V1035,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,ZTS-US,,,,ZTS-US,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,False
400861,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZOOM COMMUNICATIONS INC,,,USBEOV1,BARRAID,BGSP7M9,USA,...,ZM-US,US98980L1017,„Ç∫„Éº„É†„Éª„Éì„Éá„Ç™„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,ZM-US,,,,ZM-US,„Ç∫„Éº„É†„Éª„Éì„Éá„Ç™„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,False
400862,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZSCALER INC,,,USBDYI1,BARRAID,BZ00V34,USA,...,ZS-US,US98980G1022,„Çº„ÉÉ„Éà„Çπ„Ç±„Ç§„É©„Éº,ZS-US,,,,ZS-US,„Çº„ÉÉ„Éà„Çπ„Ç±„Ç§„É©„Éº,False


Unnamed: 0_level_0,Unnamed: 1_level_0,count,sum
date,P_SYMBOL_missing,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-31,False,991,97.959673
2000-01-31,True,74,2.040327
2000-02-29,False,989,97.900800
2000-02-29,True,73,2.099194
2000-03-31,False,989,98.129503
...,...,...,...
2025-07-31,False,1139,100.000012
2025-08-31,False,1140,100.000009
2025-09-30,False,1140,99.999991
2025-10-31,False,1139,99.958187


## 2. Factset „Åã„Çâ„ÉÄ„Ç¶„É≥„É≠„Éº„Éâ„Åó„Åü„Éá„Éº„Çø„Çí„Åæ„Å®„ÇÅ„Çã

Financials „Åä„Çà„Å≥ Price „ÅÆ„Éá„Éº„Çø„Çí„Éá„Éº„Çø„Éô„Éº„Çπ„Å´Ê†ºÁ¥ç„Åô„Çã„ÄÇ


In [None]:
file_list = list(INDEX_DIR.glob("Financials_and_Price-compressed-*.parquet"))
# file_list = [s for s in file_list if not "20241129" in s.name]
dfs = [pd.read_parquet(f) for f in tqdm(file_list, desc="loading parquet files")]
df = (
    pd.concat(dfs)
    .drop_duplicates()
    .sort_values(["variable", "P_SYMBOL", "date"], ignore_index=True)
    .assign(value=lambda x: x["value"].astype(float))
)

for variable in df["variable"].unique():
    df_slice = df.loc[df["variable"] == variable]
    factset_utils.store_to_database(
        df=df_slice,
        db_path=financials_db_path,
        table_name=variable,
        unique_cols=["date", "P_SYMBOL", "variable"],
        verbose=True,
    )

table_names = db_utils.get_table_names(db_path=financials_db_path)
display(table_names)

loading parquet files: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 9/9 [00:04<00:00,  2.08it/s]


„ÉÜ„Éº„Éñ„É´ 'FF_ASSETS' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 791263 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> FF_ASSETS: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'FF_BPS' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 791263 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> FF_BPS: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'FF_BPS_TANG' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 791263 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> FF_BPS_TANG: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'FF_CAPEX' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 791263 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> FF_CAPEX: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'FF_CASH_ST' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 791263 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> FF_CASH_ST: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'FF_COGS' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„

['FF_ASSETS',
 'FF_BPS',
 'FF_BPS_TANG',
 'FF_CAPEX',
 'FF_CASH_ST',
 'FF_COGS',
 'FF_COM_EQ',
 'FF_CURR_RATIO',
 'FF_DEBT',
 'FF_DEBT_ENTRPR_VAL',
 'FF_DEBT_EQ',
 'FF_DEBT_LT',
 'FF_DEBT_ST',
 'FF_DEP_AMORT_EXP',
 'FF_DIV_YLD',
 'FF_DPS',
 'FF_EBITDA_OPER',
 'FF_EBITDA_OPER_MGN',
 'FF_EBIT_OPER',
 'FF_EBIT_OPER_MGN',
 'FF_ENTRPR_VAL_EBITDA_OPER',
 'FF_ENTRPR_VAL_EBIT_OPER',
 'FF_ENTRPR_VAL_SALES',
 'FF_EPS',
 'FF_EPS_DIL',
 'FF_FREE_CF',
 'FF_FREE_PS_CF',
 'FF_GROSS_INC',
 'FF_GROSS_MGN',
 'FF_INC_TAX',
 'FF_INT_EXP_NET',
 'FF_LIABS',
 'FF_LIABS_SHLDRS_EQ',
 'FF_MIN_INT_ACCUM',
 'FF_NET_DEBT',
 'FF_NET_INC',
 'FF_NET_MGN',
 'FF_OPER_CF',
 'FF_OPER_INC',
 'FF_OPER_MGN',
 'FF_OPER_PS_NET_CF',
 'FF_PAY_OUT_RATIO',
 'FF_PBK',
 'FF_PE',
 'FF_PFD_STK',
 'FF_PPE_NET',
 'FF_PSALES',
 'FF_PTX_INC',
 'FF_PTX_MGN',
 'FF_QUICK_RATIO',
 'FF_ROA',
 'FF_ROE',
 'FF_ROIC',
 'FF_ROTC',
 'FF_SALES',
 'FF_SALES_PS',
 'FF_SGA',
 'FF_SHLDRS_EQ',
 'FF_STK_OPT_EXP',
 'FF_STK_PURCH_CF',
 'FF_TAX_RATE',
 'FF_W

### ‚ö†Ô∏è1AY/Êñ∞Ë¶è„Éá„Éº„ÇøÈ†ÖÁõÆ„ÅÆÂ∑ÆÂàÜÊõ¥Êñ∞„Åå„ÅÇ„ÇãÂ†¥Âêà


In [None]:
def update_value(row, rtol=1e-5, atol=1e-3):
    """Êó¢Â≠òÂÄ§„Çí„Ç¢„ÉÉ„Éó„Éá„Éº„Éà„Åô„ÇãÈñ¢Êï∞"""
    existing = row["value_existing"]
    new = row["value_new"]

    # 1. ‰∏°ÊñπNaN ‚Üí NaN
    if pd.isna(existing) and pd.isna(new):
        return np.nan

    # 2. Êó¢Â≠ò„ÅåNaN ‚Üí Êñ∞Ë¶èÂÄ§
    if pd.isna(existing):
        return new

    # 3. Êñ∞Ë¶è„ÅåNaN ‚Üí Êó¢Â≠òÂÄ§
    if pd.isna(new):
        return existing

    # 4. ‰∏°ÊñπÊúâÂäπ ‚Üí Êï∞ÂÄ§ÊØîËºÉ
    if np.isclose(existing, new, rtol=rtol, atol=atol):
        return existing  # Âêå„Åò„Å™„ÇâÊó¢Â≠òÂÄ§
    else:
        return new  # Áï∞„Å™„Çå„Å∞Êñ∞Ë¶èÂÄ§„ÅßÊõ¥Êñ∞


update_file = (
    INDEX_DIR / "Financials_and_Price-compressed-20241129_20251031.parquet"
)  # „Éï„Ç°„Ç§„É´Âêç„ÅØÊâãÂãïÊåáÂÆö„Åô„Çã
df_update = pd.read_parquet(update_file)
variable_list = df_update["variable"].sort_values().unique().tolist()
date_list = df_update["date"].sort_values().unique().tolist()
start_date = min(date_list)
end_date = max(date_list)

# „Éá„Éº„Çø„Éô„Éº„Çπ„ÅÆÊó¢Â≠ò„ÉÜ„Éº„Éñ„É´
existing_tables = db_utils.get_table_names(financials_db_path)
# Êñ∞„Åó„ÅèËøΩÂä†„Åï„Çå„Åü„Éá„Éº„ÇøÈ†ÖÁõÆ„Åå„ÅÇ„Çã„Åã„ÉÅ„Çß„ÉÉ„ÇØ
added_variables = list(set(variable_list) - set(existing_tables))
if len(added_variables) > 0:
    # Êñ∞Ë¶è„Éá„Éº„ÇøÈ†ÖÁõÆ„ÅÆ„ÉÜ„Éº„Éñ„É´‰ΩúÊàê
    pass

# 1AY„ÅßÂèñÂæó„Åó„Åü„Éá„Éº„Çø„ÇíÊõ¥Êñ∞„Åô„Çã„ÉÜ„Éº„Éñ„É´‰∏ÄË¶ß
update_tables = sorted(list(set(existing_tables) & set(variable_list)))
total_updated = 0
with sqlite3.connect(financials_db_path) as conn:
    for idx, table in enumerate(update_tables, 1):
        # date_list„ÅÆÊó•‰ªò„ÅÆ„Åø„Éá„Éº„Çø„Éô„Éº„Çπ„Åã„ÇâË™≠„ÅøÂèñ„Çä„ÄÅdf_update„Å®„ÅÆÂ∑ÆÂàÜ„ÇíÁ¢∫Ë™ç
        # Êó¢Â≠ò„ÅÆ„Éá„Éº„Çø„ÅåÊ¨†Êêç„Åó„Å¶„ÅÑ„Çå„Å∞Êõ¥Êñ∞„Éá„Éº„Çø„Åßfillna„Åô„ÇãÔºà‰∏°ÊñπÊ¨†Êêç„Åó„Å¶„ÅÑ„Çå„Å∞„Åù„ÅÆ„Åæ„ÅæÔºâ
        # Êó¢Â≠ò„Éá„Éº„Çø„Å®Êñ∞Ë¶è„Éá„Éº„Çø„ÅÆÊï∞ÂÄ§„ÅåÁï∞„Å™„Å£„Å¶„ÅÑ„Çå„Å∞Êñ∞Ë¶è„Éá„Éº„Çø„Åßupdate„Åô„Çã

        print(f"\n[{idx}/{len(update_tables)}] Âá¶ÁêÜ‰∏≠: {table}")

        query = f"""
            SELECT
                *
            FROM
                {table}
            WHERE
                date >= '{start_date.strftime("%Y-%m-%d")}' AND date <= '{end_date.strftime("%Y-%m-%d")}'
        """
        df_existing = (
            pd.read_sql(query, con=conn, parse_dates=["date"])
            .rename(columns={"value": "value_existing"})
            .reindex(columns=["date", "P_SYMBOL", "variable", "value_existing"])
        )
        df_update_slice = (
            df_update.loc[df_update["variable"] == table]
            .copy()
            .rename(columns={"value": "value_new"})
            .reindex(columns=["date", "P_SYMBOL", "variable", "value_new"])
        )
        df_merged = pd.merge(
            df_update_slice,
            df_existing,
            on=["date", "P_SYMBOL", "variable"],
            how="outer",
        )
        df_merged["value"] = df_merged.apply(update_value, axis=1)

        # Â§âÊõ¥Ê§úÂá∫Ôºà„Éô„ÇØ„Éà„É´ÂåñÁâàÔºâ
        changed_mask = (
            # Êó¢Â≠ò„ÅåNaN„ÅßÊñ∞Ë¶è„Å´ÂÄ§„Åå„ÅÇ„Çã
            (df_merged["value_existing"].isna() & ~df_merged["value_new"].isna())
            |
            # ‰∏°ÊñπÊúâÂäπ„ÅßÊï∞ÂÄ§„ÅåÁï∞„Å™„Çã
            (
                ~df_merged["value_existing"].isna()
                & ~df_merged["value_new"].isna()
                & ~np.isclose(
                    df_merged["value_existing"],
                    df_merged["value_new"],
                    rtol=1e-5,
                    atol=1e-3,
                )
            )
        )

        df_to_update = df_merged[changed_mask].reset_index(drop=True)[
            ["date", "P_SYMBOL", "variable", "value"]
        ]

        if len(df_to_update) > 0:
            print(f"  Êõ¥Êñ∞ÂØæË±°: {len(df_to_update):,}Ë°å")

            # „Éá„Éº„Çø„Éô„Éº„ÇπÊõ¥Êñ∞
            rows_affected = factset_utils.upsert_financial_data(
                df_to_update, conn, table, method="auto"
            )

            total_updated += rows_affected
        else:
            print("  Â§âÊõ¥„Å™„Åó")

print(f"\n{'=' * 50}")
print("üìä Êõ¥Êñ∞ÂÆå‰∫Ü")
print(f"{'=' * 50}")
print(f"Á∑èÊõ¥Êñ∞Ë°åÊï∞: {total_updated:,}Ë°å")


[1/63] Âá¶ÁêÜ‰∏≠: FF_INT_EXP_NET
  Êõ¥Êñ∞ÂØæË±°: 984Ë°å
‚ÑπÔ∏è  SQLite 3.50.4: upsertÊñπÂºè„Çí‰ΩøÁî®„Åó„Åæ„Åô
‚ùå „Ç®„É©„Éº: near "DO": syntax error


OperationalError: near "DO": syntax error

In [None]:
update_file = (
    INDEX_DIR / "Financials_and_Price-compressed-20241129_20251031.parquet"
)  # „Éï„Ç°„Ç§„É´Âêç„ÅØÊâãÂãïÊåáÂÆö„Åô„Çã
df_update = pd.read_parquet(update_file)
variable_list = df_update["variable"].sort_values().unique().tolist()
date_list = df_update["date"].sort_values().unique().tolist()
start_date = min(date_list)
end_date = max(date_list)

# „Éá„Éº„Çø„Éô„Éº„Çπ„ÅÆÊó¢Â≠ò„ÉÜ„Éº„Éñ„É´
existing_tables = db_utils.get_table_names(financials_db_path)
# Êñ∞„Åó„ÅèËøΩÂä†„Åï„Çå„Åü„Éá„Éº„ÇøÈ†ÖÁõÆ„Åå„ÅÇ„Çã„Åã„ÉÅ„Çß„ÉÉ„ÇØ
added_variables = list(set(variable_list) - set(existing_tables))
if len(added_variables) > 0:
    # Êñ∞Ë¶è„Éá„Éº„ÇøÈ†ÖÁõÆ„ÅÆ„ÉÜ„Éº„Éñ„É´‰ΩúÊàê
    pass

# 1AY„ÅßÂèñÂæó„Åó„Åü„Éá„Éº„Çø„ÇíÊõ¥Êñ∞„Åô„Çã„ÉÜ„Éº„Éñ„É´‰∏ÄË¶ß
update_tables = list(set(existing_tables) & set(variable_list))
total_updated = 0
with sqlite3.connect(financials_db_path) as conn:
    for idx, table in enumerate(update_tables, 1):
        # date_list„ÅÆÊó•‰ªò„ÅÆ„Åø„Éá„Éº„Çø„Éô„Éº„Çπ„Åã„ÇâË™≠„ÅøÂèñ„Çä„ÄÅdf_update„Å®„ÅÆÂ∑ÆÂàÜ„ÇíÁ¢∫Ë™ç
        # Êó¢Â≠ò„ÅÆ„Éá„Éº„Çø„ÅåÊ¨†Êêç„Åó„Å¶„ÅÑ„Çå„Å∞Êõ¥Êñ∞„Éá„Éº„Çø„Åßfillna„Åô„ÇãÔºà‰∏°ÊñπÊ¨†Êêç„Åó„Å¶„ÅÑ„Çå„Å∞„Åù„ÅÆ„Åæ„ÅæÔºâ
        # Êó¢Â≠ò„Éá„Éº„Çø„Å®Êñ∞Ë¶è„Éá„Éº„Çø„ÅÆÊï∞ÂÄ§„ÅåÁï∞„Å™„Å£„Å¶„ÅÑ„Çå„Å∞Êñ∞Ë¶è„Éá„Éº„Çø„Åßupdate„Åô„Çã

        print(f"\n[{idx}/{len(update_tables)}] Âá¶ÁêÜ‰∏≠: {table}")

        query = f"""
            SELECT
                *
            FROM
                {table}
            WHERE
                date >= '{start_date.strftime("%Y-%m-%d")}' AND date <= '{end_date.strftime("%Y-%m-%d")}'
        """
        df_existing = (
            pd.read_sql(query, con=conn, parse_dates=["date"])
            .rename(columns={"value": "value_existing"})
            .reindex(columns=["date", "P_SYMBOL", "variable", "value_existing"])
        )
        df_update_slice = (
            df_update.loc[df_update["variable"] == table]
            .copy()
            .rename(columns={"value": "value_new"})
            .reindex(columns=["date", "P_SYMBOL", "variable", "value_new"])
        )
        df_merged = pd.merge(
            df_update_slice,
            df_existing,
            on=["date", "P_SYMBOL", "variable"],
            how="outer",
        )
        df_merged["value"] = df_merged.apply(update_value, axis=1)

        # Â§âÊõ¥Ê§úÂá∫Ôºà„Éô„ÇØ„Éà„É´ÂåñÁâàÔºâ
        changed_mask = (
            # Êó¢Â≠ò„ÅåNaN„ÅßÊñ∞Ë¶è„Å´ÂÄ§„Åå„ÅÇ„Çã
            (df_merged["value_existing"].isna() & ~df_merged["value_new"].isna())
            |
            # ‰∏°ÊñπÊúâÂäπ„ÅßÊï∞ÂÄ§„ÅåÁï∞„Å™„Çã
            (
                ~df_merged["value_existing"].isna()
                & ~df_merged["value_new"].isna()
                & ~np.isclose(
                    df_merged["value_existing"],
                    df_merged["value_new"],
                    rtol=1e-5,
                    atol=1e-3,
                )
            )
        )

        df_to_update = df_merged[changed_mask].reset_index(drop=True)[
            ["date", "P_SYMBOL", "variable", "value"]
        ]

        if len(df_to_update) > 0:
            print(f"  Êõ¥Êñ∞ÂØæË±°: {len(df_to_update):,}Ë°å")

            # „Éá„Éº„Çø„Éô„Éº„ÇπÊõ¥Êñ∞
            rows_affected = factset_utils.upsert_financial_data(
                df_to_update,
                conn,
                table,
                method="upsert",  # „Åæ„Åü„ÅØ "upsert"
            )

            total_updated += rows_affected
        else:
            print("  Â§âÊõ¥„Å™„Åó")

print(f"\n{'=' * 50}")
print("üìä Êõ¥Êñ∞ÂÆå‰∫Ü")
print(f"{'=' * 50}")
print(f"Á∑èÊõ¥Êñ∞Ë°åÊï∞: {total_updated:,}Ë°å")


[1/63] Âá¶ÁêÜ‰∏≠: FF_DEBT_ST
  Êõ¥Êñ∞ÂØæË±°: 11,175Ë°å
‚ö†Ô∏è  „ÉÜ„Éº„Éñ„É´ 'FF_DEBT_ST' „Å´UNIQUEÂà∂Á¥Ñ„Åå„ÅÇ„Çä„Åæ„Åõ„Çì
   Âà∂Á¥Ñ„ÇíËøΩÂä†„Åô„Çã„Å´„ÅØ„ÄÅ„ÉÜ„Éº„Éñ„É´„ÇíÂÜç‰ΩúÊàê„Åô„ÇãÂøÖË¶Å„Åå„ÅÇ„Çä„Åæ„Åô
‚ö†Ô∏è  UPSERTÊñπÂºè„Çí‰ΩøÁî®„Åß„Åç„Åæ„Åõ„Çì„ÄÇdelete_insertÊñπÂºè„Å´Âàá„ÇäÊõø„Åà„Åæ„Åô
  ÂâäÈô§: 11175Ë°å, ÊåøÂÖ•: 11175Ë°å
‚úÖ 11175Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü

[2/63] Âá¶ÁêÜ‰∏≠: FF_TAX_RATE
  Êõ¥Êñ∞ÂØæË±°: 10,469Ë°å
‚ö†Ô∏è  „ÉÜ„Éº„Éñ„É´ 'FF_TAX_RATE' „Å´UNIQUEÂà∂Á¥Ñ„Åå„ÅÇ„Çä„Åæ„Åõ„Çì
   Âà∂Á¥Ñ„ÇíËøΩÂä†„Åô„Çã„Å´„ÅØ„ÄÅ„ÉÜ„Éº„Éñ„É´„ÇíÂÜç‰ΩúÊàê„Åô„ÇãÂøÖË¶Å„Åå„ÅÇ„Çä„Åæ„Åô
‚ö†Ô∏è  UPSERTÊñπÂºè„Çí‰ΩøÁî®„Åß„Åç„Åæ„Åõ„Çì„ÄÇdelete_insertÊñπÂºè„Å´Âàá„ÇäÊõø„Åà„Åæ„Åô
  ÂâäÈô§: 10469Ë°å, ÊåøÂÖ•: 10469Ë°å
‚úÖ 10469Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü

[3/63] Âá¶ÁêÜ‰∏≠: FF_ROE
  Êõ¥Êñ∞ÂØæË±°: 10,836Ë°å
‚ö†Ô∏è  „ÉÜ„Éº„Éñ„É´ 'FF_ROE' „Å´UNIQUEÂà∂Á¥Ñ„Åå„ÅÇ„Çä„Åæ„Åõ„Çì
   Âà∂Á¥Ñ„ÇíËøΩÂä†„Åô„Çã„Å´„ÅØ„ÄÅ„ÉÜ„Éº„Éñ„É´„ÇíÂÜç‰ΩúÊàê„Åô„ÇãÂøÖË¶Å„Åå„ÅÇ„Çä„Åæ„Åô
‚ö†Ô∏è  UPSERTÊñπÂºè„Çí‰ΩøÁî®„Åß„Åç„Åæ„Åõ„Çì„ÄÇdelete_insertÊ

In [None]:
with sqlite3.connect(financials_db_path) as conn:
    df = pd.read_sql(
        "SELECT * FROM FF_SALES",
        con=conn,
        parse_dates=["date"],
    )
    display(df.dropna(subset=["date"]))

Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,528.570465,FF_SALES
1,2005-09-30,0HSW-GB,556.819159,FF_SALES
2,2005-10-31,0HSW-GB,556.819159,FF_SALES
3,2005-11-30,0HSW-GB,556.819159,FF_SALES
4,2005-12-30,0HSW-GB,,FF_SALES
...,...,...,...,...
791258,2025-06-30,ZURN-CH,37031.999926,FF_SALES
791259,2025-07-31,ZURN-CH,37031.999926,FF_SALES
791260,2025-08-29,ZURN-CH,37031.999926,FF_SALES
791261,2025-09-30,ZURN-CH,37031.999926,FF_SALES


### „Éá„Éº„Çø„Éô„Éº„Çπ„ÉÅ„Çß„ÉÉ„ÇØ


In [None]:
# „Éá„Éº„Çø„Éô„Éº„Çπ„ÅÆ‰∏≠Ë∫´„ÉÅ„Çß„ÉÉ„ÇØ
with sqlite3.connect(financials_db_path) as conn:
    df = pd.read_sql(
        "SELECT * FROM FF_ASSETS ORDER BY date", parse_dates=["date"], con=conn
    )
display(df)
display(df.drop_duplicates(subset=["date", "P_SYMBOL"]))
display(df["date"].unique().tolist())

Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,2448.485005,FF_ASSETS
1,2005-08-31,0II3.XX1-GB,1950.065394,FF_ASSETS
2,2005-08-31,0MDJ-GB,18381.562024,FF_ASSETS
3,2005-08-31,0N1N-GB,2893.729277,FF_ASSETS
4,2005-08-31,0N3I-GB,4176.764098,FF_ASSETS
...,...,...,...,...
791258,2025-10-31,ZBRA-US,8067.000000,FF_ASSETS
791259,2025-10-31,ZM-US,,FF_ASSETS
791260,2025-10-31,ZS-US,,FF_ASSETS
791261,2025-10-31,ZTS-US,15159.000000,FF_ASSETS


Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,2448.485005,FF_ASSETS
1,2005-08-31,0II3.XX1-GB,1950.065394,FF_ASSETS
2,2005-08-31,0MDJ-GB,18381.562024,FF_ASSETS
3,2005-08-31,0N1N-GB,2893.729277,FF_ASSETS
4,2005-08-31,0N3I-GB,4176.764098,FF_ASSETS
...,...,...,...,...
791258,2025-10-31,ZBRA-US,8067.000000,FF_ASSETS
791259,2025-10-31,ZM-US,,FF_ASSETS
791260,2025-10-31,ZS-US,,FF_ASSETS
791261,2025-10-31,ZTS-US,15159.000000,FF_ASSETS


[Timestamp('2005-08-31 00:00:00'),
 Timestamp('2005-09-30 00:00:00'),
 Timestamp('2005-10-31 00:00:00'),
 Timestamp('2005-11-30 00:00:00'),
 Timestamp('2005-12-30 00:00:00'),
 Timestamp('2006-01-31 00:00:00'),
 Timestamp('2006-02-28 00:00:00'),
 Timestamp('2006-03-31 00:00:00'),
 Timestamp('2006-04-28 00:00:00'),
 Timestamp('2006-05-31 00:00:00'),
 Timestamp('2006-06-30 00:00:00'),
 Timestamp('2006-07-31 00:00:00'),
 Timestamp('2006-08-31 00:00:00'),
 Timestamp('2006-09-29 00:00:00'),
 Timestamp('2006-10-31 00:00:00'),
 Timestamp('2006-11-30 00:00:00'),
 Timestamp('2006-12-29 00:00:00'),
 Timestamp('2007-01-31 00:00:00'),
 Timestamp('2007-02-28 00:00:00'),
 Timestamp('2007-03-30 00:00:00'),
 Timestamp('2007-04-30 00:00:00'),
 Timestamp('2007-05-31 00:00:00'),
 Timestamp('2007-06-29 00:00:00'),
 Timestamp('2007-07-31 00:00:00'),
 Timestamp('2007-08-31 00:00:00'),
 Timestamp('2007-09-28 00:00:00'),
 Timestamp('2007-10-31 00:00:00'),
 Timestamp('2007-11-30 00:00:00'),
 Timestamp('2007-12-

## 3. „É™„Çø„Éº„É≥&„Éï„Ç°„ÇØ„Çø„ÉºÁî®„ÉÜ„Éº„Éñ„É´‰ΩúÊàê


### 3-1. „É™„Çø„Éº„É≥„ÅÆ„ÉÜ„Éº„Éñ„É´„Çí‰ΩúÊàê


In [None]:
df_price = roic_utils.load_FG_PRICE(db_path=financials_db_path)
df_return = roic_utils.calculate_Return(
    df_price=df_price,
    date_column="date",
    symbol_column="P_SYMBOL",
    price_column="FG_PRICE",
)
print("--- return data ---")
display(df_return.head(3))
print("-" * 20)

# ------------------------------------------------------------------------------------
# „Éá„Éº„Çø„ÉÅ„Çß„ÉÉ„ÇØ
# ÈäòÊüÑ„Å´„Çà„Å£„Å¶„ÅØdate„Åå1„Ç´Êúà„Åö„Å§ÈÄ£Á∂ö„Åß„Éá„Éº„Çø„Åå„ÅÇ„Çã„Å®„ÅØÈôê„Çâ„Å™„ÅÑ
# FG_PRICE„Åå„Å™„ÅÑÂ†¥Âêà„Å´pct_change„ÇíÁ¥†Áõ¥„Å´ÂÆüË°å„Åô„Çã„Å®„É™„Çø„Éº„É≥„ÅÆÊúüÈñì„Åå‰ªñ„ÅÆÈäòÊüÑ„Å®„Åö„Çå„Çã
# „Åù„ÅÆ„Åü„ÇÅ„ÄÅÂÖ®date„ÅÆÈï∑„Åï„Å®ÈäòÊüÑ„Åî„Å®„ÅÆdate„ÅÆÈï∑„Åï„ÇíÊØîËºÉ„Åô„Çã
# ------------------------------------------------------------------------------------

df_check = df_return.reset_index()
symbol_date_counts = df_check.groupby("P_SYMBOL")["date"].nunique()
all_date_len = len(df_check["date"].unique())
not_enough_len_symbols = symbol_date_counts[symbol_date_counts != all_date_len].index
if len(not_enough_len_symbols) > 0:
    print("ÂïèÈ°å„ÅÇ„Çä")
    display(not_enough_len_symbols)
else:  # ÂïèÈ°å„Å™„Åë„Çå„Å∞„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò
    print("ÂïèÈ°å„Å™„Åó")
    df_return.reset_index(inplace=True)
    display(df_return.head(5))
    for col in [
        s
        for s in df_return.columns
        if s.startswith("Return") or s.startswith("Forward_Return")
    ]:
        df_slice = (
            df_return[["date", "P_SYMBOL", col]]
            .rename(columns={col: "value"})
            .assign(variable=col)
        )
        df_slice["value"] = df_slice["value"].astype(float)
        df_slice["date"] = pd.to_datetime(df_slice["date"])
        db_utils.delete_table_from_database(db_path=financials_db_path, table_name=col)
        factset_utils.store_to_database(
            df=df_slice, db_path=financials_db_path, table_name=col
        )

--- return data ---


Unnamed: 0_level_0,Unnamed: 1_level_0,FG_PRICE,Return_1M,Forward_Return_1M,Return_1M_annlzd,Forward_Return_1M_annlzd,Return_3M,Forward_Return_3M,Return_3M_annlzd,Forward_Return_3M_annlzd,Return_6M,...,Return_12M_annlzd,Forward_Return_12M_annlzd,Return_3Y,Forward_Return_3Y,Return_3Y_annlzd,Forward_Return_3Y_annlzd,Return_5Y,Forward_Return_5Y,Return_5Y_annlzd,Forward_Return_5Y_annlzd
P_SYMBOL,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0HSW-GB,2005-08-31,3.23,,0.13,,1.53,,0.34,,1.37,,...,,0.55,,0.85,,0.28,,0.85,,0.17
0II3.XX1-GB,2005-08-31,4.17,,0.01,,0.18,,0.06,,0.24,,...,,-0.12,,0.12,,0.04,,0.12,,0.02
0MDJ-GB,2005-08-31,6.09,,0.05,,0.55,,0.02,,0.06,,...,,0.02,,0.04,,0.01,,0.42,,0.08


--------------------
ÂïèÈ°å„Å™„Åó


Unnamed: 0,P_SYMBOL,date,FG_PRICE,Return_1M,Forward_Return_1M,Return_1M_annlzd,Forward_Return_1M_annlzd,Return_3M,Forward_Return_3M,Return_3M_annlzd,...,Return_12M_annlzd,Forward_Return_12M_annlzd,Return_3Y,Forward_Return_3Y,Return_3Y_annlzd,Forward_Return_3Y_annlzd,Return_5Y,Forward_Return_5Y,Return_5Y_annlzd,Forward_Return_5Y_annlzd
0,0HSW-GB,2005-08-31,3.23,,0.13,,1.53,,0.34,,...,,0.55,,0.85,,0.28,,0.85,,0.17
1,0II3.XX1-GB,2005-08-31,4.17,,0.01,,0.18,,0.06,,...,,-0.12,,0.12,,0.04,,0.12,,0.02
2,0MDJ-GB,2005-08-31,6.09,,0.05,,0.55,,0.02,,...,,0.02,,0.04,,0.01,,0.42,,0.08
3,0N1N-GB,2005-08-31,5.66,,0.04,,0.47,,0.02,,...,,0.05,,0.4,,0.13,,0.37,,0.07
4,0N3I-GB,2005-08-31,2.81,,0.03,,0.34,,0.03,,...,,0.01,,-0.47,,-0.16,,0.15,,0.03


„ÉÜ„Éº„Éñ„É´ 'Return_1M' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 794853 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_1M: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'Forward_Return_1M' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 794853 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Forward_Return_1M: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'Return_1M_annlzd' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 794853 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_1M_annlzd: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'Forward_Return_1M_annlzd' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 794853 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Forward_Return_1M_annlzd: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
„ÉÜ„Éº„Éñ„É´ 'Return_3M' „ÅØÂ≠òÂú®„Åó„Åæ„Åõ„Çì„ÄÇÊñ∞„Åó„ÅÑ„ÉÜ„Éº„Éñ„É´„Å®„Åó„Å¶„ÄÅ„Åô„Åπ„Å¶„ÅÆ 794853 Ë°å„ÇíËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_3M: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆ

### 3-2. „Ç§„É≥„Éá„ÉÉ„ÇØ„Çπ„ÅÆ‰æ°Ê†º„Å®„É™„Çø„Éº„É≥„Éá„Éº„Çø„ÇíÂèñÂæó via Blpapi

‚ö†Ô∏è Ê≥®ÊÑè ‚ö†Ô∏è Bloomberg Terminal „ÇíËµ∑Âãï„Åó„Å¶„ÅÑ„ÇãÂøÖË¶Å„ÅÇ„Çä„ÄÇ


In [None]:
# „Éá„Éº„Çø„Éô„Éº„ÇπÁ¢∫Ë™ç
with sqlite3.connect(bloomberg_index_db_path) as conn:
    df = pd.read_sql("SELECT * FROM PX_LAST", con=conn, parse_dates=["Date"])
display(df)
print(
    f"Date: {df['Date'].min().strftime('%Y-%m-%d')} „Äú {df['Date'].max().strftime('%Y-%m-%d')} ({len(df['Date'].unique()):,}Êó•)"
)
print(f"Ticker: {df['Ticker'].nunique():,}ÈäòÊüÑ\n\t{df['Ticker'].unique().tolist()}")

DatabaseError: Execution failed on sql 'SELECT * FROM PX_LAST': no such table: PX_LAST

In [None]:
# yaml„ÅßË®≠ÂÆö„Åó„ÅüÈäòÊüÑ„É™„Çπ„Éà„ÅÆË™≠„ÅøËæº„ÅøÔºàBloomberg TickerÔºâ
BLOOMBERG_TICKER_YAML = BLOOMBERG_ROOT_DIR / "ticker-description.yaml"
EQUITY_TYPES = {"equity_index", "equity_sector_index", "equity_industry_index"}

with open(BLOOMBERG_TICKER_YAML, "r", encoding="utf-8") as f:
    ticker_descriptions = yaml.safe_load(f)

tickers_to_download = [
    ticker["bloomberg_ticker"]
    for ticker in ticker_descriptions
    if ticker.get("type") in EQUITY_TYPES
]

# -----------------------------------------------------
# Bloomberg„Åã„Çâ‰æ°Ê†º„Éá„Éº„Çø„Çí„ÉÄ„Ç¶„É≥„É≠„Éº„Éâ„Åó„Å¶„Éá„Éº„Çø„Éô„Éº„Çπ„ÇíÊõ¥Êñ∞
# -----------------------------------------------------


blp = bloomberg_utils.BlpapiCustom()
# Êñ∞Ë¶è„ÉÜ„Ç£„ÉÉ„Ç´„Éº„Åå„ÅÇ„ÇãÂ†¥Âêà

# df = blp.get_historical_data(
#     securities=tickers_to_download,
#     fields=["PX_LAST"],
#     start_date="20000101",
#     end_date=datetime.datetime.today().strftime("%Y%m%d"),
# )
# df = pd.melt(
#     df.reset_index(), id_vars=["Date"], var_name="Ticker", value_name="value"
# ).assign(variable="PX_LAST")
# display(df)
# blp.store_to_database(
#     df=df,
#     db_path=bloomberg_index_db_path,
#     table_name="PX_LAST",
#     primary_keys=["Date", "Ticker", "variable"],
#     verbose=True,
# )

# „ÄÄÊó¢Â≠ò„Éá„Éº„Çø„ÅÆÊõ¥Êñ∞

rows_updated = blp.update_historical_data(
    db_path=bloomberg_index_db_path,
    table_name="PX_LAST",
    tickers=tickers_to_download,
    id_type="ticker",
    field="PX_LAST",
    default_start_date=datetime.datetime(2000, 1, 1),
    verbose=True,
)
print(f"\n{'=' * 60}")
print(f"Âá¶ÁêÜÂÆå‰∫Ü: {rows_updated:,}Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü")
print(f"{'=' * 60}")

üÜï ÂàùÊúü„Éá„Éº„ÇøÂèñÂæó„É¢„Éº„Éâ
   ÂèñÂæóÊúüÈñì: 2000-01-01 ~ 2025-12-08
   ÂØæË±°ÈäòÊüÑ: 70ÈäòÊüÑ
Bloomberg„Çª„ÉÉ„Ç∑„Éß„É≥„ÇíÈñãÂßã„Åó„Å¶„ÅÑ„Åæ„Åô...
„Çª„ÉÉ„Ç∑„Éß„É≥ÈñãÂßãÊàêÂäü„ÄÇ
‚úÖ „Çµ„Éº„Éì„Çπ„Ç™„Éº„Éó„É≥ÂÆå‰∫Ü„ÄÇ„É™„ÇØ„Ç®„Çπ„Éà‰ΩúÊàê‰∏≠...
üì° „É™„ÇØ„Ç®„Çπ„Éà„ÇíÈÄÅ‰ø°„Åó„Åæ„Åô [DAILY]
   ÊúüÈñì: 2000-01-01 - 2025-12-08
‚ùå MXWDJ0HC Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0IN Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0IT Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0MT Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0RE Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0UT Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53728]
‚ùå MXWDJ0CS Index (TICKER) „Åß„Ç®„É©„Éº: Unknown/Invalid securityInvalid Security [nid:53822]
‚ù

In [None]:
# Êó¢Â≠ò„ÅÆFG_PRICE„ÅÆ„ÉÜ„Éº„Éñ„É´„Åã„Çâ„É™„Çø„Éº„É≥Ë®àÁÆó„Åô„Åπ„ÅçÊó•‰ªò„ÇíÂèñÂæó
df_index_price_filtered = (
    db_utils.get_rows_by_unique_values(
        source_db_path=financials_db_path,
        target_db_path=bloomberg_index_db_path,
        source_table="FG_PRICE",
        target_table="PX_LAST",
        source_column="date",
        target_column="Date",
    )
    .query(f"Ticker == '{BLOOMBERG_UNIVERSE_TICKER}'")
    .reset_index(drop=True)
    .assign(Date=lambda x: pd.to_datetime(x["Date"]))
)


# „Ç§„É≥„Éá„ÉÉ„ÇØ„Çπ„Å´„Å§„ÅÑ„Å¶ÂêåÊßò„Å´„É™„Çø„Éº„É≥„ÇíË®àÁÆó„Åó„Å¶„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò
df_index_return = roic_utils.calculate_Return(
    df_price=df_index_price_filtered,
    date_column="Date",
    symbol_column="Ticker",
    price_column="value",
)
print("--- return data ---")
display(df_index_return.tail(3))
print("-" * 20)

# ------------------------------------------------------------------------------------
# „Éá„Éº„Çø„ÉÅ„Çß„ÉÉ„ÇØ
# ÈäòÊüÑ„Å´„Çà„Å£„Å¶„ÅØdate„Åå1„Ç´Êúà„Åö„Å§ÈÄ£Á∂ö„Åß„Éá„Éº„Çø„Åå„ÅÇ„Çã„Å®„ÅØÈôê„Çâ„Å™„ÅÑ
# ‰æ°Ê†º„Éá„Éº„Çø„Åå„Å™„ÅÑÂ†¥Âêà„Å´pct_change„ÇíÁ¥†Áõ¥„Å´ÂÆüË°å„Åô„Çã„Å®„É™„Çø„Éº„É≥„ÅÆÊúüÈñì„Åå‰ªñ„ÅÆÈäòÊüÑ„Å®„Åö„Çå„Çã
# „Åù„ÅÆ„Åü„ÇÅ„ÄÅÂÖ®date„ÅÆÈï∑„Åï„Å®ÈäòÊüÑ„Åî„Å®„ÅÆdate„ÅÆÈï∑„Åï„ÇíÊØîËºÉ„Åô„Çã
# ------------------------------------------------------------------------------------

df_check = df_index_return.reset_index()
symbol_date_counts = df_check.groupby("Ticker")["Date"].nunique()
all_date_len = len(df_check["Date"].unique())
not_enough_len_symbols = symbol_date_counts[symbol_date_counts != all_date_len].index
if len(not_enough_len_symbols) > 0:
    print("ÂïèÈ°å„ÅÇ„Çä")
    display(not_enough_len_symbols)
else:  # ÂïèÈ°å„Å™„Åë„Çå„Å∞„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò
    print("ÂïèÈ°å„Å™„Åó")
    df_index_return.reset_index(inplace=True)
    display(df_index_return.tail(5))
    for col in [
        s
        for s in df_index_return.columns
        if s.startswith("Return") or s.startswith("Forward_Return")
    ]:
        df_slice = (
            df_index_return[["Date", "Ticker", col]]
            .rename(columns={col: "value"})
            .assign(variable=col)
        )
        df_slice["value"] = df_slice["value"].astype(float)
        df_slice["Date"] = pd.to_datetime(df_slice["Date"])
        db_utils.delete_table_from_database(
            db_path=bloomberg_index_db_path, table_name=col
        )
        blp.store_to_database(
            df=df_slice,
            db_path=bloomberg_index_db_path,
            table_name=col,
            primary_keys=["Date", "Ticker", "variable"],
        )

--- return data ---


Unnamed: 0_level_0,Unnamed: 1_level_0,value,variable,Return_1M,Forward_Return_1M,Return_1M_annlzd,Forward_Return_1M_annlzd,Return_3M,Forward_Return_3M,Return_3M_annlzd,Forward_Return_3M_annlzd,...,Return_12M_annlzd,Forward_Return_12M_annlzd,Return_3Y,Forward_Return_3Y,Return_3Y_annlzd,Forward_Return_3Y_annlzd,Return_5Y,Forward_Return_5Y,Return_5Y_annlzd,Forward_Return_5Y_annlzd
Ticker,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
MXKO Index,2025-08-29,4339.7,PX_LAST,0.022468,0.031714,0.269611,0.38057,0.081942,,0.327766,,...,0.143251,,0.598328,,0.199443,,0.726859,,0.145372,
MXKO Index,2025-09-30,4477.33,PX_LAST,0.031714,0.018609,0.38057,0.223312,0.069486,,0.277946,,...,0.157818,,0.81918,,0.27306,,0.854051,,0.17081,
MXKO Index,2025-10-31,4560.65,PX_LAST,0.018609,,0.223312,,0.074525,,0.298101,,...,0.202551,,0.72576,,0.24192,,0.952408,,0.190482,


--------------------
ÂïèÈ°å„Å™„Åó


Unnamed: 0,Ticker,Date,value,variable,Return_1M,Forward_Return_1M,Return_1M_annlzd,Forward_Return_1M_annlzd,Return_3M,Forward_Return_3M,...,Return_12M_annlzd,Forward_Return_12M_annlzd,Return_3Y,Forward_Return_3Y,Return_3Y_annlzd,Forward_Return_3Y_annlzd,Return_5Y,Forward_Return_5Y,Return_5Y_annlzd,Forward_Return_5Y_annlzd
238,MXKO Index,2025-06-30,4186.43,PX_LAST,0.043729,0.013833,0.524753,0.165993,0.109511,0.069486,...,0.148449,,0.591242,,0.197081,,0.865951,,0.17319,
239,MXKO Index,2025-07-31,4244.34,PX_LAST,0.013833,0.022468,0.165993,0.269611,0.119433,0.074525,...,0.147621,,0.493686,,0.164562,,0.797764,,0.159553,
240,MXKO Index,2025-08-29,4339.7,PX_LAST,0.022468,0.031714,0.269611,0.38057,0.081942,,...,0.143251,,0.598328,,0.199443,,0.726859,,0.145372,
241,MXKO Index,2025-09-30,4477.33,PX_LAST,0.031714,0.018609,0.38057,0.223312,0.069486,,...,0.157818,,0.81918,,0.27306,,0.854051,,0.17081,
242,MXKO Index,2025-10-31,4560.65,PX_LAST,0.018609,,0.223312,,0.074525,,...,0.202551,,0.72576,,0.24192,,0.952408,,0.190482,


‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_1M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Forward_Return_1M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_1M_annlzd' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Forward_Return_1M_annlzd' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_3M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Forward_Return_3M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_3M_annlzd' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Forward_Return_3M_annlzd' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_6M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Forward_Return_6M' „Å´ 243 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'Return_6M

### 3-3. Active Return Ë®àÁÆó

„ÉÜ„Éº„Éñ„É´‰ΩúÊàê„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò


In [None]:
return_cols = [
    "Return_1M",
    "Return_1M_annlzd",
    "Forward_Return_1M",
    "Forward_Return_1M_annlzd",
    "Return_3M",
    "Return_3M_annlzd",
    "Forward_Return_3M",
    "Forward_Return_3M_annlzd",
    "Return_6M",
    "Return_6M_annlzd",
    "Forward_Return_6M",
    "Forward_Return_6M_annlzd",
    "Return_12M",
    "Return_12M_annlzd",
    "Forward_Return_12M",
    "Forward_Return_12M_annlzd",
    "Return_3Y",
    "Return_3Y_annlzd",
    "Forward_Return_3Y",
    "Forward_Return_3Y_annlzd",
    "Return_5Y",
    "Return_5Y_annlzd",
    "Forward_Return_5Y",
    "Forward_Return_5Y_annlzd",
]

# ------------------------------------
# „Ç§„É≥„Éá„ÉÉ„ÇØ„Çπ„ÅÆ„É™„Çø„Éº„É≥
# ------------------------------------
union_queries_index = []
for table in return_cols:
    union_queries_index.append(
        f"SELECT Date, Ticker, value, variable FROM '{table}' WHERE Ticker = '{BLOOMBERG_UNIVERSE_TICKER}'"
    )
union_query_index = " UNION ALL ".join(union_queries_index)

# „Éá„Éº„ÇøÂèñÂæó
with sqlite3.connect(bloomberg_index_db_path) as conn:
    df_return_index = pd.read_sql(
        union_query_index, con=conn, parse_dates=["Date"]
    ).rename(columns={"Date": "date", "Ticker": "symbol"})

df_return_index = df_return_index.drop_duplicates(ignore_index=True)
print(f"‚úÖ „Ç§„É≥„Éá„ÉÉ„ÇØ„Çπ„Éá„Éº„Çø: {len(df_return_index):,}‰ª∂")


# ------------------------------------
# ÂÄãÂà•ÈäòÊüÑ„ÅÆ„É™„Çø„Éº„É≥
# ------------------------------------
union_queries_security = []
for table in return_cols:
    union_queries_security.append(
        f"SELECT date, P_SYMBOL, value, variable FROM '{table}'"
    )

union_query_security = " UNION ALL ".join(union_queries_security)

# „Éá„Éº„ÇøÂèñÂæó
with sqlite3.connect(financials_db_path) as conn:
    df_return_security = pd.read_sql(
        union_query_security, con=conn, parse_dates=["date"]
    ).rename(columns={"P_SYMBOL": "symbol"})

df_return_security = df_return_security.drop_duplicates(ignore_index=True)

print(f"‚úÖ ÂÄãÂà•ÈäòÊüÑ„Éá„Éº„Çø: {len(df_return_security):,}‰ª∂")

# ------------------------------------
# concatenate returns
# ------------------------------------
df_returns = pd.concat([df_return_index, df_return_security], ignore_index=True)
display(df_returns.tail(3))

# ------------------------------------
# „Ç¢„ÇØ„ÉÜ„Ç£„Éñ„É™„Çø„Éº„É≥Ë®àÁÆó
# ------------------------------------
df_active_returns = performance_metrics_utils.calculate_active_returns_vectorized(
    df_returns=df_returns,
    return_cols=return_cols,
    benchmark_ticker=BLOOMBERG_UNIVERSE_TICKER,
    verbose=False,
)
display(df_active_returns.tail(3))

# ------------------------------------
# Active return: „Éá„Éº„Çø„Éô„Éº„Çπ‰øùÂ≠ò
# ------------------------------------
# Êé®Â•®ÊñπÊ≥ïÔºöÁõ¥ÂàóÊõ∏„ÅçËæº„ÅøÁâà
results = factset_utils.insert_active_returns_optimized_sqlite(
    df_active_returns=df_active_returns,
    return_cols=return_cols,
    db_path=financials_db_path,
    benchmark_ticker=BLOOMBERG_UNIVERSE_TICKER,
    batch_size=10000,
    verbose=True,
)

‚úÖ „Ç§„É≥„Éá„ÉÉ„ÇØ„Çπ„Éá„Éº„Çø: 5,832‰ª∂
‚úÖ ÂÄãÂà•ÈäòÊüÑ„Éá„Éº„Çø: 19,076,472‰ª∂


Unnamed: 0,date,symbol,value,variable
19082301,2025-10-31,ZS-US,,Forward_Return_5Y_annlzd
19082302,2025-10-31,ZTS-US,,Forward_Return_5Y_annlzd
19082303,2025-10-31,ZURN-CH,,Forward_Return_5Y_annlzd


Unnamed: 0,date,symbol,value,variable
19076469,2025-08-29,ZURN-CH,,Forward_Active_Return_5Y_annlzd
19076470,2025-09-30,ZURN-CH,,Forward_Active_Return_5Y_annlzd
19076471,2025-10-31,ZURN-CH,,Forward_Active_Return_5Y_annlzd


‚ö° „Ç¢„ÇØ„ÉÜ„Ç£„Éñ„É™„Çø„Éº„É≥ÊúÄÈÅ©Âåñ„Éê„ÉÉ„ÉÅ‰øùÂ≠òÔºàSQLiteÊ©üËÉΩÊ¥ªÁî®ÁâàÔºâ
 ¬† Âá¶ÁêÜÂàóÊï∞: 24Âàó
 ¬† „Éá„Éº„ÇøË°åÊï∞: 19,076,472Ë°å
 ¬† „Éê„ÉÉ„ÉÅ„Çµ„Ç§„Ç∫ (executemany): 10,000Ë°å
‚è≥ „Éá„Éº„ÇøÂâçÂá¶ÁêÜ‰∏≠...
‚úÖ ÂâçÂá¶ÁêÜÂÆå‰∫Ü (69.37Áßí)
 ¬† Âá¶ÁêÜÂØæË±°: 24„ÉÜ„Éº„Éñ„É´


üíæ ‰øùÂ≠ò‰∏≠:   4%|‚ñç         | 1/24 [00:09<03:36,  9.43s/it]

‚úÖ Active_Return_1M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:   8%|‚ñä         | 2/24 [00:18<03:26,  9.38s/it]

‚úÖ Active_Return_1M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  12%|‚ñà‚ñé        | 3/24 [00:27<03:14,  9.25s/it]

‚úÖ Forward_Active_Return_1M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  17%|‚ñà‚ñã        | 4/24 [00:39<03:24, 10.20s/it]

‚úÖ Forward_Active_Return_1M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  21%|‚ñà‚ñà        | 5/24 [00:47<02:56,  9.29s/it]

‚úÖ Active_Return_3M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  25%|‚ñà‚ñà‚ñå       | 6/24 [00:56<02:47,  9.30s/it]

‚úÖ Active_Return_3M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  29%|‚ñà‚ñà‚ñâ       | 7/24 [01:06<02:41,  9.47s/it]

‚úÖ Forward_Active_Return_3M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  33%|‚ñà‚ñà‚ñà‚ñé      | 8/24 [01:19<02:47, 10.49s/it]

‚úÖ Forward_Active_Return_3M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  38%|‚ñà‚ñà‚ñà‚ñä      | 9/24 [01:27<02:25,  9.70s/it]

‚úÖ Active_Return_6M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 10/24 [01:36<02:13,  9.56s/it]

‚úÖ Active_Return_6M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 11/24 [01:45<02:04,  9.59s/it]

‚úÖ Forward_Active_Return_6M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 12/24 [02:00<02:12, 11.01s/it]

‚úÖ Forward_Active_Return_6M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 13/24 [02:09<01:56, 10.59s/it]

‚úÖ Active_Return_12M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 14/24 [02:21<01:48, 10.90s/it]

‚úÖ Active_Return_12M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 15/24 [02:32<01:39, 11.10s/it]

‚úÖ Forward_Active_Return_12M: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 16/24 [02:46<01:33, 11.70s/it]

‚úÖ Forward_Active_Return_12M_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 17/24 [02:54<01:14, 10.65s/it]

‚úÖ Active_Return_3Y: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 18/24 [03:04<01:02, 10.39s/it]

‚úÖ Active_Return_3Y_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 19/24 [03:14<00:52, 10.42s/it]

‚úÖ Forward_Active_Return_3Y: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 20/24 [03:27<00:44, 11.23s/it]

‚úÖ Forward_Active_Return_3Y_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 21/24 [03:36<00:31, 10.43s/it]

‚úÖ Active_Return_5Y: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 22/24 [03:46<00:20, 10.35s/it]

‚úÖ Active_Return_5Y_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠:  96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 23/24 [03:57<00:10, 10.44s/it]

‚úÖ Forward_Active_Return_5Y: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ


üíæ ‰øùÂ≠ò‰∏≠: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 24/24 [04:10<00:00, 10.42s/it]

‚úÖ Forward_Active_Return_5Y_annlzd: 794,853‰ª∂ÔºàÊåøÂÖ•Ë©¶Ë°åÔºâ
üìä „Éê„ÉÉ„ÉÅ‰øùÂ≠òÂÆå‰∫ÜÁµ±Ë®à
 ¬† ÊàêÂäü: 24/24„ÉÜ„Éº„Éñ„É´
 ¬† Â§±Êïó: 0„ÉÜ„Éº„Éñ„É´
 ¬† Á∑èË©¶Ë°å‰ª∂Êï∞: 19,076,472‰ª∂
 ¬† ÂâçÂá¶ÁêÜÊôÇÈñì: 69.37Áßí
 ¬† ‰øùÂ≠òÊôÇÈñì: 250.06Áßí
 ¬† ÂêàË®àÊôÇÈñì: 319.43Áßí
 ¬† „Çπ„É´„Éº„Éó„ÉÉ„Éà (Ë©¶Ë°å‰ª∂Êï∞„Éô„Éº„Çπ): 59,720‰ª∂/Áßí
 ¬† ÊàêÂäüÁéá: 100.0%





### üß™PE ÂèñÂæó„ÉÜ„Çπ„Éà from BloombergÔºà‚ö†Ô∏è „Éá„Éº„ÇøÂèñÂæó„ÅØÈõ£„Åó„Åù„ÅÜÔºâ

-   Forward PE(BEST_PE_RATIO), Trailing PE(PE_RATIO), „Å® Forward EPS(BEST_EPS), Trailing EPS(TRAIL_12M_EPS_BEF_XO_ITEM)„ÇíÂèñÂæó

-   (- Forward PE „Å® Trailing PE „ÅÆÂ∑ÆÂàÜ„ÅÆ„Ç´„É©„É†„ÇíËøΩÂä†
-   PEG ratio = BEST_EPS / BEST_PE_RATIO „ÅÆ„Ç´„É©„É†„ÇíËøΩÂä†
    )


In [None]:
fields = ["BEST_PE_RATIO", "BEST_EPS", "PE_RATIO", "TRAIL_12M_EPS_BEF_XO_ITEM"]
# --------------------------------------------------------
# sedol„Å®date„ÅÆ„É™„Çπ„Éà„ÇíÂèñÂæó
# --------------------------------------------------------
with sqlite3.connect(factset_index_db_path) as conn:
    sedol_list = pd.read_sql(f"SELECT DISTINCT `SEDOL` FROM {UNIVERSE_CODE}", con=conn)[
        "SEDOL"
    ].tolist()
    sedol_list = [s + " Equity" for s in sedol_list]
    date_list = pd.read_sql(
        f"SELECT DISTINCT `date` FROM {UNIVERSE_CODE} ORDER BY `date`",
        con=conn,
        parse_dates=["date"],
    )["date"].tolist()
print("SEDOL„Å®Êó•‰ªò„ÅÆ„É™„Çπ„ÉàÂèñÂæóÂÆå‰∫Ü")

blp = bloomberg_utils.BlpapiCustom()
for field in fields:
    df = (
        blp.get_historical_data_with_overrides(
            securities=sedol_list,
            id_type="sedol",
            fields=[field],
            start_date=min(date_list).strftime("%Y%m%d"),
            end_date=max(date_list).strftime("%Y%m%d"),
            periodicity="MONTHLY",
            # overrides={"BEST_FPERIOD_OVERRIDE": "1FQ"},   # „Éá„Éï„Ç©„É´„Éà„Åß1FQ
            verbose=True,
        )
        .sort_values(["Date", "Identifier"], ignore_index=True)
        .drop(columns=["ID_Type"])
        .assign(Date=lambda x: pd.to_datetime(x["Date"]))
    )

    df = (
        pd.melt(
            df,
            id_vars=["Date", "Identifier"],
            value_vars=[field],
            var_name="variable",
        )
        .rename(columns={"Identifier": "SEDOL"})
        .assign(SEDOL=lambda x: x["SEDOL"].str.replace(" Equity", ""))
    )

    # blp.store_to_database(
    #     df=df,
    #     db_path=bloomberg_valuation_db_path,
    #     table_name=field,
    #     primary_keys=["Date", "SEDOL", "variable"],
    # )

SEDOL„Å®Êó•‰ªò„ÅÆ„É™„Çπ„ÉàÂèñÂæóÂÆå‰∫Ü
Bloomberg„Çª„ÉÉ„Ç∑„Éß„É≥„ÇíÈñãÂßã„Åó„Å¶„ÅÑ„Åæ„Åô...
„Çª„ÉÉ„Ç∑„Éß„É≥ÈñãÂßãÊàêÂäü„ÄÇ
‚úÖ „Çµ„Éº„Éì„Çπ„Ç™„Éº„Éó„É≥ÂÆå‰∫Ü„ÄÇ„É™„ÇØ„Ç®„Çπ„Éà‰ΩúÊàê‰∏≠...
üì° „É™„ÇØ„Ç®„Çπ„Éà„ÇíÈÄÅ‰ø°„Åó„Åæ„Åô[SEDOL] [MONTHLY]
   ÊúüÈñì: 2000-01-31 - 2025-10-31

‚úÖ „Éá„Éº„ÇøÂèñÂæóÂÆå‰∫Ü„ÄÇÊé•Á∂ö„ÇíÁµÇ‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ

üìä ÂèñÂæó„Éá„Éº„Çø:
   Ë°åÊï∞: 21Ë°å
   Êó•‰ªòÁØÑÂõ≤: 2023-11-30 00:00:00 ~ 2025-07-31 00:00:00
   „É¶„Éã„Éº„ÇØÊó•Êï∞: 21Êó•
   Ë≠òÂà•Â≠êÊï∞: 1
   Ë≠òÂà•Â≠ê„Çø„Ç§„Éó: SEDOL
   Âë®Êúü: MONTHLY
Bloomberg„Çª„ÉÉ„Ç∑„Éß„É≥„ÇíÈñãÂßã„Åó„Å¶„ÅÑ„Åæ„Åô...
„Çª„ÉÉ„Ç∑„Éß„É≥ÈñãÂßãÊàêÂäü„ÄÇ
‚úÖ „Çµ„Éº„Éì„Çπ„Ç™„Éº„Éó„É≥ÂÆå‰∫Ü„ÄÇ„É™„ÇØ„Ç®„Çπ„Éà‰ΩúÊàê‰∏≠...
üì° „É™„ÇØ„Ç®„Çπ„Éà„ÇíÈÄÅ‰ø°„Åó„Åæ„Åô[SEDOL] [MONTHLY]
   ÊúüÈñì: 2000-01-31 - 2025-10-31

‚úÖ „Éá„Éº„ÇøÂèñÂæóÂÆå‰∫Ü„ÄÇÊé•Á∂ö„ÇíÁµÇ‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ

üìä ÂèñÂæó„Éá„Éº„Çø:
   Ë°åÊï∞: 13Ë°å
   Êó•‰ªòÁØÑÂõ≤: 2023-11-30 00:00:00 ~ 2025-09-30 00:00:00
   „É¶„Éã„Éº„ÇØÊó•Êï∞: 13Êó•
   Ë≠òÂà•Â≠êÊï∞: 1
   Ë≠òÂà•Â≠ê„Çø„Ç§„Éó

‚úÖ „Éá„Éº„Çø„Éô„Éº„Çπ„ÉÅ„Çß„ÉÉ„ÇØ


In [None]:
with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    df = pd.read_sql("SELECT * FROM BEST_PE_RATIO", con=conn, parse_dates=["Date"])
    display(df["Date"].sort_values().unique())
    display(df.sort_values("Date", ignore_index=True).drop_duplicates())

<DatetimeArray>
['2000-01-31 00:00:00', '2000-02-29 00:00:00', '2000-03-31 00:00:00',
 '2000-04-28 00:00:00', '2000-05-31 00:00:00', '2000-06-30 00:00:00',
 '2000-07-31 00:00:00', '2000-08-31 00:00:00', '2000-09-29 00:00:00',
 '2000-10-31 00:00:00',
 ...
 '2025-04-30 00:00:00', '2025-05-29 00:00:00', '2025-05-30 00:00:00',
 '2025-06-30 00:00:00', '2025-07-31 00:00:00', '2025-08-29 00:00:00',
 '2025-08-31 00:00:00', '2025-09-30 00:00:00', '2025-10-30 00:00:00',
 '2025-10-31 00:00:00']
Length: 334, dtype: datetime64[ns]

Unnamed: 0,Date,SEDOL,variable,value
0,2000-01-31,2206301,BEST_PE_RATIO,11.744
1,2000-01-31,2213981,BEST_PE_RATIO,10.710
2,2000-02-29,2206301,BEST_PE_RATIO,13.396
3,2000-02-29,2213981,BEST_PE_RATIO,10.454
4,2000-03-31,2206301,BEST_PE_RATIO,14.650
...,...,...,...,...
61076,2025-10-31,2852533,BEST_PE_RATIO,26.723
61077,2025-10-31,2849472,BEST_PE_RATIO,13.087
61078,2025-10-31,2842255,BEST_PE_RATIO,33.487
61079,2025-10-31,2838555,BEST_PE_RATIO,22.003


‚úÖ Ê¨†ÊêçÂâ≤Âêà„ÉÅ„Çß„ÉÉ„ÇØ


In [None]:
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)

# bloomberg per
with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    df_forward_pe = pd.read_sql(
        "SELECT * FROM BEST_PE_RATIO", parse_dates=["Date"], con=conn
    ).rename(columns={"value": "BEST_PE_RATIO", "Date": "date"})
    df_actual_pe = pd.read_sql(
        "SELECT * FROM PE_RATIO", parse_dates=["Date"], con=conn
    ).rename(columns={"value": "PE_RATIO", "Date": "date"})
    df_actual_pe.drop(columns=["variable"], inplace=True)
    df_pe = pd.merge(
        df_forward_pe, df_actual_pe, on=["date", "SEDOL"], how="outer"
    ).assign(date=lambda x: x["date"] + pd.offsets.MonthEnd(0))

df_merged = pd.merge(df_weight, df_pe, on=["date", "SEDOL"], how="outer").dropna(
    subset=["Weight (%)", "BEST_PE_RATIO", "PE_RATIO"], how="any", ignore_index=True
)

display(df_merged.tail(5))

Unnamed: 0,date,P_SYMBOL,SEDOL,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),variable,BEST_PE_RATIO,PE_RATIO
39444,2025-10-31,SBAC-US,BZ6TS23,SBA„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,Real Estate,Equity Real Estate Investment Trusts (REITs),0.026502,BEST_PE_RATIO,22.317,20.739
39445,2025-10-31,CHTR-US,BZ6VT82,„ÉÅ„É£„Éº„Çø„Éº„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,Communication Services,Media & Entertainment,0.026749,BEST_PE_RATIO,5.755,5.8559
39446,2025-10-31,WTC-AU,BZ8GX83,„ÉØ„Ç§„Ç∫„ÉÜ„ÉÉ„ÇØ„Éª„Ç∞„É≠„Éº„Éê„É´,Information Technology,Software & Services,0.011702,BEST_PE_RATIO,57.203,74.8462
39447,2025-10-31,VST-US,BZ8VJQ8,„Éì„Çπ„Éà„É©,Utilities,Utilities,0.082303,BEST_PE_RATIO,21.327,39.5135
39448,2025-10-31,BCP-PT,BZCNN35,Banco Comercial Portugues S.A.,Financials,Banks,0.009457,BEST_PE_RATIO,10.628,11.8584


In [None]:
# Ê¨†Êêç
g = pd.DataFrame(df_merged.groupby(["date"])["Weight (%)"].agg("sum")).reset_index()
display(g.query("date>='2023-01-01'"))

Unnamed: 0,date,Weight (%)
271,2023-01-31,54.865295
272,2023-02-28,58.768044
273,2023-03-31,61.187943
274,2023-04-30,61.089016
275,2023-05-31,62.986537
276,2023-06-30,63.706548
277,2023-07-31,64.796422
278,2023-08-31,66.780949
279,2023-09-30,67.820945
280,2023-10-31,67.305035


#### Bloomberg valuation „Éá„Éº„Çø„Å´„Å§„ÅÑ„Å¶„É°„É¢

-   Áõ¥Ëøë 1,2 Âπ¥ÂàÜÁ®ãÂ∫¶„Åó„Åã forward+actual pe „ÅØÂèñ„Çå„Å™„ÅÑ


## 4. Factor Ë®àÁÆó


### 4-1. ÊàêÈï∑ÁéáË®àÁÆóÔºàFactsetÔºâ


#### 1. QoQ, YoY, 3Yr CAGR, 5Yr CAGR „ÅÆÂÄ§„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
factor_list = [
    "FF_SALES",
    "FF_EBITDA_OPER",
    "FF_EBIT_OPER",
    "FF_EPS",
    "FF_OPER_CF",
    "FF_ASSETS",
    "FF_COM_EQ",
    "FF_DEBT",
    "FF_DEBT_LT",
    "FF_DEBT_ST",
    "FF_OPER_INC",
    "FF_CAPEX",
    "FF_FREE_CF",
    "FF_EPS_DIL",
]
period_list = ["QoQ", "YoY", "CAGR_3Y", "CAGR_5Y"]

In [None]:
query = [f"SELECT * FROM `{table}`" for table in factor_list]
query = " UNION ALL ".join(query)

# ------------------------------------------------------
# load from database
# ------------------------------------------------------
with sqlite3.connect(financials_db_path) as conn:
    df_all = (
        pd.read_sql(query, con=conn, parse_dates=["date"])
        .sort_values("date", ignore_index=True)
        .assign(
            variable=lambda x: x["variable"].astype("category"),
            P_SYMBOL=lambda x: x["P_SYMBOL"].astype("category"),
        )
    ).sort_values(["variable", "P_SYMBOL", "date"], ignore_index=True)

display(df_all)
# groupby("variable")„Çí‰Ωø„ÅÜ„Åì„Å®„Åß„ÄÅdf_allÂÖ®‰Ωì„Çí‰ΩïÂ∫¶„ÇÇËµ∞Êüª(loc)„Åô„Çã„Ç≥„Çπ„Éà„Çí„Çº„É≠„Å´„Åô„Çã
# observed=True „ÅØcategoryÂûã„Çí‰Ωø„ÅÜÂ†¥Âêà„ÅÆÈ´òÈÄüÂåñ„Ç™„Éó„Ç∑„Éß„É≥
grouped = df_all.groupby("variable", observed=True)

# ------------------------------------------------------
# calculate growth and store to database
# ------------------------------------------------------
total_steps = len(factor_list)
for factor_name, df_factor in tqdm(grouped, total=total_steps, desc="Factors"):
    # df_factor„ÅØView(ÂèÇÁÖß)„ÅÆÂèØËÉΩÊÄß„Åå„ÅÇ„Çã„Åü„ÇÅ„ÄÅË®àÁÆóÁî®„Å´„Ç≥„Éî„Éº„Çí‰ΩúÊàê
    # „Åì„Åì„Åß„É°„É¢„É™„ÇíÈ£ü„ÅÜ„Åå„ÄÅfactorÂçò‰Ωç„Å™„ÅÆ„ÅßÂÖ®‰Ωì„Ç≥„Éî„Éº„Çà„Çä„ÅØËªΩ„ÅÑ
    # „Åã„Å§„ÄÅ„É´„Éº„Éó„ÅÆÊúÄÂæå„ÅßËß£Êîæ„Åï„Çå„Çã
    df_base = df_factor.copy()

    for growth in period_list:
        new_variable_name = f"{factor_name}_{growth}"
        df_result = roic_utils.calculate_growth(
            df=df_base, data_name=str(factor_name), growth_type=growth
        )

        # store to database
        db_utils.delete_table_from_database(
            db_path=financials_db_path, table_name=new_variable_name
        )
        factset_utils.store_to_database(
            df=df_result,
            db_path=financials_db_path,
            table_name=new_variable_name,
            verbose=False,
        )

    # ---- „É°„É¢„É™ÁÆ°ÁêÜ ----
    # ‰∏Ä„Å§„ÅÆ„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜ„ÅåÁµÇ„Çè„Å£„Åü„Çâ„ÄÅ‰ΩøÁî®„Åó„Åü‰∏ÄÊôÇÂ§âÊï∞„ÇíÂâäÈô§„Åó„Å¶GCÂÆüË°å
    del df_base
    gc.collect()

Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,2448.485005,FF_ASSETS
1,2005-09-30,0HSW-GB,2333.443621,FF_ASSETS
2,2005-10-31,0HSW-GB,2333.443621,FF_ASSETS
3,2005-11-30,0HSW-GB,2333.443621,FF_ASSETS
4,2005-12-30,0HSW-GB,,FF_ASSETS
...,...,...,...,...
11077677,2025-06-30,ZURN-CH,37031.999926,FF_SALES
11077678,2025-07-31,ZURN-CH,37031.999926,FF_SALES
11077679,2025-08-29,ZURN-CH,37031.999926,FF_SALES
11077680,2025-09-30,ZURN-CH,37031.999926,FF_SALES


Factors: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 14/14 [05:52<00:00, 25.15s/it]


#### 2. „Éï„Ç°„ÇØ„Çø„Éº„ÅÆ„É©„É≥„ÇØ„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))
factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight,
    factor_list=factor_list,
    financials_db_path=financials_db_path,
    period_list=period_list,
)

üöÄ Âá¶ÁêÜÈñãÂßã: 56 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 56/56 [06:31<00:00,  7.00s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


In [None]:
table_names = db_utils.get_table_names(financials_db_path)
display([s for s in table_names if ("QoQ" in s) or ("YoY" in s) or ("CAGR" in s)])

with sqlite3.connect(financials_db_path) as conn:
    df = pd.read_sql(
        "SELECT * FROM FF_OPER_INC_QoQ_PctRank ORDER BY 'date'",
        con=conn,
        parse_dates=["date"],
    )
    display(df.head())

['FF_EBITDA_OPER_MGN_CHANGE_QoQ',
 'FF_EBITDA_OPER_MGN_CHANGE_YoY',
 'FF_EBIT_OPER_MGN_CHANGE_QoQ',
 'FF_EBIT_OPER_MGN_CHANGE_YoY',
 'FF_NET_MGN_CHANGE_QoQ',
 'FF_NET_MGN_CHANGE_YoY',
 'FF_OPER_MGN_CHANGE_QoQ',
 'FF_OPER_MGN_CHANGE_YoY',
 'FF_PTX_MGN_CHANGE_QoQ',
 'FF_PTX_MGN_CHANGE_YoY',
 'FF_GROSS_MGN_CHANGE_QoQ',
 'FF_GROSS_MGN_CHANGE_YoY',
 'FF_ROA_CHANGE_QoQ',
 'FF_ROA_CHANGE_YoY',
 'FF_ROE_CHANGE_QoQ',
 'FF_ROE_CHANGE_YoY',
 'FF_ROIC_CHANGE_QoQ',
 'FF_ROIC_CHANGE_YoY',
 'FF_ROTC_CHANGE_QoQ',
 'FF_ROTC_CHANGE_YoY',
 'FF_ASSETS_QoQ',
 'FF_ASSETS_YoY',
 'FF_ASSETS_CAGR_3Y',
 'FF_ASSETS_CAGR_5Y',
 'FF_CAPEX_QoQ',
 'FF_CAPEX_YoY',
 'FF_CAPEX_CAGR_3Y',
 'FF_CAPEX_CAGR_5Y',
 'FF_COM_EQ_QoQ',
 'FF_COM_EQ_YoY',
 'FF_COM_EQ_CAGR_3Y',
 'FF_COM_EQ_CAGR_5Y',
 'FF_DEBT_QoQ',
 'FF_DEBT_YoY',
 'FF_DEBT_CAGR_3Y',
 'FF_DEBT_CAGR_5Y',
 'FF_DEBT_LT_QoQ',
 'FF_DEBT_LT_YoY',
 'FF_DEBT_LT_CAGR_3Y',
 'FF_DEBT_LT_CAGR_5Y',
 'FF_DEBT_ST_QoQ',
 'FF_DEBT_ST_YoY',
 'FF_DEBT_ST_CAGR_3Y',
 'FF_DEBT_ST_CAGR_5Y'

Unnamed: 0,date,P_SYMBOL,variable,value
0,2005-11-30,0HSW-GB,FF_OPER_INC_QoQ_PctRank,0.066176
1,2005-11-30,0II3.XX1-GB,FF_OPER_INC_QoQ_PctRank,0.78481
2,2005-11-30,0MDJ-GB,FF_OPER_INC_QoQ_PctRank,0.426966
3,2005-11-30,0N1N-GB,FF_OPER_INC_QoQ_PctRank,0.475124
4,2005-11-30,0N3I-GB,FF_OPER_INC_QoQ_PctRank,0.343284


### 4-2. „Éû„Éº„Ç∏„É≥ÊîπÂñÑÁéá


#### 1. QoQ, YoY, 3Yr, 5Yr „ÅÆÂ§âÂåñÂÄ§ÔºàdeltaÔºâ„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
factor_list = [
    "FF_EBITDA_OPER_MGN",
    "FF_EBIT_OPER_MGN",
    "FF_NET_MGN",
    "FF_OPER_MGN",
    "FF_PTX_MGN",
    "FF_GROSS_MGN",
    "FF_ROA",
    "FF_ROE",
    "FF_ROIC",
    "FF_ROTC",
]

period_list = ["CHANGE_QoQ", "CHANGE_YoY", "CHANGE_3Y", "CHANGE_5Y"]

query = [
    f"SELECT `date`, `P_SYMBOL`, `variable`, `value` FROM `{table}`"
    for table in factor_list
]
query = " UNION ALL ".join(query)

In [None]:
# ------------------------------------------------------
# load from database
# ------------------------------------------------------
with sqlite3.connect(financials_db_path) as conn:
    df_all = (
        pd.read_sql(query, con=conn, parse_dates=["date"])
        .sort_values("date", ignore_index=True)
        .assign(
            variable=lambda x: x["variable"].astype("category"),
            P_SYMBOL=lambda x: x["P_SYMBOL"].astype("category"),
        )
    ).sort_values(["variable", "P_SYMBOL", "date"], ignore_index=True)

display(df_all)
# groupby("variable")„Çí‰Ωø„ÅÜ„Åì„Å®„Åß„ÄÅdf_allÂÖ®‰Ωì„Çí‰ΩïÂ∫¶„ÇÇËµ∞Êüª(loc)„Åô„Çã„Ç≥„Çπ„Éà„Çí„Çº„É≠„Å´„Åô„Çã
# observed=True „ÅØcategoryÂûã„Çí‰Ωø„ÅÜÂ†¥Âêà„ÅÆÈ´òÈÄüÂåñ„Ç™„Éó„Ç∑„Éß„É≥
grouped = df_all.groupby("variable", observed=True)

# ------------------------------------------------------
# calculate growth and store to database
# ------------------------------------------------------
total_steps = len(factor_list)
for factor_name, df_factor in tqdm(grouped, total=total_steps, desc="Factors"):
    # df_factor„ÅØView(ÂèÇÁÖß)„ÅÆÂèØËÉΩÊÄß„Åå„ÅÇ„Çã„Åü„ÇÅ„ÄÅË®àÁÆóÁî®„Å´„Ç≥„Éî„Éº„Çí‰ΩúÊàê
    # „Åì„Åì„Åß„É°„É¢„É™„ÇíÈ£ü„ÅÜ„Åå„ÄÅfactorÂçò‰Ωç„Å™„ÅÆ„ÅßÂÖ®‰Ωì„Ç≥„Éî„Éº„Çà„Çä„ÅØËªΩ„ÅÑ
    # „Åã„Å§„ÄÅ„É´„Éº„Éó„ÅÆÊúÄÂæå„ÅßËß£Êîæ„Åï„Çå„Çã
    df_base = df_factor.copy()

    for growth in period_list:
        new_variable_name = f"{factor_name}_{growth}"
        df_result = roic_utils.calculate_margin_improvement(
            df=df_base, data_name=str(factor_name), growth_type=growth
        )

        # store to database
        db_utils.delete_table_from_database(
            db_path=financials_db_path, table_name=new_variable_name
        )
        factset_utils.store_to_database(
            df=df_result,
            db_path=financials_db_path,
            table_name=new_variable_name,
            verbose=False,
        )

    # ---- „É°„É¢„É™ÁÆ°ÁêÜ ----
    # ‰∏Ä„Å§„ÅÆ„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜ„ÅåÁµÇ„Çè„Å£„Åü„Çâ„ÄÅ‰ΩøÁî®„Åó„Åü‰∏ÄÊôÇÂ§âÊï∞„ÇíÂâäÈô§„Åó„Å¶GCÂÆüË°å
    del df_base
    gc.collect()

Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,9.473684,FF_EBITDA_OPER_MGN
1,2005-09-30,0HSW-GB,4.166667,FF_EBITDA_OPER_MGN
2,2005-10-31,0HSW-GB,4.166667,FF_EBITDA_OPER_MGN
3,2005-11-30,0HSW-GB,4.166667,FF_EBITDA_OPER_MGN
4,2005-12-30,0HSW-GB,,FF_EBITDA_OPER_MGN
...,...,...,...,...
7912625,2025-06-30,ZURN-CH,21.581542,FF_ROTC
7912626,2025-07-31,ZURN-CH,21.581542,FF_ROTC
7912627,2025-08-29,ZURN-CH,21.581542,FF_ROTC
7912628,2025-09-30,ZURN-CH,21.581542,FF_ROTC


Factors: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 10/10 [04:27<00:00, 26.74s/it]


#### 2. „Éï„Ç°„ÇØ„Çø„Éº„ÅÆ„É©„É≥„ÇØ„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã


In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))
factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight,
    factor_list=factor_list,
    financials_db_path=financials_db_path,
    period_list=period_list,
)

üöÄ Âá¶ÁêÜÈñãÂßã: 40 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 40/40 [05:10<00:00,  7.75s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


### 4-3. Composite Growth Factor


#### 1. „Éá„Éº„Çø„É≠„Éº„Éâ&Ê¨†ÊêçÂÄ§Á¢∫Ë™ç -> Ê¨†ÊêçÂÄ§„ÅØ„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå


In [None]:
# growth„Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó„ÅÆ„Åü„ÇÅ„ÅÆ„Éá„Ç£„Çπ„ÇØ„É™„Éó„Çø„Éº„ÇíÊåáÂÆö
factor_list = [
    "FF_SALES_CAGR_3Y_PctRank",
    "FF_OPER_INC_CAGR_3Y_PctRank",
    "FF_OPER_MGN_CHANGE_3Y_PctRank",
    "FF_GROSS_MGN_CHANGE_3Y_PctRank",
    "FF_NET_MGN_CHANGE_3Y_PctRank",
]

# -----------------------------------
# load data
# -----------------------------------

df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=factor_list
)

df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)
df = (
    factset_utils.merge_idx_constituents_and_financials(
        df_weight=df_weight, df_factor=df_factor
    )
    .fillna(np.nan)
    .dropna(subset=factor_list, how="all")
    .dropna(subset=["Weight (%)"], ignore_index=True)
)
display(df.head())

factset_utils.check_missing_value_and_fill_by_sector_median(
    df=df, factor_list=factor_list
)

# -----------------------------------
# „Éá„Éº„ÇøÁ¢∫Ë™ç
# -----------------------------------
display(df)
g = pd.DataFrame(df.groupby(["date"])["Weight (%)"].agg("sum"))
g_count = pd.DataFrame(df.groupby(["date"])["SEDOL"].count())
g_merged = pd.merge(g, g_count, left_index=True, right_index=True)

display(g_merged)

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_GROSS_MGN_CHANGE_3Y_PctRank,FF_NET_MGN_CHANGE_3Y_PctRank,FF_OPER_INC_CAGR_3Y_PctRank,FF_OPER_MGN_CHANGE_3Y_PctRank,FF_SALES_CAGR_3Y_PctRank
0,2008-08-31,0MDJ-GB,B2PF6M7,UKIBBF1,Cadbury PLC,Consumer Staples,Food Beverage & Tobacco,0.070948,,0.337079,0.034483,0.114943,0.072165
1,2008-08-31,0N3I-GB,0896265,UKIDBM1,Tomkins PLC,Industrials,Capital Goods,0.010869,0.351145,0.010929,0.408284,0.136095,0.79397
2,2008-08-31,0P7J-GB,0028262,UKIAPY1,Amec Foster Wheeler plc,Energy,Energy,0.023192,0.644444,0.954545,0.527473,0.956044,0.019608
3,2008-08-31,1-HK,6190273,HKGAAE1,CK„Éè„ÉÅ„ÇΩ„É≥„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Financials,Real Estate,0.090836,0.235955,0.117886,0.875,0.470852,0.92517
4,2008-08-31,10-HK,6408352,HKGAGG1,Hang Lung Group Limited,Financials,Real Estate,0.016922,0.58427,0.743902,,,0.112245


üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºàË£úÂÆåÂâçÔºâ
FF_SALES_CAGR_3Y_PctRank                     :    365‰ª∂ (  0.1%)
FF_OPER_INC_CAGR_3Y_PctRank                  : 29,077‰ª∂ ( 11.2%)
FF_OPER_MGN_CHANGE_3Y_PctRank                : 29,264‰ª∂ ( 11.2%)
FF_GROSS_MGN_CHANGE_3Y_PctRank               : 47,146‰ª∂ ( 18.1%)
FF_NET_MGN_CHANGE_3Y_PctRank                 : 48,076‰ª∂ ( 18.5%)

‚è≥ „Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå‰∏≠...

üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºà„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§Ë£úÂÆåÂæåÔºâ
‚úÖ FF_SALES_CAGR_3Y_PctRank                     :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 365‰ª∂
‚úÖ FF_OPER_INC_CAGR_3Y_PctRank                  :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 29,077‰ª∂
‚úÖ FF_OPER_MGN_CHANGE_3Y_PctRank                :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 29,264‰ª∂
‚úÖ FF_GROSS_MGN_CHANGE_3Y_PctRank               :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 47,146‰ª∂
‚úÖ FF_NET_MGN_CHANGE_3Y_PctRank                 :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 48,076‰ª∂

‚úÖ ÊúÄÁµÇÊ¨†ÊêçÂÄ§„ÉÅ„Çß„ÉÉ„ÇØ
‚úÖ FF_SALES_CAGR_3Y_PctRank                    

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_GROSS_MGN_CHANGE_3Y_PctRank,FF_NET_MGN_CHANGE_3Y_PctRank,FF_OPER_INC_CAGR_3Y_PctRank,FF_OPER_MGN_CHANGE_3Y_PctRank,FF_SALES_CAGR_3Y_PctRank
0,2008-08-31,0MDJ-GB,B2PF6M7,UKIBBF1,Cadbury PLC,Consumer Staples,Food Beverage & Tobacco,0.070948,0.505814,0.337079,0.034483,0.114943,0.072165
1,2008-08-31,0N3I-GB,0896265,UKIDBM1,Tomkins PLC,Industrials,Capital Goods,0.010869,0.351145,0.010929,0.408284,0.136095,0.793970
2,2008-08-31,0P7J-GB,0028262,UKIAPY1,Amec Foster Wheeler plc,Energy,Energy,0.023192,0.644444,0.954545,0.527473,0.956044,0.019608
3,2008-08-31,1-HK,6190273,HKGAAE1,CK„Éè„ÉÅ„ÇΩ„É≥„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Financials,Real Estate,0.090836,0.235955,0.117886,0.875000,0.470852,0.925170
4,2008-08-31,10-HK,6408352,HKGAGG1,Hang Lung Group Limited,Financials,Real Estate,0.016922,0.584270,0.743902,0.502232,0.502242,0.112245
...,...,...,...,...,...,...,...,...,...,...,...,...,...
260260,2025-10-31,ZAL-DE,BQV0SV7,GERZQZ1,„Ç∂„É©„É≥„Éâ,Consumer Discretionary,Consumer Discretionary Distribution & Retail,0.008099,0.524390,0.507937,0.507042,0.507042,0.755814
260261,2025-10-31,ZBH-US,2783815,USA4JT1,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Health Care,Health Care Equipment & Services,0.025627,0.595506,0.511905,0.510870,0.510870,0.438776
260262,2025-10-31,ZBRA-US,2989356,USAP8H1,„Çº„Éñ„É©„Éª„ÉÜ„ÇØ„Éé„É≠„Ç∏„Éº„Ç∫„Éª„Ç≥„Éº„Éù„É¨„Éº„Ç∑„Éß„É≥ Class A,Information Technology,Technology Hardware & Equipment,0.017637,0.819277,0.136364,0.366197,0.633803,0.190476
260263,2025-10-31,ZTS-US,B95WG16,USBANZ1,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,Health Care,Pharmaceuticals Biotechnology & Life Sciences,0.082630,0.707865,0.666667,0.597826,0.608696,0.459184


Unnamed: 0_level_0,Weight (%),SEDOL
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2008-08-31,96.671657,1309
2008-09-30,96.652476,1305
2008-10-31,96.929219,1305
2008-11-30,96.983162,1277
2008-12-31,96.490301,1272
...,...,...
2025-06-30,99.285484,1123
2025-07-31,99.335974,1122
2025-08-31,99.333187,1123
2025-09-30,98.696542,1096


#### 2. Composite Growth Factor Ë®àÁÆó


In [None]:
# -----------------------------------
# Composite Growth Factor„ÅÆË®àÁÆó
# -----------------------------------
# „Ç¶„Çß„Ç§„ÉàË®≠ÂÆö
blend_weight = {
    "FF_SALES_CAGR_3Y_PctRank": 0.20,
    "FF_OPER_INC_CAGR_3Y_PctRank": 0.20,
    "FF_OPER_MGN_CHANGE_3Y_PctRank": 0.20,
    "FF_GROSS_MGN_CHANGE_3Y_PctRank": 0.20,
    "FF_NET_MGN_CHANGE_3Y_PctRank": 0.20,
}
factor_name = "Factor_Composite_Growth"
df = factset_utils.create_factor(
    df=df, factor_name=factor_name, blend_weight=blend_weight
)
# -----------------------------------
# Store to database
# -----------------------------------
for variable in ["Factor_Composite_Growth_Score", "Factor_Composite_Growth_Score_Rank"]:
    df_slice = (
        df[["date", "P_SYMBOL", variable]]
        .assign(variable=variable, date=lambda x: pd.to_datetime(x["date"]))
        .rename(columns={variable: "value"})
    )
    db_utils.delete_table_from_database(db_path=financials_db_path, table_name=variable)
    factset_utils.store_to_database(
        df=df_slice,
        db_path=financials_db_path,
        table_name=variable,
        verbose=True,
    )


üìä Composite Growth Factor Ë®àÁÆó
„Ç¶„Çß„Ç§„ÉàË®≠ÂÆö:
  FF_SALES_CAGR_3Y_PctRank                     : 20.0%
  FF_OPER_INC_CAGR_3Y_PctRank                  : 20.0%
  FF_OPER_MGN_CHANGE_3Y_PctRank                : 20.0%
  FF_GROSS_MGN_CHANGE_3Y_PctRank               : 20.0%
  FF_NET_MGN_CHANGE_3Y_PctRank                 : 20.0%

‚úÖ Composite Growth Score Ë®àÁÆóÂÆå‰∫Ü
   Âπ≥Âùá: 0.5049
   Ê®ôÊ∫ñÂÅèÂ∑Æ: 0.1889
   ÊúÄÂ∞èÂÄ§: 0.0098
   ÊúÄÂ§ßÂÄ§: 1.0000

üìä Composite Growth Rank ÂàÜÂ∏É:
  rank5: 52,142‰ª∂ ( 20.0%)
  rank4: 52,007‰ª∂ ( 20.0%)
  rank3: 52,015‰ª∂ ( 20.0%)
  rank2: 52,008‰ª∂ ( 20.0%)
  rank1: 52,093‰ª∂ ( 20.0%)

üéØ ÊúÄÁµÇ„Éá„Éº„Çø„Çµ„É≥„Éó„É´:


Unnamed: 0,date,P_SYMBOL,SEDOL,GICS Sector,Factor_Composite_Growth_Score,Factor_Composite_Growth_Score_Rank
260245,2025-10-31,WTB-GB,B1KJJ40,Consumer Discretionary,0.604619,rank2
260246,2025-10-31,WTC-AU,BZ8GX83,Information Technology,0.429761,rank4
260247,2025-10-31,WTRG-US,BLCF3J9,Utilities,0.589394,rank2
260248,2025-10-31,WTW-US,BDB6Q21,Financials,0.71771,rank1
260249,2025-10-31,WY-US,2958936,Real Estate,0.053492,rank5
260250,2025-10-31,X-CA,B8KH5G7,Financials,0.561373,rank3
260251,2025-10-31,XEL-US,2614807,Utilities,0.396061,rank4
260252,2025-10-31,XOM-US,2326618,Energy,0.490537,rank3
260253,2025-10-31,XRO-AU,B8P4LP4,Information Technology,0.763432,rank1
260254,2025-10-31,XYL-US,B3P2CN8,Industrials,0.637115,rank2



üìä „Çª„ÇØ„Çø„ÉºÂà•„ÅÆÂπ≥ÂùáGrowth ScoreÔºàÊúÄÊñ∞Êó•‰ªòÔºâ:


GICS Sector
Energy                    0.511042
Communication Services    0.510529
Real Estate               0.508650
Utilities                 0.508333
Consumer Staples          0.507167
Materials                 0.507054
Consumer Discretionary    0.506787
Information Technology    0.506727
Health Care               0.505508
Financials                0.503845
Industrials               0.502700
Name: Factor_Composite_Growth_Score, dtype: float64

Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ260250 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Composite_Growth_Score: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ260250 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Composite_Growth_Score_Rank: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ


### 4-4. ValuationÔºàBloombergÔºâ -> ‚ö†Ô∏è „Éá„Éº„Çø‰∏çË∂≥„ÅÆ„Åü„ÇÅ„Çπ„Ç≠„ÉÉ„Éó

`BEST_EPS`„Å®`TRAIL_12M_EPS_BEF_XO_ITEM`„ÅÆ QoQ, YoY, 3Yr CAGR, 5Yr CAGR „ÅÆÂÄ§„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
data_list = ["BEST_PE_RATIO", "BEST_EPS", "PE_RATIO", "TRAIL_12M_EPS_BEF_XO_ITEM"]

blp = bloomberg_utils.BlpapiCustom()

with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    for data in tqdm(data_list):
        df = (
            pd.read_sql(f"SELECT * FROM `{data}`", con=conn, parse_dates=["Date"])
            .sort_values("Date", ignore_index=True)
            .rename(columns={"Date": "date", "SEDOL": "P_SYMBOL"})
        )  # calculate_growthÈñ¢Êï∞„Çí‰Ωø„ÅÜ„Åü„ÇÅ„Å´‰∏ÄÊôÇÁöÑ„Å´„É™„Éç„Éº„É†

        for growth in ["QoQ", "YoY", "CAGR_3Y", "CAGR_5Y"]:
            df_growth = df.copy()
            new_variable = f"{data}_{growth}"

            df_growth = roic_utils.calculate_growth(
                df=df_growth, data_name=data, growth_type=growth
            ).rename(
                columns={"date": "Date", "P_SYMBOL": "SEDOL"}
            )  # ÂÖÉ„ÅÆ„Ç´„É©„É†Âêç„Å´Êàª„Åô

            db_utils.delete_table_from_database(
                db_path=bloomberg_valuation_db_path, table_name=new_variable
            )

            blp.store_to_database(
                df=df_growth,
                db_path=bloomberg_valuation_db_path,
                table_name=new_variable,
                primary_keys=["date", "SEDOL", "variable"],
            )

  0%|          | 0/4 [00:00<?, ?it/s]

‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_PE_RATIO_QoQ' „Å´ 55043 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_PE_RATIO_YoY' „Å´ 38534 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_PE_RATIO_CAGR_3Y' „Å´ 10608 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ


 25%|‚ñà‚ñà‚ñå       | 1/4 [00:13<00:39, 13.07s/it]

‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_PE_RATIO_CAGR_5Y' „Å´ 889 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_EPS_QoQ' „Å´ 61342 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_EPS_YoY' „Å´ 41722 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_EPS_CAGR_3Y' „Å´ 7895 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 2/4 [00:25<00:25, 12.51s/it]

‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'BEST_EPS_CAGR_5Y' „Å´ 122 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'PE_RATIO_QoQ' „Å´ 512335 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'PE_RATIO_YoY' „Å´ 485879 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'PE_RATIO_CAGR_3Y' „Å´ 419875 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ


 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 3/4 [02:43<01:09, 69.83s/it]

‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'PE_RATIO_CAGR_5Y' „Å´ 359299 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'TRAIL_12M_EPS_BEF_XO_ITEM_QoQ' „Å´ 161910 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'TRAIL_12M_EPS_BEF_XO_ITEM_YoY' „Å´ 136498 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ
‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'TRAIL_12M_EPS_BEF_XO_ITEM_CAGR_3Y' „Å´ 61627 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [03:25<00:00, 51.43s/it]

‚úÖ ‰øùÂ≠òÂÆå‰∫Ü„ÄÇ„ÉÜ„Éº„Éñ„É´ 'TRAIL_12M_EPS_BEF_XO_ITEM_CAGR_5Y' „Å´ 32107 Ë°å„ÇíÂá¶ÁêÜ„Åó„Åæ„Åó„Åü (IGNORE)„ÄÇ





In [None]:
with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    df = pd.read_sql("SELECT * FROM BEST_EPS", con=conn, parse_dates=["Date"])
    display(df)

Unnamed: 0,Date,SEDOL,variable,value
0,2000-01-31,2206301,BEST_EPS,0.820
1,2000-01-31,2213981,BEST_EPS,3.653
2,2000-01-31,2226536,BEST_EPS,0.033
3,2000-01-31,2350651,BEST_EPS,0.230
4,2000-01-31,2552275,BEST_EPS,0.220
...,...,...,...,...
68844,2025-10-31,BZ8GX83,BEST_EPS,0.343
68845,2025-10-31,BZ8VJQ8,BEST_EPS,2.270
68846,2025-10-31,BZBW6G7,BEST_EPS,0.196
68847,2025-10-31,BZBYG74,BEST_EPS,0.216


In [None]:
tables = db_utils.get_table_names(bloomberg_valuation_db_path)
display(tables)

['BEST_PE_RATIO',
 'BEST_EPS',
 'PE_RATIO',
 'TRAIL_12M_EPS_BEF_XO_ITEM',
 'BEST_PE_RATIO_QoQ',
 'BEST_PE_RATIO_YoY',
 'BEST_PE_RATIO_CAGR_3Y',
 'BEST_PE_RATIO_CAGR_5Y',
 'BEST_EPS_QoQ',
 'BEST_EPS_YoY',
 'BEST_EPS_CAGR_3Y',
 'BEST_EPS_CAGR_5Y',
 'PE_RATIO_QoQ',
 'PE_RATIO_YoY',
 'PE_RATIO_CAGR_3Y',
 'PE_RATIO_CAGR_5Y',
 'TRAIL_12M_EPS_BEF_XO_ITEM_QoQ',
 'TRAIL_12M_EPS_BEF_XO_ITEM_YoY',
 'TRAIL_12M_EPS_BEF_XO_ITEM_CAGR_3Y',
 'TRAIL_12M_EPS_BEF_XO_ITEM_CAGR_5Y']

In [None]:
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
query = f"""
    SELECT
        `date`, `P_SYMBOL`, `SEDOL`, `FG_COMPANY_NAME`, `Asset ID`, `GICS Sector`, `Weight (%)`
    FROM
        {UNIVERSE_CODE}
"""
with sqlite3.connect(factset_index_db_path) as conn:
    df_weight = pd.read_sql(query, parse_dates=["date"], con=conn)

# „Éï„Ç°„ÇØ„Çø„ÉºÂÄ§
factor_list = ["BEST_EPS", "BEST_PE_RATIO"]

with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    # totalÂºïÊï∞„ÇíËøΩÂä†
    for factor in tqdm(factor_list):
        # „Éá„Éº„Çø„Éô„Éº„Çπ„Åã„ÇâÂëº„Å≥Âá∫„Åó
        df = (
            pd.read_sql(
                f"SELECT `Date`, `SEDOL`, `value` FROM `{factor}`",
                con=conn,
                parse_dates=["Date"],
            )
            .rename(columns={"Date": "date"})
            .assign(
                date=lambda row: pd.to_datetime(row["date"])
                + pd.tseries.offsets.MonthEnd(0)
            )
            .sort_values("date", ignore_index=True)
            .rename(columns={"value": factor})
        )

        # merge: ÊßãÊàêÈäòÊüÑÊÉÖÂ†±„Å®„Éï„Ç°„ÇØ„Çø„Éº
        df = (
            pd.merge(df_weight, df, on=["date", "SEDOL"], how="outer")
            .drop_duplicates(subset=["date", "SEDOL"])
            .dropna(
                subset=["Weight (%)", factor],
                how="any",
                axis=0,
                ignore_index=True,
            )
        )

        g = df.groupby(["date"])["Weight (%)"].agg("sum").to_frame()
        display(g.tail(50))

  0%|          | 0/2 [00:00<?, ?it/s]

Unnamed: 0_level_0,Weight (%)
date,Unnamed: 1_level_1
2021-09-30,21.20265
2021-10-31,36.429975
2021-11-30,38.70586
2021-12-31,23.446075
2022-01-31,42.453669
2022-02-28,35.490503
2022-03-31,31.115912
2022-04-30,43.708097
2022-05-31,40.849117
2022-06-30,25.092301


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 1/2 [00:01<00:01,  1.11s/it]

Unnamed: 0_level_0,Weight (%)
date,Unnamed: 1_level_1
2021-09-30,24.75197
2021-10-31,30.705197
2021-11-30,39.116527
2021-12-31,40.400506
2022-01-31,42.623696
2022-02-28,45.42437
2022-03-31,47.750144
2022-04-30,49.021961
2022-05-31,50.595893
2022-06-30,51.328746


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:02<00:00,  1.07s/it]


In [None]:
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
query = f"""
    SELECT
        `date`, `P_SYMBOL`, `SEDOL`, `FG_COMPANY_NAME`, `Asset ID`, `GICS Sector`, `Weight (%)`
    FROM
        {UNIVERSE_CODE}
"""
with sqlite3.connect(factset_index_db_path) as conn:
    df_weight = pd.read_sql(query, parse_dates=["date"], con=conn)

# „Éï„Ç°„ÇØ„Çø„ÉºÂÄ§
factor_list = ["BEST_EPS", "BEST_PE_RATIO"]
period_list = ["QoQ", "YoY", "CAGR_3Y", "CAGR_5Y"]
total_iterations = len(factor_list) * len(period_list)
print(f"Âá¶ÁêÜÁ∑èÊï∞: {total_iterations} Âõû")

with sqlite3.connect(bloomberg_valuation_db_path) as conn:
    # totalÂºïÊï∞„ÇíËøΩÂä†
    for factor, periods in tqdm(
        itertools.product(factor_list, period_list),
        total=total_iterations,
        desc="„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó",
    ):
        factor_growth = f"{factor}_{periods}"
        # „Éá„Éº„Çø„Éô„Éº„Çπ„Åã„ÇâÂëº„Å≥Âá∫„Åó
        df = (
            pd.read_sql(
                f"SELECT `Date`, `SEDOL`, `value` FROM `{factor_growth}`",
                con=conn,
                parse_dates=["Date"],
            )
            .rename(columns={"Date": "date"})
            .assign(
                date=lambda row: pd.to_datetime(row["date"])
                + pd.tseries.offsets.MonthEnd(0)
            )
            .sort_values("date", ignore_index=True)
            .rename(columns={"value": factor_growth})
        )

        # merge: ÊßãÊàêÈäòÊüÑÊÉÖÂ†±„Å®„Éï„Ç°„ÇØ„Çø„Éº
        df = (
            pd.merge(df_weight, df, on=["date", "SEDOL"], how="outer")
            .drop_duplicates(subset=["date", "SEDOL"])
            .dropna(
                subset=["Weight (%)", factor_growth],
                how="any",
                axis=0,
                ignore_index=True,
            )
        )
        display(df)

Âá¶ÁêÜÁ∑èÊï∞: 8 Âõû


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:   0%|          | 0/8 [00:00<?, ?it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_EPS_QoQ


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  12%|‚ñà‚ñé        | 1/8 [00:01<00:08,  1.16s/it]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_EPS_YoY


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  25%|‚ñà‚ñà‚ñå       | 2/8 [00:02<00:05,  1.01it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_EPS_CAGR_3Y


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  38%|‚ñà‚ñà‚ñà‚ñä      | 3/8 [00:02<00:03,  1.39it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_EPS_CAGR_5Y


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 4/8 [00:02<00:02,  1.83it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_PE_RATIO_QoQ


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 5/8 [00:03<00:02,  1.37it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_PE_RATIO_YoY


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 6/8 [00:04<00:01,  1.28it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_PE_RATIO_CAGR_3Y


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó:  88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 7/8 [00:05<00:00,  1.49it/s]

Unnamed: 0,date,P_SYMBOL,FG_COMPANY_NAME,Asset ID,GICS Sector,Weight (%),BEST_PE_RATIO_CAGR_5Y


„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 8/8 [00:05<00:00,  1.49it/s]


### 4-5. ROIC Label FactorÔºàFactsetÔºâ

-   ROIC(ROE) + Security Code
    -   „Çª„ÇØ„Çø„Éº‰∏≠Á´ã
    -   ÈáëËûç„Çª„ÇØ„Çø„Éº„ÅÆ„Åø ROIC „ÅÆ‰ª£„Çè„Çä„Å´ ROE „Çí‰ΩøÁî®Ôºà„Åü„Å†„Åó„Éá„Éº„Çø„Éï„É¨„Éº„É†„ÅÆ„Ç´„É©„É†Âêç„ÅØ ROIC „ÅßË°®Ë®òÔºâ


In [None]:
# ----------------------------------------
# 1. get ROIC and ROE data
# 2. get security info(Index constituents)
# ----------------------------------------
df_roic_and_roe = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=["FF_ROIC", "FF_ROE"]
)
security_info = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)

# ----------------------------------------
# merge
# ----------------------------------------
df_roic_merged = (
    factset_utils.merge_idx_constituensts_and_financials(
        df_weight=security_info, df_factor=df_roic_and_roe
    )
    .assign(
        ROIC=lambda x: np.where(
            x["GICS Sector"] == "Financials", x["FF_ROE"], x["FF_ROIC"]
        )
    )
    .dropna(subset=["Weight (%)", "ROIC"], how="any", ignore_index=True)
    .drop(columns=["FF_ROIC", "FF_ROE"])
)

# ----------------------------------------
# sector‰∏≠Á´ã„ÅßROIC„Çí„É©„É≥„Ç≠„É≥„Ç∞
# ----------------------------------------
year_period = 3  # year_period„Åæ„Åß„Åï„Åã„ÅÆ„Åº„Å£„Å¶ROIC„ÅÆÁßªË°å„ÇíË¶ã„Çã

df_roic_merged = roic_utils.add_factor_rank_cols(
    df_roic_merged, factor_name="ROIC"
).rename(columns={"P_SYMBOL": "Symbol"})
df_roic_merged = roic_utils.add_shifted_factor_cols_month(
    df_roic_merged,
    factor_name="ROIC_Rank",
    shift_month=list(range(1, int(year_period * 12) + 1)),
    shift_direction="Past",
).rename(columns={"Symbol": "P_SYMBOL"})


# ----------------------------------------
# ROIC„É©„Éô„É´„Çí‰ªò‰∏é
# ----------------------------------------
roic_label_name = f"ROIC_label_Past{year_period}Y"
df_roic_merged[roic_label_name] = df_roic_merged.apply(
    lambda row: roic_utils.test_assign_roic_label(
        row=row,
        freq="annual",
        shift_direction="Past",
        year_period=year_period,
        judge_by_slope=False,
    ),
    axis=1,
)

df = df_roic_merged.copy()
df = (
    df[["date", "P_SYMBOL", roic_label_name]]
    .rename(columns={roic_label_name: "value"})
    .dropna(subset=["value"], ignore_index=True)
    .assign(variable=roic_label_name, date=lambda row: pd.to_datetime(row["date"]))
)
display(df.tail(10))

# ----------------------------------------
# „Éá„Éº„Çø„Éô„Éº„Çπ‰øùÂ≠ò
# ----------------------------------------
factset_utils.store_to_database(
    df=df, db_path=financials_db_path, table_name=roic_label_name
)

Unnamed: 0,date,P_SYMBOL,value,variable
220535,2025-10-31,XYZ-US,drop to low,ROIC_label_Past3Y
220536,2025-10-31,YAR-NO,move to high,ROIC_label_Past3Y
220537,2025-10-31,YUM-US,remain high,ROIC_label_Past3Y
220538,2025-10-31,Z-US,remain low,ROIC_label_Past3Y
220539,2025-10-31,Z74-SG,others,ROIC_label_Past3Y
220540,2025-10-31,ZAL-DE,remain low,ROIC_label_Past3Y
220541,2025-10-31,ZBH-US,others,ROIC_label_Past3Y
220542,2025-10-31,ZBRA-US,others,ROIC_label_Past3Y
220543,2025-10-31,ZTS-US,remain high,ROIC_label_Past3Y
220544,2025-10-31,ZURN-CH,remain high,ROIC_label_Past3Y


„ÉÜ„Éº„Éñ„É´ 'ROIC_label_Past3Y' „Å´ËøΩÂä†„Åô„Åπ„ÅçÊñ∞„Åó„ÅÑ„Éá„Éº„Çø„ÅØ„ÅÇ„Çä„Åæ„Åõ„Çì„Åß„Åó„Åü„ÄÇ„Çπ„Ç≠„ÉÉ„Éó„Åó„Åæ„Åô„ÄÇ


In [None]:
df = df_roic_merged.copy()

# --- label count ---
roic_count = pd.pivot(
    pd.DataFrame(
        df.groupby(["date", "GICS Sector", roic_label_name])["P_SYMBOL"].count()
    ).reset_index(),
    index=["date", "GICS Sector"],
    columns=roic_label_name,
).reset_index()
display(roic_count.loc[roic_count["GICS Sector"] == "Information Technology"])

roic_count = pd.pivot(
    pd.DataFrame(
        df.groupby(["date", "GICS Sector", "ROIC_Rank"])["P_SYMBOL"].count()
    ).reset_index(),
    index=["date", "GICS Sector"],
    columns="ROIC_Rank",
).reset_index()
display(roic_count.loc[roic_count["GICS Sector"] == "Information Technology"])

# --- weight check ---
weight_total_count = (
    df.groupby(["date"])["Weight (%)"]
    .agg(["count", "sum"])
    .rename(columns={"count": "Num of Securities", "sum": "Total Weight (%)"})
).sort_index()

weight_sector_count = (
    df.groupby(["date", "GICS Sector"])["Weight (%)"]
    .agg(["count", "sum"])
    .rename(columns={"count": "Num of Securities", "sum": "Total Weight (%)"})
).sort_index()
display(weight_total_count)
display(weight_sector_count)

roic_label_count = (
    df.groupby(["date", roic_label_name])["Weight (%)"]
    .agg(["count", "sum"])
    .rename(columns={"count": "Num of Securities", "sum": "Total Weight (%)"})
).sort_index()
display(roic_label_count)

Unnamed: 0_level_0,date,GICS Sector,P_SYMBOL,P_SYMBOL,P_SYMBOL,P_SYMBOL,P_SYMBOL
ROIC_label_Past3Y,Unnamed: 1_level_1,Unnamed: 2_level_1,drop to low,move to high,others,remain high,remain low
11,2008-08-31,Information Technology,7.0,8.0,17.0,22.0,10.0
21,2008-09-30,Information Technology,5.0,8.0,17.0,23.0,13.0
31,2008-10-31,Information Technology,6.0,9.0,18.0,21.0,12.0
41,2008-11-30,Information Technology,6.0,10.0,16.0,23.0,10.0
51,2008-12-31,Information Technology,5.0,10.0,20.0,22.0,9.0
...,...,...,...,...,...,...,...
2136,2025-06-30,Information Technology,12.0,15.0,28.0,28.0,20.0
2147,2025-07-31,Information Technology,13.0,16.0,27.0,28.0,18.0
2158,2025-08-31,Information Technology,14.0,15.0,25.0,29.0,18.0
2169,2025-09-30,Information Technology,11.0,13.0,25.0,28.0,20.0


Unnamed: 0_level_0,date,GICS Sector,P_SYMBOL,P_SYMBOL,P_SYMBOL,P_SYMBOL,P_SYMBOL
ROIC_Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,rank1,rank2,rank3,rank4,rank5
7,2005-08-31,Information Technology,25,25,24,25,25
17,2005-09-30,Information Technology,25,25,25,25,25
27,2005-10-31,Information Technology,25,25,25,25,26
37,2005-11-30,Information Technology,25,25,25,25,26
47,2005-12-31,Information Technology,25,25,25,25,25
...,...,...,...,...,...,...,...
2492,2025-06-30,Information Technology,23,22,23,22,23
2503,2025-07-31,Information Technology,23,22,22,22,23
2514,2025-08-31,Information Technology,23,22,22,22,23
2525,2025-09-30,Information Technology,22,21,22,21,22


Unnamed: 0_level_0,Num of Securities,Total Weight (%)
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2005-08-31,1296,91.341262
2005-09-30,1291,91.723118
2005-10-31,1290,91.461135
2005-11-30,1284,91.742185
2005-12-31,1318,95.426320
...,...,...
2025-06-30,1127,99.065370
2025-07-31,1126,99.203499
2025-08-31,1127,99.198368
2025-09-30,1095,98.372572


Unnamed: 0_level_0,Unnamed: 1_level_0,Num of Securities,Total Weight (%)
date,GICS Sector,Unnamed: 2_level_1,Unnamed: 3_level_1
2005-08-31,Communication Services,40,4.554126
2005-08-31,Consumer Discretionary,231,10.119443
2005-08-31,Consumer Staples,91,8.564437
2005-08-31,Energy,73,10.562697
2005-08-31,Financials,280,21.092156
...,...,...,...
2025-10-31,Industrials,195,9.800280
2025-10-31,Information Technology,82,18.765398
2025-10-31,Materials,78,3.004112
2025-10-31,Real Estate,61,1.787467


Unnamed: 0_level_0,Unnamed: 1_level_0,Num of Securities,Total Weight (%)
date,ROIC_label_Past3Y,Unnamed: 2_level_1,Unnamed: 3_level_1
2008-04-30,remain high,2,0.402252
2008-05-31,remain high,2,0.411722
2008-06-30,others,2,0.494384
2008-07-31,others,2,0.470749
2008-08-31,drop to low,106,8.383848
...,...,...,...
2025-10-31,drop to low,148,12.400099
2025-10-31,move to high,149,9.408305
2025-10-31,others,293,18.037149
2025-10-31,remain high,201,34.724012


### 4-6. ROIC ÂàÜ‰ΩçÁßªÂãï(‚ö†Ô∏ènot completed)

-   3 Âπ¥Ââç„ÅÆ ROIC5 ÂàÜ‰Ωç -> ÁèæÂú®„ÅÆ ROIC5 ÂàÜ‰Ωç„Å∏„ÅÆÁßªÂãï„Çí„É©„Éô„É™„É≥„Ç∞
-   Financials „Çª„ÇØ„Çø„Éº„ÅØ ROIC „ÅÆ‰ª£„Çè„Çä„Å´ ROE ‰ΩøÁî®


In [None]:
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)

df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=["FF_ROIC_Rank", "FF_ROE_Rank"]
)

df_roic_rank = (
    factset_utils.merge_idx_constituensts_and_financials(
        df_weight=df_weight, df_factor=df_factor
    )
    .assign(
        ROIC_Rank=lambda x: np.where(
            x["GICS Sector"] == "Financials", x["FF_ROE_Rank"], x["FF_ROIC_Rank"]
        )
    )
    .drop(columns=["FF_ROIC_Rank", "FF_ROE_Rank"])
    .rename(columns={"SEDOL": "Symbol"})
)
df_roic_rank = (
    roic_utils.add_shifted_factor_cols_month(
        df_roic_rank,
        factor_name="ROIC_Rank",
        shift_month=[36],
        shift_direction="Past",
    )
    .rename(columns={"Symbol": "SEDOL"})
    .dropna(subset=["ROIC_Rank", "ROIC_Rank_36MAgo"], how="any")
    .sort_values(["SEDOL", "date"], ignore_index=True)
)

display(df_roic_rank)

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),ROIC_Rank,ROIC_Rank_36MAgo
0,2015-06-30,ADN-GB,0003128,UKIDUK1,Aberdeen Asset Management PLC,Financials,Financial Services,0.019301,rank1,rank1
1,2015-07-31,ADN-GB,0003128,UKIDUK1,Aberdeen Asset Management PLC,Financials,Financial Services,0.016965,rank1,rank1
2,2015-08-31,ADN-GB,0003128,UKIDUK1,Aberdeen Asset Management PLC,Financials,Financial Services,0.015731,rank1,rank1
3,2015-09-30,ADN-GB,0003128,UKIDUK1,Aberdeen Asset Management PLC,Financials,Financial Services,0.014948,rank1,rank1
4,2015-10-31,ADN-GB,0003128,UKIDUK1,Aberdeen Asset Management PLC,Financials,Financial Services,0.016557,rank1,rank1
...,...,...,...,...,...,...,...,...,...,...
206145,2025-06-30,DHER-DE,BZCNB42,GER2BR1,„Éá„É™„Éê„É™„Éº„Éª„Éí„Éº„É≠„Éº,Consumer Discretionary,Consumer Services,0.007228,rank5,rank5
206146,2025-07-31,DHER-DE,BZCNB42,GER2BR1,„Éá„É™„Éê„É™„Éº„Éª„Éí„Éº„É≠„Éº,Consumer Discretionary,Consumer Services,0.007958,rank5,rank5
206147,2025-08-31,DHER-DE,BZCNB42,GER2BR1,„Éá„É™„Éê„É™„Éº„Éª„Éí„Éº„É≠„Éº,Consumer Discretionary,Consumer Services,0.006908,rank5,rank5
206148,2025-09-30,DHER-DE,BZCNB42,GER2BR1,„Éá„É™„Éê„É™„Éº„Éª„Éí„Éº„É≠„Éº,Consumer Discretionary,Consumer Services,0.007233,rank5,rank5


In [None]:
g = pd.DataFrame(df_roic_rank.groupby("date")["Weight (%)"].agg("sum"))
display(g)

g_count = (
    pd.DataFrame(df_roic_rank.groupby(["date", "GICS Sector"])["SEDOL"].count())
    .reset_index()
    .pivot(index=["date"], columns="GICS Sector", values="SEDOL")
)
display(g_count)

Unnamed: 0_level_0,Weight (%)
date,Unnamed: 1_level_1
2008-08-31,74.801052
2008-09-30,76.101417
2008-10-31,76.204146
2008-11-30,76.204061
2008-12-31,80.869285
...,...
2025-06-30,93.851450
2025-07-31,94.125253
2025-08-31,93.974811
2025-09-30,93.355485


GICS Sector,Communication Services,Consumer Discretionary,Consumer Staples,Energy,Financials,Health Care,Industrials,Information Technology,Materials,Real Estate,Utilities
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2008-08-31,30.0,131.0,63.0,57.0,193.0,59.0,124.0,69.0,63.0,,44.0
2008-09-30,31.0,126.0,61.0,58.0,190.0,63.0,123.0,70.0,65.0,,41.0
2008-10-31,31.0,127.0,61.0,59.0,190.0,63.0,122.0,70.0,65.0,,42.0
2008-11-30,31.0,117.0,60.0,59.0,183.0,62.0,119.0,68.0,65.0,,42.0
2008-12-31,28.0,131.0,64.0,58.0,183.0,67.0,121.0,72.0,65.0,,48.0
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-30,50.0,92.0,79.0,43.0,179.0,96.0,172.0,99.0,65.0,60.0,64.0
2025-07-31,50.0,92.0,79.0,43.0,179.0,97.0,172.0,98.0,65.0,60.0,64.0
2025-08-31,49.0,91.0,77.0,43.0,179.0,95.0,172.0,97.0,64.0,60.0,64.0
2025-09-30,49.0,88.0,76.0,44.0,174.0,93.0,171.0,93.0,64.0,59.0,62.0


In [None]:
g_roic_rank_shift = (
    pd.DataFrame(
        df_roic_rank.groupby(["date", "ROIC_Rank", "ROIC_Rank_36MAgo"])["SEDOL"].count()
    )
    .reset_index()
    .rename(columns={"SEDOL": "n_SEDOL"})
)
g_sedol_count_present_rank = (
    pd.DataFrame(g_roic_rank_shift.groupby(["date", "ROIC_Rank"])["n_SEDOL"].sum())
    .reset_index()
    .rename(columns={"n_SEDOL": "n_SEDOL_ROIC_Rank"})
)

g_sedol_count_past_rank = (
    pd.DataFrame(
        g_roic_rank_shift.groupby(["date", "ROIC_Rank_36MAgo"])["n_SEDOL"].sum()
    )
    .reset_index()
    .rename(columns={"n_SEDOL": "n_SEDOL_ROIC_Rank_36MAgo"})
)

g_roic_rank_shift = pd.merge(
    g_roic_rank_shift, g_sedol_count_present_rank, on=["date", "ROIC_Rank"], how="left"
)
g_roic_rank_shift = pd.merge(
    g_roic_rank_shift,
    g_sedol_count_past_rank,
    on=["date", "ROIC_Rank_36MAgo"],
    how="left",
)

g_roic_rank_shift = g_roic_rank_shift.assign(
    n_SEDOL_ROIC_Rank_pct=lambda x: x["n_SEDOL"].div(x["n_SEDOL_ROIC_Rank"]),
    n_SEDOL_ROIC_Rank_36MAgo_pct=lambda x: x["n_SEDOL"].div(
        x["n_SEDOL_ROIC_Rank_36MAgo"]
    ),
)
pd.options.display.precision = 2
display(g_roic_rank_shift[g_roic_rank_shift["ROIC_Rank"] == "rank1"].tail(50))

Unnamed: 0,date,ROIC_Rank,ROIC_Rank_36MAgo,n_SEDOL,n_SEDOL_ROIC_Rank,n_SEDOL_ROIC_Rank_36MAgo,n_SEDOL_ROIC_Rank_pct,n_SEDOL_ROIC_Rank_36MAgo_pct
4925,2025-01-31,rank1,rank1,126,208,225,0.61,0.56
4926,2025-01-31,rank1,rank2,42,208,210,0.2,0.2
4927,2025-01-31,rank1,rank3,18,208,217,0.09,0.08
4928,2025-01-31,rank1,rank4,10,208,206,0.05,0.05
4929,2025-01-31,rank1,rank5,12,208,188,0.06,0.06
4950,2025-02-28,rank1,rank1,126,209,224,0.6,0.56
4951,2025-02-28,rank1,rank2,42,209,210,0.2,0.2
4952,2025-02-28,rank1,rank3,18,209,218,0.09,0.08
4953,2025-02-28,rank1,rank4,10,209,205,0.05,0.05
4954,2025-02-28,rank1,rank5,13,209,191,0.06,0.07


### 4-7. Size Factor


#### 1. QoQ„ÄÅYoY„ÄÅ3Yr CAGR, 5Y CAGR „ÇíË®àÁÆó„Åó„Å¶„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
factor_list = ["FF_SALES", "FF_ASSETS", "FF_COM_EQ", "FF_SHLDRS_EQ"]
period_list = ["QoQ", "YoY", "CAGR_3Y", "CAGR_5Y"]
factor_growth_list = [
    f"{factor}_{periods}"
    for factor, periods in itertools.product(factor_list, period_list)
]

In [None]:
query = [f"SELECT * FROM `{table}`" for table in factor_list]
query = " UNION ALL ".join(query)

# ------------------------------------------------------
# load from database
# ------------------------------------------------------
with sqlite3.connect(financials_db_path) as conn:
    df_all = (
        pd.read_sql(query, con=conn, parse_dates=["date"])
        .sort_values("date", ignore_index=True)
        .assign(
            variable=lambda x: x["variable"].astype("category"),
            P_SYMBOL=lambda x: x["P_SYMBOL"].astype("category"),
        )
    ).sort_values(["variable", "P_SYMBOL", "date"], ignore_index=True)

display(df_all)
# groupby("variable")„Çí‰Ωø„ÅÜ„Åì„Å®„Åß„ÄÅdf_allÂÖ®‰Ωì„Çí‰ΩïÂ∫¶„ÇÇËµ∞Êüª(loc)„Åô„Çã„Ç≥„Çπ„Éà„Çí„Çº„É≠„Å´„Åô„Çã
# observed=True „ÅØcategoryÂûã„Çí‰Ωø„ÅÜÂ†¥Âêà„ÅÆÈ´òÈÄüÂåñ„Ç™„Éó„Ç∑„Éß„É≥
grouped = df_all.groupby("variable", observed=True)

# ------------------------------------------------------
# calculate growth and store to database
# ------------------------------------------------------
total_steps = len(factor_list)
for factor_name, df_factor in tqdm(grouped, total=total_steps, desc="Factors"):
    # df_factor„ÅØView(ÂèÇÁÖß)„ÅÆÂèØËÉΩÊÄß„Åå„ÅÇ„Çã„Åü„ÇÅ„ÄÅË®àÁÆóÁî®„Å´„Ç≥„Éî„Éº„Çí‰ΩúÊàê
    # „Åì„Åì„Åß„É°„É¢„É™„ÇíÈ£ü„ÅÜ„Åå„ÄÅfactorÂçò‰Ωç„Å™„ÅÆ„ÅßÂÖ®‰Ωì„Ç≥„Éî„Éº„Çà„Çä„ÅØËªΩ„ÅÑ
    # „Åã„Å§„ÄÅ„É´„Éº„Éó„ÅÆÊúÄÂæå„ÅßËß£Êîæ„Åï„Çå„Çã
    df_base = df_factor.copy()

    for growth in period_list:
        new_variable_name = f"{factor_name}_{growth}"
        df_result = roic_utils.calculate_growth(
            df=df_base, data_name=str(factor_name), growth_type=growth
        )

        # store to database
        db_utils.delete_table_from_database(
            db_path=financials_db_path, table_name=new_variable_name
        )
        factset_utils.store_to_database(
            df=df_result,
            db_path=financials_db_path,
            table_name=new_variable_name,
            verbose=False,
        )

    # ---- „É°„É¢„É™ÁÆ°ÁêÜ ----
    # ‰∏Ä„Å§„ÅÆ„Éï„Ç°„ÇØ„Çø„ÉºÂá¶ÁêÜ„ÅåÁµÇ„Çè„Å£„Åü„Çâ„ÄÅ‰ΩøÁî®„Åó„Åü‰∏ÄÊôÇÂ§âÊï∞„ÇíÂâäÈô§„Åó„Å¶GCÂÆüË°å
    del df_base
    gc.collect()

Unnamed: 0,date,P_SYMBOL,value,variable
0,2005-08-31,0HSW-GB,2448.485005,FF_ASSETS
1,2005-09-30,0HSW-GB,2333.443621,FF_ASSETS
2,2005-10-31,0HSW-GB,2333.443621,FF_ASSETS
3,2005-11-30,0HSW-GB,2333.443621,FF_ASSETS
4,2005-12-30,0HSW-GB,,FF_ASSETS
...,...,...,...,...
3165047,2025-06-30,ZURN-CH,24724.999387,FF_SHLDRS_EQ
3165048,2025-07-31,ZURN-CH,24724.999387,FF_SHLDRS_EQ
3165049,2025-08-29,ZURN-CH,24724.999387,FF_SHLDRS_EQ
3165050,2025-09-30,ZURN-CH,24724.999387,FF_SHLDRS_EQ


Factors: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 4/4 [02:13<00:00, 33.38s/it]


#### 2. „Éï„Ç°„ÇØ„Çø„Éº„ÅÆ„É©„É≥„ÇØ„ÇíË®àÁÆó„Åó„ÄÅ„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã„ÄÇ


In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))
factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight, factor_list=factor_list, financials_db_path=financials_db_path
)

üöÄ Âá¶ÁêÜÈñãÂßã: 16 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 16/16 [02:29<00:00,  9.33s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


#### 3. „Éá„Éº„Çø„É≠„Éº„Éâ&Ê¨†ÊêçÂÄ§Á¢∫Ë™çÔºöÊ¨†ÊêçÂÄ§„ÅØ„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå


In [None]:
# „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó„ÅÆ„Åü„ÇÅ„ÅÆ„Éá„Ç£„Çπ„ÇØ„É™„Éó„Çø„Éº„ÇíÊåáÂÆö
factor_list = [
    "FF_SALES_CAGR_3Y_PctRank",
    "FF_ASSETS_PctRank",
    "FF_COM_EQ_PctRank",
    "FF_SHLDRS_EQ_PctRank",
]

# -----------------------------------
# load data
# -----------------------------------

df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=factor_list
)

df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)
df = (
    factset_utils.merge_idx_constituents_and_financials(
        df_weight=df_weight, df_factor=df_factor
    )
    .fillna(np.nan)
    .dropna(subset=factor_list, how="all")
    .dropna(subset=["Weight (%)"], ignore_index=True)
)
display(df.head())

df = factset_utils.check_missing_value_and_fill_by_sector_median(
    df=df, factor_list=factor_list
)

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_ASSETS_PctRank,FF_COM_EQ_PctRank,FF_SALES_CAGR_3Y_PctRank,FF_SHLDRS_EQ_PctRank
0,2005-08-31,0HSW-GB,3335442,UKIGDP1,Telent PLC,Information Technology,Technology Hardware & Equipment,0.005496,0.421053,0.270677,,0.270677
1,2005-08-31,0II3.XX1-GB,299303,UKIBKB1,Emap Plc,Consumer Discretionary,Media,0.019501,0.24031,0.128906,,0.128906
2,2005-08-31,0MDJ-GB,610700,UKIBBF1,Cadbury PLC,Consumer Staples,Food Beverage & Tobacco,0.106288,0.765957,0.621053,,0.621053
3,2005-08-31,0N1N-GB,230346,UKIBGE1,Arriva Plc Ord,Industrials,Transportation,0.010309,0.331754,0.273585,,0.268868
4,2005-08-31,0N3I-GB,896265,UKIDBM1,Tomkins PLC,Industrials,Capital Goods,0.020285,0.421801,0.34434,,0.34434


üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºàË£úÂÆåÂâçÔºâ
FF_SALES_CAGR_3Y_PctRank                     : 59,070‰ª∂ ( 18.5%)
FF_ASSETS_PctRank                            :  1,966‰ª∂ (  0.6%)
FF_COM_EQ_PctRank                            :  1,756‰ª∂ (  0.6%)
FF_SHLDRS_EQ_PctRank                         :  1,756‰ª∂ (  0.6%)

‚è≥ „Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå‰∏≠...

üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºà„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§Ë£úÂÆåÂæåÔºâ
‚ö†Ô∏è FF_SALES_CAGR_3Y_PctRank                     : 52,190‰ª∂ ( 16.4%) | Ë£úÂÆå: 6,880‰ª∂
‚úÖ FF_ASSETS_PctRank                            :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 1,966‰ª∂
‚úÖ FF_COM_EQ_PctRank                            :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 1,756‰ª∂
‚úÖ FF_SHLDRS_EQ_PctRank                         :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 1,756‰ª∂

‚ö†Ô∏è  „Çª„ÇØ„Çø„ÉºË£úÂÆå„ÅßÂüã„Åæ„Çâ„Å™„ÅÑÊ¨†ÊêçÂÄ§„ÇíÂÖ®‰Ωì‰∏≠Â§ÆÂÄ§„ÅßÂÜçË£úÂÆå...
------------------------------------------------------------
‚ö†Ô∏è  FF_SALES_CAGR_3Y_PctRank: ÂÖ®‰Ωì„Åß„ÇÇÊ¨†Êêç ‚Üí 0.5Ôºà‰∏≠Á´ãÂÄ§Ôºâ„ÅßË£úÂÆå
-

#### 4. „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó


In [None]:
# -----------------------------------
# Factor„ÅÆË®àÁÆó
# -----------------------------------
# „Ç¶„Çß„Ç§„ÉàË®≠ÂÆö
blend_weight = {
    "FF_SALES_CAGR_3Y_PctRank": 0.25,
    "FF_ASSETS_PctRank": 0.25,
    "FF_COM_EQ_PctRank": 0.25,
    "FF_SHLDRS_EQ_PctRank": 0.25,
}
factor_name = "Factor_Size"

df = factset_utils.create_factor(
    df=df, factor_name=factor_name, blend_weight=blend_weight
)
display(df.tail(5))

# -----------------------------------
# Store to database
# -----------------------------------
for variable in [f"{factor_name}_Score", f"{factor_name}_Score_Rank"]:
    df_slice = (
        df[["date", "P_SYMBOL", variable]]
        .assign(variable=variable, date=lambda x: pd.to_datetime(x["date"]))
        .rename(columns={variable: "value"})
    )
    db_utils.delete_table_from_database(db_path=financials_db_path, table_name=variable)
    factset_utils.store_to_database(
        df=df_slice,
        db_path=financials_db_path,
        table_name=variable,
        verbose=True,
    )

[autoreload of src.factset_utils failed: Traceback (most recent call last):
  File "c:\Users\Yuki Hata\Desktop\papers\.venv\Lib\site-packages\IPython\extensions\autoreload.py", line 322, in check
    elif self.deduper_reloader.maybe_reload_module(m):
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
  File "c:\Users\Yuki Hata\Desktop\papers\.venv\Lib\site-packages\IPython\extensions\deduperreload\deduperreload.py", line 545, in maybe_reload_module
    new_source_code = f.read()
UnicodeDecodeError: 'cp932' codec can't decode byte 0x87 in position 725: illegal multibyte sequence
]



üìä Factor Ë®àÁÆó
„Ç¶„Çß„Ç§„ÉàË®≠ÂÆö:
  FF_SALES_CAGR_3Y_PctRank                     : 25.0%
  FF_ASSETS_PctRank                            : 25.0%
  FF_COM_EQ_PctRank                            : 25.0%
  FF_SHLDRS_EQ_PctRank                         : 25.0%

‚úÖ Score Ë®àÁÆóÂÆå‰∫Ü
   Âπ≥Âùá: 0.5039
   Ê®ôÊ∫ñÂÅèÂ∑Æ: 0.2127
   ÊúÄÂ∞èÂÄ§: 0.0150
   ÊúÄÂ§ßÂÄ§: 1.0000

üìä Rank ÂàÜÂ∏É:
  rank5: 63,903‰ª∂ ( 20.0%)
  rank4: 63,742‰ª∂ ( 20.0%)
  rank3: 63,744‰ª∂ ( 20.0%)
  rank2: 63,748‰ª∂ ( 20.0%)
  rank1: 63,833‰ª∂ ( 20.0%)

üéØ ÊúÄÁµÇ„Éá„Éº„Çø„Çµ„É≥„Éó„É´:
             date P_SYMBOL    SEDOL             GICS Sector  \
318950 2025-10-31   WTB-GB  B1KJJ40  Consumer Discretionary   
318951 2025-10-31   WTC-AU  BZ8GX83  Information Technology   
318952 2025-10-31  WTRG-US  BLCF3J9               Utilities   
318953 2025-10-31   WTW-US  BDB6Q21              Financials   
318954 2025-10-31    WY-US  2958936             Real Estate   
318955 2025-10-31     X-CA  B8KH5G7              Financials 

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_ASSETS_PctRank,FF_COM_EQ_PctRank,FF_SALES_CAGR_3Y_PctRank,FF_SHLDRS_EQ_PctRank,Factor_Size_Score,Factor_Size_Score_Rank
318965,2025-10-31,ZAL-DE,BQV0SV7,GERZQZ1,„Ç∂„É©„É≥„Éâ,Consumer Discretionary,Consumer Discretionary Distribution & Retail,0.008099,0.282353,0.364706,0.755814,0.364706,0.441895,rank3
318966,2025-10-31,ZBH-US,2783815,USA4JT1,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Health Care,Health Care Equipment & Services,0.025627,0.534653,0.633663,0.438776,0.633663,0.560189,rank3
318967,2025-10-31,ZBRA-US,2989356,USAP8H1,„Çº„Éñ„É©„Éª„ÉÜ„ÇØ„Éé„É≠„Ç∏„Éº„Ç∫„Éª„Ç≥„Éº„Éù„É¨„Éº„Ç∑„Éß„É≥ Class A,Information Technology,Technology Hardware & Equipment,0.017637,0.349398,0.39759,0.190476,0.39759,0.333764,rank4
318968,2025-10-31,ZTS-US,B95WG16,USBANZ1,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,Health Care,Pharmaceuticals Biotechnology & Life Sciences,0.08263,0.415842,0.376238,0.459184,0.376238,0.406875,rank4
318969,2025-10-31,ZURN-CH,5983816,SWIAFM2,„ÉÅ„É•„Éº„É™„ÉÉ„Éí„Éª„Ç§„É≥„Ç∑„É•„Ç¢„É©„É≥„Çπ„Éª„Ç∞„É´„Éº„Éó,Financials,Insurance,0.131275,0.697436,0.641026,0.952632,0.630769,0.730466,rank1


Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ318947 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Size_Score: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ318947 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Size_Score_Rank: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ


### 4-8. Leverage Factor

-   FF_DEBT_EQ(Debt to Equity ratio)
-   FF_LIABS_SHLDRS_EQ(Total Liabilities to Shareholders' Equity ratio)
-   FF_NET_DEBT(Net Debt)


#### (üöß ÂÆüË£Ö‰∫àÂÆö)üìù „Éá„Ç£„Çπ„ÇØ„É™„Éó„Çø„ÉºËøΩÂä†ÔºöNET_DEBT_TO_EBITDA_OPER

-> FF_NET_DEBT / FF_EBITDA_OPER


In [None]:
descriptor_list = ["FF_NET_DEBT", "FF_EBITDA_OPER"]
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))
# „Éï„Ç°„ÇØ„Çø„Éº
df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=descriptor_list
).assign(date=lambda x: pd.to_datetime(x["date"]))
df_merged = factset_utils.merge_idx_constituents_and_financials(
    df_weight=df_weight, df_factor=df_factor
)

df_merged["NET_DEBT_TO_EBITDA_OPER"] = df_merged["FF_NET_DEBT"].div(
    df_merged["FF_EBITDA_OPER"]
)
display(df_merged.loc[df_merged["NET_DEBT_TO_EBITDA_OPER"] < 0])

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_EBITDA_OPER,FF_NET_DEBT,NET_DEBT_TO_EBITDA_OPER
78939,2005-08-31,0UAN-GB,0128269,UKIAYK1,Invesco Ltd.,Financials,Financial Services,0.025020,156.54217,-320.523897,-2.047524
78956,2005-08-31,2878-HK,B00G568,HKGCXQ1,Solomon Systech (International) Ltd.,Information Technology,Semiconductors & Semiconductor Equipment,0.002918,45.57000,-152.981004,-3.357055
78960,2005-08-31,315-HK,6856995,HKGBCL1,SmarTone Telecommunications Holdings Limited,Communication Services,Telecommunication Services,0.001761,50.32445,-148.727957,-2.955382
78963,2005-08-31,330-HK,6321642,HKGAXW1,Esprit Holdings Limited,Consumer Discretionary,Consumer Discretionary Distribution & Retail,0.036683,283.80326,-222.383163,-0.783582
78966,2005-08-31,494-HK,6286257,HKGAJG1,Li & Fung Limited,Consumer Discretionary,Consumer Discretionary Distribution & Retail,0.018461,76.74025,-18.533442,-0.241509
...,...,...,...,...,...,...,...,...,...,...,...
400839,2025-10-31,WST-US,2950482,USAOV41,„Ç¶„Ç®„Çπ„Éà„Éª„Éï„Ç°„Éº„Éû„Ç∑„É•„Éº„ÉÜ„Ç£„Ç´„É´„Éª„Çµ„Éº„Éì„Ç∑„Éº„Ç∫,Health Care,Pharmaceuticals Biotechnology & Life Sciences,0.026103,216.70000,-325.500000,-1.502077
400841,2025-10-31,WTC-AU,BZ8GX83,AUSIDZ1,„ÉØ„Ç§„Ç∫„ÉÜ„ÉÉ„ÇØ„Éª„Ç∞„É≠„Éº„Éê„É´,Information Technology,Software & Services,0.011702,218.60000,-55.700003,-0.254803
400848,2025-10-31,XRO-AU,B8P4LP4,AUSHTH1,„Çº„É≠,Information Technology,Software & Services,0.020128,208.45848,-2004.614013,-9.616371
400850,2025-10-31,XYZ-US,BYNZGK1,USBDBN1,"Block, Inc. Class A",Financials,Financial Services,0.054288,865.35900,-1336.948000,-1.544963


#### 1. „Éï„Ç°„ÇØ„Çø„ÉºÂÄ§„ÇíË®àÁÆó„Åó„Å¶„Éá„Éº„Çø„Éô„Éº„Çπ„Å´‰øùÂ≠ò„Åô„Çã


In [None]:
factor_list = ["FF_DEBT_EQ", "FF_LIABS_SHLDRS_EQ", "FF_NET_DEBT"]

In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))

factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight, factor_list=factor_list, financials_db_path=financials_db_path
)

[autoreload of src.factset_utils failed: Traceback (most recent call last):
  File "c:\Users\Yuki Hata\Desktop\papers\.venv\Lib\site-packages\IPython\extensions\autoreload.py", line 322, in check
    elif self.deduper_reloader.maybe_reload_module(m):
         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
  File "c:\Users\Yuki Hata\Desktop\papers\.venv\Lib\site-packages\IPython\extensions\deduperreload\deduperreload.py", line 545, in maybe_reload_module
    new_source_code = f.read()
UnicodeDecodeError: 'cp932' codec can't decode byte 0x87 in position 725: illegal multibyte sequence
]


üöÄ Âá¶ÁêÜÈñãÂßã: 3 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3/3 [00:32<00:00, 10.69s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


„Éá„Éº„Çø„É≠„Éº„Éâ&Ê¨†ÊêçÂÄ§Á¢∫Ë™ç: Ê¨†ÊêçÂÄ§„ÅØ„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå


In [None]:
# „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó„ÅÆ„Åü„ÇÅ„ÅÆ„Éá„Ç£„Çπ„ÇØ„É™„Éó„Çø„Éº„ÇíÊåáÂÆö
factor_list = [
    "FF_DEBT_EQ_Inv_PctRank",
    "FF_LIABS_SHLDRS_EQ_Inv_PctRank",
    "FF_NET_DEBT_Inv_PctRank",
]

# -----------------------------------
# load data
# -----------------------------------

df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=factor_list
)

df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)
df = (
    factset_utils.merge_idx_constituents_and_financials(
        df_weight=df_weight, df_factor=df_factor
    )
    .fillna(np.nan)
    .dropna(subset=factor_list, how="all")
    .dropna(subset=["Weight (%)"], ignore_index=True)
)
display(df.head())

df = factset_utils.check_missing_value_and_fill_by_sector_median(
    df=df, factor_list=factor_list
)

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_DEBT_EQ_Inv_PctRank,FF_LIABS_SHLDRS_EQ_Inv_PctRank,FF_NET_DEBT_Inv_PctRank
0,2005-08-31,0HSW-GB,3335442,UKIGDP1,Telent PLC,Information Technology,Technology Hardware & Equipment,0.005496,0.542636,0.578947,
1,2005-08-31,0II3.XX1-GB,299303,UKIBKB1,Emap Plc,Consumer Discretionary,Media,0.019501,0.197531,0.758755,
2,2005-08-31,0MDJ-GB,610700,UKIBBF1,Cadbury PLC,Consumer Staples,Food Beverage & Tobacco,0.106288,0.108696,0.234043,0.12987
3,2005-08-31,0N1N-GB,230346,UKIBGE1,Arriva Plc Ord,Industrials,Transportation,0.010309,0.323671,0.671429,0.564246
4,2005-08-31,0N3I-GB,896265,UKIDBM1,Tomkins PLC,Industrials,Capital Goods,0.020285,0.328502,0.580952,0.519553


üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºàË£úÂÆåÂâçÔºâ
FF_DEBT_EQ_Inv_PctRank                       :  7,872‰ª∂ (  2.5%)
FF_LIABS_SHLDRS_EQ_Inv_PctRank               :    300‰ª∂ (  0.1%)
FF_NET_DEBT_Inv_PctRank                      : 18,313‰ª∂ (  5.8%)

‚è≥ „Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå‰∏≠...

üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºà„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§Ë£úÂÆåÂæåÔºâ
‚úÖ FF_DEBT_EQ_Inv_PctRank                       :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 7,872‰ª∂
‚úÖ FF_LIABS_SHLDRS_EQ_Inv_PctRank               :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 300‰ª∂
‚úÖ FF_NET_DEBT_Inv_PctRank                      :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 18,313‰ª∂

‚úÖ ÊúÄÁµÇÊ¨†ÊêçÂÄ§„ÉÅ„Çß„ÉÉ„ÇØ
‚úÖ FF_DEBT_EQ_Inv_PctRank                       : Ê¨†Êêç„Å™„Åó
‚úÖ FF_LIABS_SHLDRS_EQ_Inv_PctRank               : Ê¨†Êêç„Å™„Åó
‚úÖ FF_NET_DEBT_Inv_PctRank                      : Ê¨†Êêç„Å™„Åó
üéâ „Åô„Åπ„Å¶„ÅÆÊ¨†ÊêçÂÄ§„ÅåË£úÂÆå„Åï„Çå„Åæ„Åó„ÅüÔºÅ


„Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó


In [None]:
# -----------------------------------
# Factor„ÅÆË®àÁÆó
# -----------------------------------
# „Ç¶„Çß„Ç§„ÉàË®≠ÂÆö
blend_weight = {
    "FF_DEBT_EQ_Inv_PctRank": 1 / 3,
    "FF_LIABS_SHLDRS_EQ_Inv_PctRank": 1 / 3,
    "FF_NET_DEBT_Inv_PctRank": 1 / 3,
}
factor_name = "Factor_Leverage"

df = factset_utils.create_factor(
    df=df, factor_name=factor_name, blend_weight=blend_weight
)
display(df.tail(5))

# -----------------------------------
# Store to database
# -----------------------------------
for variable in [f"{factor_name}_Score", f"{factor_name}_Score_Rank"]:
    df_slice = (
        df[["date", "P_SYMBOL", variable]]
        .assign(variable=variable, date=lambda x: pd.to_datetime(x["date"]))
        .rename(columns={variable: "value"})
    )
    db_utils.delete_table_from_database(db_path=financials_db_path, table_name=variable)
    factset_utils.store_to_database(
        df=df_slice,
        db_path=financials_db_path,
        table_name=variable,
        verbose=True,
    )


üìä Factor Ë®àÁÆó
„Ç¶„Çß„Ç§„ÉàË®≠ÂÆö:
  FF_DEBT_EQ_Inv_PctRank                       : 33.3%
  FF_LIABS_SHLDRS_EQ_Inv_PctRank               : 33.3%
  FF_NET_DEBT_Inv_PctRank                      : 33.3%

‚úÖ Score Ë®àÁÆóÂÆå‰∫Ü
   Âπ≥Âùá: 0.4959
   Ê®ôÊ∫ñÂÅèÂ∑Æ: 0.2305
   ÊúÄÂ∞èÂÄ§: 0.0000
   ÊúÄÂ§ßÂÄ§: 0.9831

üìä Rank ÂàÜÂ∏É:
  rank5: 63,534‰ª∂ ( 20.0%)
  rank4: 63,382‰ª∂ ( 20.0%)
  rank3: 63,389‰ª∂ ( 20.0%)
  rank2: 63,377‰ª∂ ( 20.0%)
  rank1: 63,478‰ª∂ ( 20.0%)

üéØ ÊúÄÁµÇ„Éá„Éº„Çø„Çµ„É≥„Éó„É´:
             date P_SYMBOL    SEDOL             GICS Sector  \
317140 2025-10-31   WTB-GB  B1KJJ40  Consumer Discretionary   
317141 2025-10-31   WTC-AU  BZ8GX83  Information Technology   
317142 2025-10-31  WTRG-US  BLCF3J9               Utilities   
317143 2025-10-31   WTW-US  BDB6Q21              Financials   
317144 2025-10-31    WY-US  2958936             Real Estate   
317145 2025-10-31     X-CA  B8KH5G7              Financials   
317146 2025-10-31   XEL-US  2614807               Ut

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),FF_DEBT_EQ_Inv_PctRank,FF_LIABS_SHLDRS_EQ_Inv_PctRank,FF_NET_DEBT_Inv_PctRank,Factor_Leverage_Score,Factor_Leverage_Score_Rank
317155,2025-10-31,ZAL-DE,BQV0SV7,GERZQZ1,„Ç∂„É©„É≥„Éâ,Consumer Discretionary,Consumer Discretionary Distribution & Retail,0.008099,0.689189,0.717647,0.788235,0.731691,rank1
317156,2025-10-31,ZBH-US,2783815,USA4JT1,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Health Care,Health Care Equipment & Services,0.025627,0.536842,0.465347,0.39604,0.466076,rank3
317157,2025-10-31,ZBRA-US,2989356,USAP8H1,„Çº„Éñ„É©„Éª„ÉÜ„ÇØ„Éé„É≠„Ç∏„Éº„Ç∫„Éª„Ç≥„Éº„Éù„É¨„Éº„Ç∑„Éß„É≥ Class A,Information Technology,Technology Hardware & Equipment,0.017637,0.3625,0.650602,0.421687,0.478263,rank3
317158,2025-10-31,ZTS-US,B95WG16,USBANZ1,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,Health Care,Pharmaceuticals Biotechnology & Life Sciences,0.08263,0.136842,0.584158,0.465347,0.395449,rank4
317159,2025-10-31,ZURN-CH,5983816,SWIAFM2,„ÉÅ„É•„Éº„É™„ÉÉ„Éí„Éª„Ç§„É≥„Ç∑„É•„Ç¢„É©„É≥„Çπ„Éª„Ç∞„É´„Éº„Éó,Financials,Insurance,0.131275,0.583333,0.304124,0.371134,0.41953,rank4


Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ317137 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Leverage_Score: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ317137 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Factor_Leverage_Score_Rank: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ


### 4-9. Momentum/Reversal Factor


#### 1. Âå∫Èñì„É™„Çø„Éº„É≥„ÇíË®àÁÆó


In [None]:
# ----------------------------------------
# load price data
# ----------------------------------------
df_price = (
    roic_utils.load_FG_PRICE(db_path=financials_db_path)
    .reset_index()
    .astype({"P_SYMBOL": "category"})
    .assign(date=lambda x: pd.to_datetime(x["date"]))
).sort_values(["date", "P_SYMBOL"], ignore_index=True)

# ----------------------------------------
# Âå∫Èñì„É™„Çø„Éº„É≥„ÇíË®àÁÆó
# ----------------------------------------

df_calculated = roic_utils.calculate_interval_returns(df=df_price).drop(
    columns=["FG_PRICE"]
)
target_columns = [c for c in df_calculated.columns if c not in ["date", "P_SYMBOL"]]

# store to database
for col_name in target_columns:
    df_slice = df_calculated[["date", "P_SYMBOL", col_name]].copy()
    df_slice = (
        df_slice.rename(columns={col_name: "value"})
        .assign(variable=col_name)
        .reindex(columns=["date", "P_SYMBOL", "variable", "value"])
    )
    df_slice = df_slice.dropna(subset=["value"])
    factset_utils.store_to_database(
        df=df_slice, db_path=financials_db_path, table_name=col_name
    )

Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ354363 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_1YAgo_to_Current: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ355091 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_1YAgo_to_1MAgo: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ338974 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_2YAgo_to_1YAgo: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ318456 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_3YAgo_to_2YAgo: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ310497 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_3YAgo_to_1YAgo: „Éá„Éº„Çø„ÅÆÊõ∏„ÅçËæº„Åø„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü„ÄÇ
Êó¢Â≠ò„ÅÆ 0 Ë°å„Å®„ÅÆÈáçË§á„Çí„ÉÅ„Çß„ÉÉ„ÇØ„Åó„Åæ„Åó„Åü„ÄÇ381041 Ë°å„ÇíÊñ∞„Åü„Å´ËøΩÂä†„Åó„Åæ„Åô„ÄÇ
  -> Return_1MAgo_to_Curr

#### 2. „Éï„Ç°„ÇØ„Çø„ÉºÂÄ§„ÇíË®àÁÆó„Åó„Å¶ DB ‰øùÂ≠ò


In [None]:
# inverse = False
momentum_descriptor_list = [
    "Return_1YAgo_to_Current",
    "Return_1YAgo_to_1MAgo",
    "Return_2YAgo_to_1YAgo",
    "Return_3YAgo_to_2YAgo",
]

# inverse = True
reversal_descriptor_list = [
    "Return_3YAgo_to_1YAgo",
    "Return_1MAgo_to_Current",
]

In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))

# -----------------------------------
# Momentum -> inversed=False
# -----------------------------------
# „Çª„ÇØ„Çø„Éº‰∏≠Á´ã„Å™„Åó
# process_rank_calculation_store_to_db(
#     df_weight=df_weight,
#     factor_list=momentum_descriptor_list,
#     financials_db_path=financials_db_path,
#     sector_neutral_mode=False,
#     inversed=False,
# )

# # „Çª„ÇØ„Çø„Éº‰∏≠Á´ã„ÅÇ„Çä
# process_rank_calculation_store_to_db(
#     df_weight=df_weight,
#     factor_list=momentum_descriptor_list,
#     financials_db_path=financials_db_path,
#     sector_neutral_mode=True,
#     inversed=False,
# )

# -----------------------------------
# Reversal -> inversed=True
# -----------------------------------
# „Çª„ÇØ„Çø„Éº‰∏≠Á´ã„Å™„Åó
factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight,
    factor_list=reversal_descriptor_list,
    financials_db_path=financials_db_path,
    sector_neutral_mode=False,
    inversed=True,
)

# „Çª„ÇØ„Çø„Éº‰∏≠Á´ã„ÅÇ„Çä
factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight,
    factor_list=reversal_descriptor_list,
    financials_db_path=financials_db_path,
    sector_neutral_mode=True,
    inversed=True,
)

üöÄ Âá¶ÁêÜÈñãÂßã: 2 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ (Single Factor Mode)


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:15<00:00,  7.53s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü
üöÄ Âá¶ÁêÜÈñãÂßã: 2 ‰ª∂„ÅÆ„Çø„Çπ„ÇØ (Single Factor Mode)


RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:17<00:00,  8.76s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


#### 3. „Éá„Éº„Çø„É≠„Éº„Éâ&Ê¨†ÊêçÂÄ§Á¢∫Ë™ç: Ê¨†ÊêçÂÄ§„ÅØ„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå


In [None]:
# „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó„ÅÆ„Åü„ÇÅ„ÅÆ„Éá„Ç£„Çπ„ÇØ„É™„Éó„Çø„Éº„ÇíÊåáÂÆö
factor_list = [s + "_PctRank" for s in momentum_descriptor_list]

# -----------------------------------
# load data
# -----------------------------------

df_factor = factset_utils.load_financial_data(
    financials_db_path=financials_db_path, factor_list=factor_list
)

df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
)
df = (
    factset_utils.merge_idx_constituents_and_financials(
        df_weight=df_weight, df_factor=df_factor
    )
    .fillna(np.nan)
    .dropna(subset=factor_list, how="all")
    .dropna(subset=["Weight (%)"], ignore_index=True)
)
df = df.loc[df["date"].dt.year >= 2009]
display(df.head())

df = factset_utils.check_missing_value_and_fill_by_sector_median(
    df=df, factor_list=factor_list
)

Unnamed: 0,date,P_SYMBOL,SEDOL,Asset ID,FG_COMPANY_NAME,GICS Sector,GICS Industry Group,Weight (%),Return_1YAgo_to_1MAgo_PctRank,Return_1YAgo_to_Current_PctRank,Return_2YAgo_to_1YAgo_PctRank,Return_3YAgo_to_2YAgo_PctRank
30834,2009-03-31,0MDJ-GB,B2PF6M7,UKIBBF1,Cadbury PLC,Consumer Staples,Food Beverage & Tobacco,0.078896,0.737864,0.747573,0.247525,0.356436
30835,2009-03-31,0N3I-GB,0896265,UKIDBM1,Tomkins PLC,Industrials,Capital Goods,0.011803,0.722772,0.742574,0.071066,0.03125
30836,2009-03-31,0P7J-GB,0028262,UKIAPY1,Amec Foster Wheeler plc,Energy,Energy,0.01952,0.854545,0.790909,0.688073,0.924528
30837,2009-03-31,1-HK,6190273,HKGAAE1,CK„Éè„ÉÅ„ÇΩ„É≥„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,Financials,Real Estate,0.09194,0.711191,0.689531,0.923358,0.6875
30838,2009-03-31,10-HK,6408352,HKGAGG1,Hang Lung Group Limited,Financials,Real Estate,0.020283,0.696751,0.736462,0.985401,0.959559


üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºàË£úÂÆåÂâçÔºâ
Return_1YAgo_to_Current_PctRank              :      6‰ª∂ (  0.0%)
Return_1YAgo_to_1MAgo_PctRank                :      0‰ª∂ (  0.0%)
Return_2YAgo_to_1YAgo_PctRank                :  2,146‰ª∂ (  1.2%)
Return_3YAgo_to_2YAgo_PctRank                :  4,703‰ª∂ (  2.6%)

‚è≥ „Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§„ÅßË£úÂÆå‰∏≠...

üìã Ê¨†ÊêçÂÄ§„ÅÆÁä∂Ê≥ÅÔºà„Çª„ÇØ„Çø„Éº‰∏≠Â§ÆÂÄ§Ë£úÂÆåÂæåÔºâ
‚úÖ Return_1YAgo_to_Current_PctRank              :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 6‰ª∂
‚úÖ Return_1YAgo_to_1MAgo_PctRank                :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 0‰ª∂
‚úÖ Return_2YAgo_to_1YAgo_PctRank                :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 2,146‰ª∂
‚úÖ Return_3YAgo_to_2YAgo_PctRank                :      0‰ª∂ (  0.0%) | Ë£úÂÆå: 4,703‰ª∂

‚úÖ ÊúÄÁµÇÊ¨†ÊêçÂÄ§„ÉÅ„Çß„ÉÉ„ÇØ
‚úÖ Return_1YAgo_to_Current_PctRank              : Ê¨†Êêç„Å™„Åó
‚úÖ Return_1YAgo_to_1MAgo_PctRank                : Ê¨†Êêç„Å™„Åó
‚úÖ Return_2YAgo_to_1YAgo_PctRank                : Ê¨†Êêç„Å™„Åó
‚úÖ Return_3YAgo_to_2

#### Ôºàüöß Â∑•‰∫ã‰∏≠Ôºâ4. „Éï„Ç°„ÇØ„Çø„ÉºË®àÁÆó


### 4-10. „Åù„ÅÆ‰ªñ„ÅÆË≤°ÂãôÈ†ÖÁõÆ

ÊôÇÁÇπ„Åß„ÅÆ„É©„É≥„ÇØ„ÄÅ„Éë„Éº„Çª„É≥„Éà„É©„É≥„ÇØ„ÄÅZScore „ÅÆ„ÅøË®àÁÆó


In [None]:
# „Éï„Ç°„ÇØ„Çø„ÉºÂÄ§
factor_list = [
    "FF_ASSETS",
    "FF_BPS",
    "FF_BPS_TANG",
    "FF_CAPEX",
    "FF_CASH_ST",
    "FF_COGS",
    "FF_COM_EQ",
    "FF_CURR_RATIO",
    "FF_DEBT",
    "FF_DEBT_ENTRPR_VAL",
    "FF_DEBT_EQ",
    "FF_DEBT_LT",
    "FF_DEBT_ST",
    "FF_DEP_AMORT_EXP",
    "FF_DIV_YLD",
    "FF_DPS",
    "FF_EBITDA_OPER",
    "FF_EBITDA_OPER_MGN",
    "FF_EBIT_OPER",
    "FF_EBIT_OPER_MGN",
    "FF_ENTRPR_VAL_EBITDA_OPER",
    "FF_ENTRPR_VAL_EBIT_OPER",
    "FF_ENTRPR_VAL_SALES",
    "FF_EPS",
    "FF_EPS_DIL",
    "FF_FREE_CF",
    "FF_FREE_PS_CF",
    "FF_GROSS_INC",
    "FF_GROSS_MGN",
    "FF_INC_TAX",
    "FF_INT_EXP_NET",
    "FF_LIABS",
    "FF_LIABS_SHLDRS_EQ",
    "FF_MIN_INT_ACCUM",
    "FF_NET_DEBT",
    "FF_NET_INC",
    "FF_NET_MGN",
    "FF_OPER_CF",
    "FF_OPER_INC",
    "FF_OPER_MGN",
    "FF_OPER_PS_NET_CF",
    "FF_PAY_OUT_RATIO",
    "FF_PBK",
    "FF_PE",
    "FF_PFD_STK",
    "FF_PPE_NET",
    "FF_PSALES",
    "FF_PTX_INC",
    "FF_PTX_MGN",
    "FF_QUICK_RATIO",
    "FF_ROA",
    "FF_ROE",
    "FF_ROIC",
    "FF_ROTC",
    "FF_SALES",
    "FF_SALES_PS",
    "FF_SGA",
    "FF_SHLDRS_EQ",
    "FF_STK_OPT_EXP",
    "FF_STK_PURCH_CF",
    "FF_TAX_RATE",
    "FF_WKCAP",
]

In [None]:
# -----------------------------------
# load data
# -----------------------------------
# ÊßãÊàêÈäòÊüÑÊÉÖÂ†±
df_weight = factset_utils.load_index_constituents(
    factset_index_db_path=factset_index_db_path, UNIVERSE_CODE=UNIVERSE_CODE
).assign(date=lambda x: pd.to_datetime(x["date"]))

factset_utils.process_rank_calculation_store_to_db(
    df_weight=df_weight, factor_list=factor_list, financials_db_path=financials_db_path
)

RankË®àÁÆóÈÄ≤Êçó: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 62/62 [08:56<00:00,  8.65s/it]


üéâ ÂÖ®„Å¶„ÅÆ„É©„É≥„ÇØË®àÁÆó„Éª‰øùÂ≠ò„ÅåÂÆå‰∫Ü„Åó„Åæ„Åó„Åü


### ‚úÖ „Éá„Éº„Çø„Éô„Éº„ÇπÂÜÖÂÆπÁ¢∫Ë™ç


In [None]:
table_names = sorted(db_utils.get_table_names(db_path=financials_db_path))
print(f"ÂÖ®{len(table_names)}„ÉÜ„Éº„Éñ„É´")
display(table_names)

with sqlite3.connect(factset_index_db_path) as conn:
    df = pd.read_sql(
        f"SELECT * FROM {UNIVERSE_CODE} LIMIT 5", parse_dates=["date"], con=conn
    )
    display(df)
    display(df.columns)

ÂÖ®714„ÉÜ„Éº„Éñ„É´


['Active_Return_12M',
 'Active_Return_12M_annlzd',
 'Active_Return_1M',
 'Active_Return_1M_annlzd',
 'Active_Return_3M',
 'Active_Return_3M_annlzd',
 'Active_Return_3Y',
 'Active_Return_3Y_annlzd',
 'Active_Return_5Y',
 'Active_Return_5Y_annlzd',
 'Active_Return_6M',
 'Active_Return_6M_annlzd',
 'FF_ASSETS',
 'FF_ASSETS_CAGR_3Y',
 'FF_ASSETS_CAGR_3Y_PctRank',
 'FF_ASSETS_CAGR_3Y_Rank',
 'FF_ASSETS_CAGR_3Y_ZScore',
 'FF_ASSETS_CAGR_5Y',
 'FF_ASSETS_CAGR_5Y_PctRank',
 'FF_ASSETS_CAGR_5Y_Rank',
 'FF_ASSETS_CAGR_5Y_ZScore',
 'FF_ASSETS_PctRank',
 'FF_ASSETS_QoQ',
 'FF_ASSETS_QoQ_PctRank',
 'FF_ASSETS_QoQ_Rank',
 'FF_ASSETS_QoQ_ZScore',
 'FF_ASSETS_Rank',
 'FF_ASSETS_YoY',
 'FF_ASSETS_YoY_PctRank',
 'FF_ASSETS_YoY_Rank',
 'FF_ASSETS_YoY_ZScore',
 'FF_ASSETS_ZScore',
 'FF_BPS',
 'FF_BPS_PctRank',
 'FF_BPS_Rank',
 'FF_BPS_TANG',
 'FF_BPS_TANG_PctRank',
 'FF_BPS_TANG_Rank',
 'FF_BPS_TANG_ZScore',
 'FF_BPS_ZScore',
 'FF_CAPEX',
 'FF_CAPEX_CAGR_3Y',
 'FF_CAPEX_CAGR_3Y_PctRank',
 'FF_CAPEX_CAGR_3

Unnamed: 0,Universe,Universe_code_BPM,date,Name,Bloomberg Ticker,BloombergID,Asset ID,Asset ID Type,SEDOL,Country,...,FG_COMPANY_NAME_CUSIP,P_SYMBOL_CUSIP,ISIN,FG_COMPANY_NAME_ISIN,P_SYMBOL_ISIN,CODE_JP,FG_COMPANY_NAME_CODE_JP,P_SYMBOL_CODE_JP,P_SYMBOL,FG_COMPANY_NAME
0,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,21ST CENTURY FOX,,,AUSBIN2,BARRAID,662075,AUS,...,,,,,,,,,FOXLV-AU,Twenty-First Century Fox Inc. Class A CDI
1,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,21ST CENTURY FOX,,,AUSBIN1,BARRAID,688692,AUS,...,,,,,,,,,FOX-AU,Twenty-First Century Fox Inc. Class B CDI
2,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,3I GROUP PLC,,,UKIENL1,BARRAID,888693,GBR,...,„Çπ„É™„Éº„Ç¢„Ç§„Éª„Ç∞„É´„Éº„Éó,TGOPF-US,GB0008886938,„Çπ„É™„Éº„Ç¢„Ç§„Éª„Ç∞„É´„Éº„Éó,III-GB,,,,III-GB,„Çπ„É™„Éº„Ç¢„Ç§„Éª„Ç∞„É´„Éº„Éó
3,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,3M CO,,,USAJ8P1,BARRAID,2595708,USA,...,3M„Ç´„É≥„Éë„Éã„Éº,MMM-US,US6040591058,3M„Ç´„É≥„Éë„Éã„Éº,MMM-US,,,,MMM-US,3M„Ç´„É≥„Éë„Éã„Éº
4,MSCI KOKUSAI - Daily,MSXJPN_AD,2000-01-31,ABB LTD,,,SWIAAN1,BARRAID,5661190,CHE,...,ABB,,CH0003846620,ABB,ABBN-CH,,,,ABBN-CH,ABB


Index(['Universe', 'Universe_code_BPM', 'date', 'Name', 'Bloomberg Ticker',
       'BloombergID', 'Asset ID', 'Asset ID Type', 'SEDOL', 'Country',
       'GICS Sector', 'GICS Industry', 'GICS Industry Group',
       'GICS Sub-Industry', 'Holdings', 'Weight (%)', 'Mkt Value',
       'FG_COMPANY_NAME_SEDOL', 'P_SYMBOL_SEDOL', 'CUSIP',
       'FG_COMPANY_NAME_CUSIP', 'P_SYMBOL_CUSIP', 'ISIN',
       'FG_COMPANY_NAME_ISIN', 'P_SYMBOL_ISIN', 'CODE_JP',
       'FG_COMPANY_NAME_CODE_JP', 'P_SYMBOL_CODE_JP', 'P_SYMBOL',
       'FG_COMPANY_NAME'],
      dtype='object')

## 5. Ê¨†ÊêçÁ¢∫Ë™ç


### ‚úÖ „Éá„Éº„Çø„Éô„Éº„ÇπÁ¢∫Ë™ç


In [None]:
table_names = sorted(db_utils.get_table_names(db_path=financials_db_path))
print(f"ÂÖ®{len(table_names)}„ÉÜ„Éº„Éñ„É´")
display(table_names)

with sqlite3.connect(factset_index_db_path) as conn:
    df_weight = pd.read_sql(
        f"SELECT * FROM {UNIVERSE_CODE}", parse_dates=["date"], con=conn
    )
    display(df_weight.columns)
    display(df_weight.tail(5))

ÂÖ®454„ÉÜ„Éº„Éñ„É´


['Active_Return_1M',
 'Active_Return_1M_annlzd',
 'Active_Return_3M',
 'Active_Return_3M_annlzd',
 'Active_Return_3Y',
 'Active_Return_3Y_annlzd',
 'Active_Return_5Y',
 'Active_Return_5Y_annlzd',
 'Active_Return_6M',
 'Active_Return_6M_annlzd',
 'FF_ASSETS',
 'FF_ASSETS_CAGR_3Y',
 'FF_ASSETS_CAGR_3Y_PctRank',
 'FF_ASSETS_CAGR_3Y_Rank',
 'FF_ASSETS_CAGR_3Y_ZScore',
 'FF_ASSETS_CAGR_5Y',
 'FF_ASSETS_CAGR_5Y_PctRank',
 'FF_ASSETS_CAGR_5Y_Rank',
 'FF_ASSETS_CAGR_5Y_ZScore',
 'FF_ASSETS_PctRank',
 'FF_ASSETS_QoQ',
 'FF_ASSETS_QoQ_PctRank',
 'FF_ASSETS_QoQ_Rank',
 'FF_ASSETS_QoQ_ZScore',
 'FF_ASSETS_Rank',
 'FF_ASSETS_YoY',
 'FF_ASSETS_YoY_PctRank',
 'FF_ASSETS_YoY_Rank',
 'FF_ASSETS_YoY_ZScore',
 'FF_ASSETS_ZScore',
 'FF_BPS',
 'FF_BPS_PctRank',
 'FF_BPS_Rank',
 'FF_BPS_TANG',
 'FF_BPS_TANG_PctRank',
 'FF_BPS_TANG_Rank',
 'FF_BPS_TANG_ZScore',
 'FF_BPS_ZScore',
 'FF_CAPEX',
 'FF_CAPEX_PctRank',
 'FF_CAPEX_Rank',
 'FF_CAPEX_ZScore',
 'FF_CASH_ST',
 'FF_CASH_ST_PctRank',
 'FF_CASH_ST_Rank',
 

Index(['Universe', 'Universe_code_BPM', 'date', 'Name', 'Bloomberg Ticker',
       'BloombergID', 'Asset ID', 'Asset ID Type', 'SEDOL', 'Country',
       'GICS Sector', 'GICS Industry', 'GICS Industry Group',
       'GICS Sub-Industry', 'Holdings', 'Weight (%)', 'Mkt Value',
       'FG_COMPANY_NAME_SEDOL', 'P_SYMBOL_SEDOL', 'CUSIP',
       'FG_COMPANY_NAME_CUSIP', 'P_SYMBOL_CUSIP', 'ISIN',
       'FG_COMPANY_NAME_ISIN', 'P_SYMBOL_ISIN', 'CODE_JP',
       'FG_COMPANY_NAME_CODE_JP', 'P_SYMBOL_CODE_JP', 'P_SYMBOL',
       'FG_COMPANY_NAME'],
      dtype='object')

Unnamed: 0,Universe,Universe_code_BPM,date,Name,Bloomberg Ticker,BloombergID,Asset ID,Asset ID Type,SEDOL,Country,...,FG_COMPANY_NAME_CUSIP,P_SYMBOL_CUSIP,ISIN,FG_COMPANY_NAME_ISIN,P_SYMBOL_ISIN,CODE_JP,FG_COMPANY_NAME_CODE_JP,P_SYMBOL_CODE_JP,P_SYMBOL,FG_COMPANY_NAME
400859,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZIMMER BIOMET HOLDINGS INC,,,USA4JT1,BARRAID,2783815,USA,...,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,ZBH-US,US98956P1021,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ,ZBH-US,,,,ZBH-US,„Ç∏„É≥„Éû„Éº„Éª„Éê„Ç§„Ç™„É°„ÉÉ„Éà„Éª„Éõ„Éº„É´„Éá„Ç£„É≥„Ç∞„Çπ
400860,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZOETIS INC,,,USBANZ1,BARRAID,B95WG16,USA,...,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,ZTS-US,US98978V1035,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A,ZTS-US,,,,ZTS-US,„Çæ„Ç®„ÉÜ„Ç£„Çπ Class A
400861,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZOOM COMMUNICATIONS INC,,,USBEOV1,BARRAID,BGSP7M9,USA,...,„Ç∫„Éº„É†„Éª„Éì„Éá„Ç™„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,ZM-US,US98980L1017,„Ç∫„Éº„É†„Éª„Éì„Éá„Ç™„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A,ZM-US,,,,ZM-US,„Ç∫„Éº„É†„Éª„Éì„Éá„Ç™„Éª„Ç≥„Éü„É•„Éã„Ç±„Éº„Ç∑„Éß„É≥„Ç∫ Class A
400862,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZSCALER INC,,,USBDYI1,BARRAID,BZ00V34,USA,...,„Çº„ÉÉ„Éà„Çπ„Ç±„Ç§„É©„Éº,ZS-US,US98980G1022,„Çº„ÉÉ„Éà„Çπ„Ç±„Ç§„É©„Éº,ZS-US,,,,ZS-US,„Çº„ÉÉ„Éà„Çπ„Ç±„Ç§„É©„Éº
400863,MSCI KOKUSAI - Daily,MSXJPN_AD,2025-10-31,ZURICH INSURANCE GROUP AG,,,SWIAFM2,BARRAID,5983816,CHE,...,„ÉÅ„É•„Éº„É™„ÉÉ„Éí„Éª„Ç§„É≥„Ç∑„É•„Ç¢„É©„É≥„Çπ„Éª„Ç∞„É´„Éº„Éó,ZFSVF-US,CH0011075394,„ÉÅ„É•„Éº„É™„ÉÉ„Éí„Éª„Ç§„É≥„Ç∑„É•„Ç¢„É©„É≥„Çπ„Éª„Ç∞„É´„Éº„Éó,ZURN-CH,,,,ZURN-CH,„ÉÅ„É•„Éº„É™„ÉÉ„Éí„Éª„Ç§„É≥„Ç∑„É•„Ç¢„É©„É≥„Çπ„Éª„Ç∞„É´„Éº„Éó


In [None]:
# ÂêÑÂ§âÊï∞„ÇíÂá¶ÁêÜ„Åô„ÇãÈñ¢Êï∞
def process_variable(variable, financials_db_path, df_weight):
    with sqlite3.connect(financials_db_path) as conn:
        df_factor = pd.read_sql(
            f"SELECT `date`, `P_SYMBOL`, `value` FROM `{variable}`",
            con=conn,
            parse_dates=["date"],
        )

    merged_df = (
        pd.merge(df_weight, df_factor, on=["date", "P_SYMBOL"], how="outer")
        .rename(columns={"value": variable})
        .dropna(subset=["Weight (%)", variable], how="any", axis=0)
    ).fillna(np.nan)

    g = (
        pd.DataFrame(merged_df.groupby(["date"])["Weight (%)"].agg("sum"))
        .reset_index()
        .assign(variable=variable)
    )

    return g

In [None]:
dfs_weight_sum = []
with ThreadPoolExecutor(max_workers=4) as executor:
    future_to_var = {
        executor.submit(process_variable, var, financials_db_path, df_weight): var
        for var in table_names
    }

    for future in as_completed(future_to_var):
        variable = future_to_var[future]
        try:
            result = future.result()
            if result is not None:
                dfs_weight_sum.append(result)
                print(f"‚úì Completed: {variable}")
        except Exception as e:
            print(f"‚úó Failed {variable}: {e}")

‚úì Completed: Active_Return_3M
‚úì Completed: Active_Return_1M_annlzd
‚úì Completed: Active_Return_1M
‚úì Completed: Active_Return_3M_annlzd
‚úì Completed: Active_Return_3Y
‚úì Completed: Active_Return_3Y_annlzd
‚úì Completed: Active_Return_5Y
‚úì Completed: Active_Return_5Y_annlzd
‚úì Completed: Active_Return_6M
‚úì Completed: FF_ASSETS
‚úì Completed: Active_Return_6M_annlzd
‚úì Completed: FF_ASSETS_CAGR_3Y
‚úì Completed: FF_ASSETS_CAGR_5Y
‚úì Completed: FF_ASSETS_CAGR_3Y_Rank
‚úì Completed: FF_ASSETS_CAGR_3Y_PctRank
‚úì Completed: FF_ASSETS_CAGR_3Y_ZScore
‚úì Completed: FF_ASSETS_CAGR_5Y_PctRank
‚úì Completed: FF_ASSETS_CAGR_5Y_Rank
‚úì Completed: FF_ASSETS_CAGR_5Y_ZScore
‚úì Completed: FF_ASSETS_PctRank
‚úì Completed: FF_ASSETS_QoQ
‚úì Completed: FF_ASSETS_QoQ_Rank
‚úì Completed: FF_ASSETS_QoQ_PctRank
‚úì Completed: FF_ASSETS_QoQ_ZScore
‚úì Completed: FF_ASSETS_Rank
‚úì Completed: FF_ASSETS_YoY
‚úì Completed: FF_ASSETS_YoY_PctRank
‚úì Completed: FF_ASSETS_YoY_Rank
‚úì Completed: FF

In [None]:
df_weight_sum = pd.concat(dfs_weight_sum).sort_values(
    ["date", "Weight (%)"], ignore_index=True
)
df_weight_sum = (
    pd.pivot(df_weight_sum, index=["date"], columns="variable", values="Weight (%)")
    .reset_index()
    .filter(regex="date|_Rank|_PctRank|_ZScore")
)
display(df_weight_sum)

output_path = BLOOMBERG_DATA_DIR / f"{UNIVERSE_CODE}_not_missing_weight.xlsx"
df_weight_sum.to_excel(output_path, index=False)

variable,date,FF_ASSETS_CAGR_3Y_PctRank,FF_ASSETS_CAGR_3Y_Rank,FF_ASSETS_CAGR_3Y_ZScore,FF_ASSETS_CAGR_5Y_PctRank,FF_ASSETS_CAGR_5Y_Rank,FF_ASSETS_CAGR_5Y_ZScore,FF_ASSETS_PctRank,FF_ASSETS_QoQ_PctRank,FF_ASSETS_QoQ_Rank,...,FF_STK_OPT_EXP_ZScore,FF_STK_PURCH_CF_PctRank,FF_STK_PURCH_CF_Rank,FF_STK_PURCH_CF_ZScore,FF_TAX_RATE_PctRank,FF_TAX_RATE_Rank,FF_TAX_RATE_ZScore,FF_WKCAP_PctRank,FF_WKCAP_Rank,FF_WKCAP_ZScore
0,2005-08-31,,,,,,,97.532947,,,...,15.983133,61.725108,61.725108,59.932037,93.525348,93.525348,93.525348,89.346444,89.346444,89.346444
1,2005-09-30,,,,,,,96.534935,,,...,15.871030,62.047600,62.047600,51.564443,93.419152,93.419152,93.419152,89.205829,89.205829,89.205829
2,2005-10-31,,,,,,,96.471981,,,...,16.097034,61.961402,61.961402,52.128179,93.798649,93.798649,93.798649,89.420052,89.420052,89.420052
3,2005-11-30,,,,,,,96.472460,97.503718,97.503718,...,16.909461,62.512187,62.512187,52.846167,93.905481,93.905481,93.905481,89.080571,89.080571,89.080571
4,2005-12-31,,,,,,,97.655170,97.996633,97.996633,...,20.332567,62.126714,62.126714,60.209553,91.484261,91.484261,91.484261,93.541597,93.541597,93.541597
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,2025-06-30,99.000611,99.000611,99.000611,97.892807,97.892807,97.892807,99.504106,99.999989,99.999989,...,77.992514,97.671326,97.671326,93.137308,97.224061,97.224061,97.224061,99.363345,99.363345,99.363345
239,2025-07-31,99.073587,99.073587,99.073587,97.926685,97.926685,97.926685,99.627776,100.000012,100.000012,...,78.723921,97.738047,97.738047,93.201061,97.311733,97.311733,97.311733,99.490787,99.490787,99.490787
240,2025-08-31,99.090801,99.090801,99.090801,97.910641,97.910641,97.910641,99.625314,100.000009,100.000009,...,78.425465,97.698057,97.698057,93.258100,97.260548,97.260548,97.260548,99.488512,99.488512,99.488512
241,2025-09-30,98.275030,98.275030,98.275030,97.371288,97.371288,97.371288,98.765681,99.999991,99.999991,...,77.311625,96.750014,96.750014,92.399361,97.000819,97.000819,97.000819,98.463638,98.463638,98.463638


In [None]:
# Ê¨†ÊêçÁ¢∫Ë™çÔºàWeight (%)Ôºâ
dfs_weight_sum = []
with sqlite3.connect(financials_db_path) as conn:
    for variable in table_names:
        df_factor = pd.read_sql(
            f"SELECT `date`, `P_SYMBOL`, `value` FROM `{variable}`",
            con=conn,
            parse_dates=["date"],
        )
        merged_df = (
            pd.merge(df_weight, df_factor, on=["date", "P_SYMBOL"], how="outer")
            .rename(columns={"value": variable})
            .dropna(subset=["Weight (%)", variable], how="any", axis=0)
        ).fillna(np.nan)

        g = (
            pd.DataFrame(merged_df.groupby(["date"])["Weight (%)"].agg("sum"))
            .reset_index()
            .assign(variable=variable)
        )

        dfs_weight_sum.append(g)

df_weight_sum = pd.concat(dfs_weight_sum, ignore_index=True)
df_weight_sum = pd.pivot(
    df_weight_sum, index=["date"], columns="variable", values="Weight (%)"
).reset_index()
display(df_weight_sum)

KeyboardInterrupt: 