# Implement Factset and Bloomberg formulas


In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import os, sys
from dotenv import load_dotenv
from pathlib import Path
from tqdm import tqdm
import yaml
import datetime
from dateutil.relativedelta import relativedelta
import warnings
warnings.simplefilter("ignore")


# Load variables from a .env file into the process environment, then resolve
# the project directories from them. The variables are read with
# os.environ[...] rather than os.environ.get(...): a missing variable then
# fails immediately with a KeyError that names it, instead of an opaque
# TypeError raised from Path(None).
load_dotenv()
QUANTS_DIR = Path(os.environ["QUANTS_DIR"])
SRC_DIR = Path(os.environ["SRC_DIR"])
FACTSET_ROOT_DIR = Path(os.environ["FACTSET_ROOT_DIR"])
FACTSET_FINANCIALS_DIR = Path(os.environ["FACTSET_FINANCIALS_DIR"])
FACTSET_INDEX_CONSTITUENTS_DIR = Path(os.environ["FACTSET_INDEX_CONSTITUENTS_DIR"])
BPM_ROOT_DIR = Path(os.environ["BPM_ROOT_DIR"])
BPM_DATA_DIR = Path(os.environ["BPM_DATA_DIR"])
BPM_SRC_DIR = Path(os.environ["BPM_SRC_DIR"])
BLOOMBERG_DATA_DIR = Path(os.environ["BLOOMBERG_DATA_DIR"])
BLOOMBERG_PRICE_DIR = Path(os.environ["BLOOMBERG_PRICE_DIR"])

# QUANTS_DIR goes first on sys.path so the project-local `src` package below
# can be imported.
sys.path.insert(0, str(QUANTS_DIR))
import src.factset_utils as factset_utils
import src.implement_FS_BBG_formulas_utils as implement_utils
import src.bloomberg_utils as bloomberg_utils


# Workbook path under the FactSet root, and the BPM code map loaded from YAML
# (indexed by universe code in later cells).
formula_xlsx = FACTSET_ROOT_DIR / "FDS samples and Factset Formulas.xlsx"
with open(SRC_DIR / "BPM_Index-code-map.yaml") as f:
    bpm_code_map = yaml.safe_load(f)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. BPM からダウンロードした Index 構成銘柄の parquet ファイルを読み取る

-   (Universe name)\_Constituents.parquet から Factset の P_SYMBOL と FG_COMPANY_NAME をダウンロードするエクセルファイルを作成。
-   その後、すべてのインデックスをまとめて parquet ファイルに保存。


In [None]:
# Date range handed to the constituent export helper.
start_date = "2000-01-31"
end_date = "2025-11-30"
# Target index directories: sub-directories of BPM_DATA_DIR whose name starts
# with "MS".
index_dir = [
    entry
    for entry in BPM_DATA_DIR.iterdir()
    if entry.is_dir() and entry.name.startswith("MS")
]
display(index_dir)
factset_utils.load_bpm_and_export_factset_code_file(
    index_dir=index_dir,
    start_date=start_date,
    end_date=end_date,
)


[WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI AC ASEAN - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI AC ASIA - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI AC ASIA ex JAPAN - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI AC ASIA PACIFIC - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI AC ASIA PACIFIC ex JAPAN- Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI ACWI - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI HONG KONG - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI INDIA - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI INDONESIA - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI KOKUSAI - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI KOREA - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI MALAYSIA - Daily'),
 WindowsPath('H:/HataY/03_Work/Quants/data/BPM/data/MSCI

BPMから取得した全構成銘柄のエクスポート:
	 A file has been exported. -> H:\HataY\03_Work\Quants\data\BPM\data\Index_Constituents.parquet
Factset銘柄コードダウンロード用のExcel file:
	 A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Index_Constituents\Index_Constituents_Factset_code_DL.xlsx


## 2.（Factset でデータダウンロード後）ファイル統合

-   FACTSET のダウンロードが終わったら、Excel を CSV 出力しておくこと。


### 2-1. Factset コード統合


In [None]:
# Unify the FactSet code data via the project helper. Per this cell's recorded
# output, one "<code>_Index_Constituents_with_Factset_code.parquet" file is
# exported per index.
factset_utils.unify_factset_code_data()


MSACAPFAD_Index_Constituents_with_Factset_code.parquet has been exported.
MSAPFXJ_AD_Index_Constituents_with_Factset_code.parquet has been exported.
MSASD_Index_Constituents_with_Factset_code.parquet has been exported.
MSAWIF_AD_Index_Constituents_with_Factset_code.parquet has been exported.
MSCAFXJAD_Index_Constituents_with_Factset_code.parquet has been exported.
MSCIHKGD_Index_Constituents_with_Factset_code.parquet has been exported.
MSCIINDD_Index_Constituents_with_Factset_code.parquet has been exported.
MSCISIND_Index_Constituents_with_Factset_code.parquet has been exported.
MSFIDND_Index_Constituents_with_Factset_code.parquet has been exported.
MSFKORD_Index_Constituents_with_Factset_code.parquet has been exported.
MSFMALD_Index_Constituents_with_Factset_code.parquet has been exported.
MSFPHID_Index_Constituents_with_Factset_code.parquet has been exported.
MSFTAID_Index_Constituents_with_Factset_code.parquet has been exported.
MSFTHAD_Index_Constituents_with_Factset_code.parquet h

### 2-2. Bloomberg から Ticker と FIGI 取得

-   blpapi を使用してダウンロード
-   これにて最終的な構成銘柄の全データ収集完了


In [None]:
# ------------------------------------
# Load parquet files
# ------------------------------------
file_list = list(
    FACTSET_INDEX_CONSTITUENTS_DIR.glob(
        "Index_Constituents_with_Factset_code-compressed-*.parquet"
    )
)
dfs = [pd.read_parquet(f) for f in file_list]
df_members = (
    pd.concat(dfs, ignore_index=True)
    .fillna(np.nan)
    .assign(
        # Zero-pad SEDOL to 7 characters but keep missing values missing:
        # a bare astype(str) turns NaN into the literal "nan", which zfill
        # would then pad into the bogus identifier "0000nan".
        SEDOL=lambda x: (
            x["SEDOL"].astype(str).str.zfill(7).where(x["SEDOL"].notna())
        ),
        date=lambda x: pd.to_datetime(x["date"]),
    )
)

# ------------------------------------
# Download data via blpapi
# ------------------------------------
blp = bloomberg_utils.BlpapiCustom()
output_dfs = []

for date in tqdm(df_members["date"].unique()):
    df_slice = df_members.loc[df_members["date"] == date].reset_index(drop=True)

    # One "<identifier> Equity" list per identifier type; rows with a missing
    # identifier are skipped.
    sedol_list = [str(s) + " Equity" for s in df_slice["SEDOL"].dropna().tolist()]
    cusip_list = [str(s) + " Equity" for s in df_slice["CUSIP"].dropna().tolist()]
    isin_list = [str(s) + " Equity" for s in df_slice["ISIN"].dropna().tolist()]

    # NOTE(review): start_date / end_date are computed but not used anywhere
    # below in this cell — confirm whether they were meant to be passed on.
    # First day of the following month
    start_date = ((date + relativedelta(months=1)).replace(day=1)).strftime("%Y%m%d")
    # Last day of the following month
    end_date = (
        (date + relativedelta(months=2)).replace(day=1) - relativedelta(days=1)
    ).strftime("%Y%m%d")

    # Each lookup receives the list built from its own identifier column.
    # (Previously the SEDOL list was passed to the CUSIP and ISIN lookups too,
    # leaving cusip_list / isin_list unused.)
    df_sedol = blp.load_ids_from_blpapi(
        id_type="SEDOL", id_list=sedol_list, as_of_date=date
    )
    df_cusip = blp.load_ids_from_blpapi(
        id_type="CUSIP", id_list=cusip_list, as_of_date=date
    )
    df_isin = blp.load_ids_from_blpapi(
        id_type="ISIN", id_list=isin_list, as_of_date=date
    )

    # Left-merge each lookup result back onto the constituents of this date.
    df_output = pd.merge(df_slice, df_sedol, on=["date", "SEDOL"], how="left")
    df_output = pd.merge(df_output, df_cusip, on=["date", "CUSIP"], how="left")
    df_output = pd.merge(df_output, df_isin, on=["date", "ISIN"], how="left")

    output_dfs.append(df_output)

df_output = pd.concat(output_dfs, ignore_index=True)
df_output["Weight (%)"] = df_output["Weight (%)"].astype(float)

# Persist the combined result as several zstd-compressed parquet files.
factset_utils.split_and_save_dataframe(
    df_all=df_output,
    base_dir=FACTSET_INDEX_CONSTITUENTS_DIR,
    n_splits=6,
    base_filename="Index_Constituents_w_Factset_and_Bloomberg-compressed-",
    compression="zstd",
    index=False,
)


  1%|          | 2/310 [01:57<5:02:27, 58.92s/it]


InvalidArgumentException: Choice sub-element not found for name 'securityData'. (0x00020002)

In [None]:
# Optional missing-value check (per date and Universe).
file_list = list(
    FACTSET_ROOT_DIR.glob(
        "Index_Constituents/M*_Index_Constituents_with_Factset_code.parquet"
    )
)
dfs = [pd.read_parquet(path) for path in file_list]
for df in dfs:
    # Flag rows without a FactSet symbol, then show the constituent count and
    # total weight for each (date, Universe, missing-flag) group.
    df["Weight (%)"] = df["Weight (%)"].astype(float)
    df["P_SYMBOL_MISS"] = df["P_SYMBOL"].isnull()
    weights_by_group = df.groupby(["date", "Universe", "P_SYMBOL_MISS"])["Weight (%)"]
    g = weights_by_group.agg(["count", "sum"])
    display(g)


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI AC ASIA PACIFIC - Daily,False,1233,99.959175
2025-10-31,MSCI AC ASIA PACIFIC - Daily,True,1,0.040838


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI AC ASIA PACIFIC ex JAPAN- Daily,False,1053,99.941284
2025-10-31,MSCI AC ASIA PACIFIC ex JAPAN- Daily,True,1,0.058719


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI AC ASIA - Daily,False,1181,99.955019
2025-10-31,MSCI AC ASIA - Daily,True,1,0.04495


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI ACWI - Daily,False,2507,99.898648
2025-10-31,MSCI ACWI - Daily,True,4,0.101336


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI AC ASIA ex JAPAN - Daily,False,1001,99.932401
2025-10-31,MSCI AC ASIA ex JAPAN - Daily,True,1,0.067614


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI HONG KONG - Daily,False,27,100.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI INDIA - Daily,False,160,99.607349
2025-10-31,MSCI INDIA - Daily,True,1,0.392652


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI SINGAPORE - Daily,False,17,99.999999


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI INDONESIA - Daily,False,18,100.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI KOREA - Daily,False,81,99.999998


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI MALAYSIA - Daily,False,27,99.999999


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI PHILIPPINES - Daily,False,11,100.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI TAIWAN - Daily,False,87,99.999995


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI THAILAND - Daily,False,19,100.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI AC ASEAN - Daily,False,92,100.000001


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,sum
date,Universe,P_SYMBOL_MISS,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-31,MSCI KOKUSAI - Daily,False,1139,99.958187
2025-10-31,MSCI KOKUSAI - Daily,True,2,0.041813


## Financial Data Download 用ファイル作成

-   Factset の Financials データ取得用の関数を Excel ファイルに埋め込む
-   ユニバースごとに処理
-   取得期間は最長 20 年(20AY)。長いので、こまめに区切って取る。

#### 注意！

-   関数を埋め込んだら、Excel を Factset 端末で開き、関数部分のセルを F2 で開く操作を行ってから、Factset のデータダウンロードを行う。
    -   ➡ そうしないと関数が動かない。


In [None]:
# Process all matching universes in one go: derive each universe code from its
# constituent file name, then embed the FactSet formulas for that universe.
universe_code_list = [
    path.name.replace("_Index_Constituents_with_Factset_code.parquet", "")
    for path in FACTSET_ROOT_DIR.glob(
        "Index_Constituents/MSAWIF*_Index_Constituents_with_Factset_code.parquet"
    )
]
display(universe_code_list)
for universe_code in universe_code_list:
    factset_utils.implement_factset_formulas(
        universe_code=universe_code,
        year_range="20AY",
    )


['MSAWIF_AD']

--- MSAWIF_AD: Factset関数埋め込み開始 ---
	カテゴリー：計9件
	category_list: Income Statement, Balance Sheet, Cash Flow, Enterprise Value, Valuation, Profitability, Liquidity, Price, Price_Daily
=== MSAWIF_AD: Factset関数埋め込み完了 ===


## （Factset からダウンロード後）Formatting downloaded data

-   落とした財務データの Excel ファイルからデータを収集
-   Excel ファイル ➡ `D_Financials●●_{year_range}.xlsx` | `D_Price_{year_range}.xlsx`


### 20AY データの処理（初めて長期データをダウンロードした場合）

-   5 分くらいかかる


In [None]:
universe_code = "MSXJPN_AD"
year_range = "20AY"
universe_folder = FACTSET_FINANCIALS_DIR / universe_code
# Financial workbooks first, then the price workbook, for this year range.
file_list = [
    *universe_folder.glob(f"D_Financials*_{year_range}.xlsx"),
    *universe_folder.glob(f"D_Price_{year_range}.xlsx"),
]
factset_utils.format_factset_downloaded_data(
    file_list=file_list,  # type: ignore
    output_folder=universe_folder,
    split_save_mode=True,  # type: ignore
)


100%|██████████| 8/8 [04:57<00:00, 37.20s/it]


A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2005-2007.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2008-2010.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2011-2013.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2014-2016.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2017-2019.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-2020-2022.parquet
A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Fina

### 1AY データの処理（データのアップデート時）


In [None]:
universe_code = "MSXJPN_AD"
year_range = "1AY"
universe_folder = FACTSET_FINANCIALS_DIR / universe_code
# Financial workbooks first, then the price workbook, for this year range.
file_list = [
    *universe_folder.glob(f"D_Financials*_{year_range}.xlsx"),
    *universe_folder.glob(f"D_Price_{year_range}.xlsx"),
]
factset_utils.format_factset_downloaded_data(
    file_list=file_list,  # type: ignore
    output_folder=universe_folder,
    split_save_mode=False,
)


100%|██████████| 8/8 [00:09<00:00,  1.19s/it]


A file has been exported -> H:\HataY\03_Work\Quants\data\Factset\Financials\MSXJPN_AD\Financials_and_Price-compressed-20241129_20251031.parquet


## Price data

-   FQL(Factset)でダウンロードした daily price data をエクセルファイルから抽出->parquet ファイルにエクスポートする
-   インデックス構成銘柄とベンチマークそれぞれに対して実行
-   Daily の price データは Bloomberg から取得する


In [None]:
universe_code = "MSXJPN_AD"
constituents_path = (
    FACTSET_ROOT_DIR
    / f"Index_Constituents/{universe_code}_Index_Constituents_with_Factset_code.parquet"
)
df_security = pd.read_parquet(
    constituents_path,
    columns=["Asset ID", "SEDOL", "CUSIP", "ISIN", "CODE_JP", "Country"],
)
display(df_security.shape)

# Unique "<identifier> <TYPE>" strings per identifier column; missing values
# are skipped.
sedol_list = list(
    {s + " SEDOL" for s in df_security["SEDOL"].tolist() if pd.notna(s)}
)
cusip_list = list(
    {s + " CUSIP" for s in df_security["CUSIP"].tolist() if pd.notna(s)}
)
isin_list = list(
    {s + " ISIN" for s in df_security["ISIN"].tolist() if pd.notna(s)}
)
code_jp_list = list(
    {s + " CODE_JP" for s in df_security["CODE_JP"].tolist() if pd.notna(s)}
)

# One comma-separated BQL.LIST formula per identifier type.
bql_list_sedol = "=BQL.LIST(" + ",".join(sedol_list) + ")"
bql_list_cusip = "=BQL.LIST(" + ",".join(cusip_list) + ")"
bql_list_isin = "=BQL.LIST(" + ",".join(isin_list) + ")"

for id_strings in (sedol_list, cusip_list, isin_list):
    print(len(id_strings))


# Only the SEDOL formula is written to the workbook here.
# NOTE(review): `utils` is not among the imports visible in this notebook
# export — confirm which module provides create_excel_safely.
output_excel_file = BLOOMBERG_PRICE_DIR / f"{universe_code}/Price.xlsx"
output_excel_file.parent.mkdir(parents=True, exist_ok=True)
utils.create_excel_safely(output_path=output_excel_file, data=bql_list_sedol)


(399723, 6)

4121
4374
4087
データ型: <class 'str'>
データ内容: '=BQL.LIST(5788130 SEDOL,BFYFZP SEDOL,5013832 SEDOL,BNGC0D3 SEDOL,2892045 SEDOL,2459785 SEDOL,3058750 SEDOL,BJ1F880 SEDOL,7128541 SEDOL,B3MR9L3 SEDOL,2574707 SEDOL,6293699 SEDOL,2046853 SEDOL,BK6QWF0 SEDOL,2867719 SEDOL,2339638 SEDOL,5249187 SEDOL,B00FWN1 SEDOL,2279002 SEDOL,2098508 SEDOL,B0190C7 SEDOL,B142S60 SEDOL,7171589 SEDOL,0604316 SEDOL,B01R311 SEDOL,5687431 SEDOL,BM8H5Y5 SEDOL,B10QTX0 SEDOL,6064969 SEDOL,BZCNB42 SEDOL,4490005 SEDOL,3034545 SEDOL,B3Y49D4 SEDOL,BZ5ZHK3 SEDOL,2138158 SEDOL,BZ1HM42 SEDOL,2623911 SEDOL,BKRTG56 SEDOL,2506658 SEDOL,5560811 SEDOL,2325091 SEDOL,2232685 SEDOL,4818351 SEDOL,5654316 SEDOL,B1Z4ST8 SEDOL,B68XHC3 SEDOL,7262610 SEDOL,B0M42T2 SEDOL,5703672 SEDOL,B1Z7RQ7 SEDOL,4499013 SEDOL,BNG8PQ9 SEDOL,BZ8GX83 SEDOL,5529027 SEDOL,2312071 SEDOL,5092433 SEDOL,B0LL8N0 SEDOL,B4Z73G0 SEDOL,2126335 SEDOL,BYNZGK1 SEDOL,2150107 SEDOL,BMHTPY2 SEDOL,BCV7KT2 SEDOL,2868165 SEDOL,7068799 SEDOL,BRC3N84 SEDOL,BDZT6P9 SEDOL,2686646 SE

In [None]:
universe_code = "MSXJPN_AD"
constituents_path = (
    FACTSET_ROOT_DIR
    / f"Index_Constituents/{universe_code}_Index_Constituents_with_Factset_code.parquet"
)
df_security = pd.read_parquet(
    constituents_path,
    columns=["Asset ID", "SEDOL", "CUSIP", "ISIN", "CODE_JP", "Country"],
)


# Main processing: build one identifier list per identifier column through the
# project helper (called in the order SEDOL, CUSIP, ISIN, CODE_JP).
sedol_list, cusip_list, isin_list, code_jp_list = (
    implement_utils.create_identifier_list(df_security[column], column)
    for column in ("SEDOL", "CUSIP", "ISIN", "CODE_JP")
)

# Identifier dictionary handed to the Excel writer.
identifier_dict = {
    "SEDOL": sedol_list,
    "CUSIP": cusip_list,
    "ISIN": isin_list,
    "CODE_JP": code_jp_list,
}

# Print how many entries each identifier list holds.
print("=== 識別子統計 ===")
print(f"SEDOL: {len(sedol_list)}銘柄")
print(f"CUSIP: {len(cusip_list)}銘柄")
print(f"ISIN: {len(isin_list)}銘柄")
print(f"CODE_JP: {len(code_jp_list)}銘柄")

# Create the Excel file, passing a chunk size of 500 to the helper.
output_excel_file = BLOOMBERG_PRICE_DIR / f"{universe_code}/Price.xlsx"
implement_utils.create_excel_with_chunked_data(
    output_excel_file,
    identifier_dict,
    chunk_size=500,
)


=== 識別子統計 ===
SEDOL: 4121銘柄
CUSIP: 4374銘柄
ISIN: 4087銘柄
CODE_JP: 0銘柄
SEDOL: 4121銘柄 -> 9シートに分割
  シート 'SEDOL_1': 500銘柄, 数式長: 8009文字
  シート 'SEDOL_2': 500銘柄, 数式長: 8003文字
  シート 'SEDOL_3': 500銘柄, 数式長: 8003文字
  シート 'SEDOL_4': 500銘柄, 数式長: 8007文字
  シート 'SEDOL_5': 500銘柄, 数式長: 8005文字
  シート 'SEDOL_6': 500銘柄, 数式長: 8005文字
  シート 'SEDOL_7': 500銘柄, 数式長: 8006文字
  シート 'SEDOL_8': 500銘柄, 数式長: 8006文字
  シート 'SEDOL_9': 121銘柄, 数式長: 1944文字
CUSIP: 4374銘柄 -> 9シートに分割
  シート 'CUSIP_1': 500銘柄, 数式長: 9009文字
  シート 'CUSIP_2': 500銘柄, 数式長: 9005文字
  シート 'CUSIP_3': 500銘柄, 数式長: 9001文字
  シート 'CUSIP_4': 500銘柄, 数式長: 9008文字
  シート 'CUSIP_5': 500銘柄, 数式長: 9007文字
  シート 'CUSIP_6': 500銘柄, 数式長: 9007文字
  シート 'CUSIP_7': 500銘柄, 数式長: 9005文字
  シート 'CUSIP_8': 500銘柄, 数式長: 9009文字
  シート 'CUSIP_9': 374銘柄, 数式長: 6742文字
ISIN: 4087銘柄 -> 9シートに分割
  シート 'ISIN_1': 500銘柄, 数式長: 10010文字
  シート 'ISIN_2': 500銘柄, 数式長: 10010文字
  シート 'ISIN_3': 500銘柄, 数式長: 10010文字
  シート 'ISIN_4': 500銘柄, 数式長: 10010文字
  シート 'ISIN_5': 500銘柄, 数式長: 10010文字
  シート 'ISIN_6': 500銘柄, 数式長: 10

## Price


In [None]:
universe_code = "MSXJPN_AD"
universe_name = bpm_code_map[universe_code]

# Load the BPM constituents, treat the literal "N/A" as missing, and drop
# columns that are entirely empty.
df = pd.read_parquet(
    BPM_DATA_DIR / f"{universe_name}/{universe_name}_Constituents.parquet"
)
df = df.replace("N/A", np.nan).dropna(axis=1, how="all")
df["Weight (%)"] = df["Weight (%)"].astype(float)

# Every column except the three identifier columns.
all_columns = [
    col for col in df.columns.tolist() if col not in ["SEDOL", "CUSIP", "ISIN"]
]

output_excel = BLOOMBERG_PRICE_DIR / f"{universe_code}_Price.xlsx"

# One sheet per identifier type, holding only rows where that identifier is set.
with pd.ExcelWriter(output_excel) as f:
    for id_type in ["SEDOL", "CUSIP", "ISIN"]:
        sheet_columns = all_columns + [id_type]
        df_slice = df[sheet_columns].dropna(subset=id_type)
        df_slice.to_excel(f, sheet_name=id_type, index=False)


### Index Constituents


In [None]:
# NOTE(review): MSCI_KOKUSAI_OUTPUT_DIR is not defined in the visible part of
# this notebook — confirm where it is set before running this cell.
excel_file = MSCI_KOKUSAI_OUTPUT_DIR / "D_price_Daily.xlsx"
df_price_universe = pd.read_excel(excel_file, sheet_name="FG_PRICE")
df_date_universe = pd.read_excel(excel_file, sheet_name="date")

# Pair each symbol's date column with its price column and stack everything
# into long format (date, P_SYMBOL, variable, value).
dfs = []
for symbol in tqdm(df_price_universe.columns.tolist()):
    df_slice = pd.DataFrame(
        {
            "date": df_date_universe[symbol].values,
            "value": df_price_universe[symbol].values,
        }
    )
    df_slice["P_SYMBOL"] = symbol
    df_slice["variable"] = "FG_PRICE"
    dfs.append(df_slice)

df_kokusai = pd.concat(dfs, ignore_index=True)
# Only rows where BOTH date and value are missing are removed (how="all").
df_kokusai = df_kokusai.dropna(
    subset=["date", "value"], how="all", ignore_index=True
)
df_kokusai = df_kokusai.reindex(columns=["date", "P_SYMBOL", "variable", "value"])
display(df_kokusai.head())
print(df_kokusai.shape)

# export: parse dates, sort, then split by period into four parquet files
df_kokusai = df_kokusai.assign(date=lambda x: pd.to_datetime(x["date"])).sort_values(
    ["P_SYMBOL", "date"], ignore_index=True
)
dates = df_kokusai["date"]
df_kokusai_1 = df_kokusai[dates <= pd.to_datetime("2009-12-31")]
df_kokusai_2 = df_kokusai[
    dates.between(pd.to_datetime("2010-01-01"), pd.to_datetime("2014-12-31"))
]
df_kokusai_3 = df_kokusai[
    dates.between(pd.to_datetime("2015-01-01"), pd.to_datetime("2019-12-31"))
]
df_kokusai_4 = df_kokusai[dates >= pd.to_datetime("2020-01-01")]

exports = {
    "MSCI_KOKUSAI_Price_1.parquet": df_kokusai_1,
    "MSCI_KOKUSAI_Price_2.parquet": df_kokusai_2,
    "MSCI_KOKUSAI_Price_3.parquet": df_kokusai_3,
    "MSCI_KOKUSAI_Price_4.parquet": df_kokusai_4,
}
# Show all four period slices first, then write them out.
for frame in exports.values():
    display(frame)
for file_name, frame in exports.items():
    frame.to_parquet(MSCI_KOKUSAI_OUTPUT_DIR / file_name, index=False)

# Drop every reference to the large frames so their memory can be reclaimed.
del (
    exports,
    file_name,
    frame,
    dates,
    df_kokusai_1,
    df_kokusai_2,
    df_kokusai_3,
    df_kokusai_4,
    df_kokusai,
    dfs,
    df_price_universe,
    df_date_universe,
)


100%|██████████| 3262/3262 [00:01<00:00, 1804.99it/s]


Unnamed: 0,date,P_SYMBOL,variable,value
0,2005-08-22,0HSW-GB,FG_PRICE,5.990826
1,2005-08-23,0HSW-GB,FG_PRICE,5.937046
2,2005-08-24,0HSW-GB,FG_PRICE,5.909186
3,2005-08-25,0HSW-GB,FG_PRICE,6.024418
4,2005-08-26,0HSW-GB,FG_PRICE,6.008603


(16439356, 4)


Unnamed: 0,date,P_SYMBOL,variable,value
0,2005-08-22,0HSW-GB,FG_PRICE,5.990826
1,2005-08-23,0HSW-GB,FG_PRICE,5.937046
2,2005-08-24,0HSW-GB,FG_PRICE,5.909186
3,2005-08-25,0HSW-GB,FG_PRICE,6.024418
4,2005-08-26,0HSW-GB,FG_PRICE,6.008603
...,...,...,...,...
16435419,2009-12-22,ZURN-CH,FG_PRICE,212.542880
16435420,2009-12-23,ZURN-CH,FG_PRICE,217.698850
16435421,2009-12-28,ZURN-CH,FG_PRICE,220.782360
16435422,2009-12-29,ZURN-CH,FG_PRICE,220.758470


Unnamed: 0,date,P_SYMBOL,variable,value
1104,2010-01-04,0HSW-GB,FG_PRICE,
1105,2010-01-05,0HSW-GB,FG_PRICE,
1106,2010-01-06,0HSW-GB,FG_PRICE,
1107,2010-01-07,0HSW-GB,FG_PRICE,
1108,2010-01-08,0HSW-GB,FG_PRICE,
...,...,...,...,...
16436675,2014-12-19,ZURN-CH,FG_PRICE,315.07062
16436676,2014-12-22,ZURN-CH,FG_PRICE,318.57327
16436677,2014-12-23,ZURN-CH,FG_PRICE,317.60178
16436678,2014-12-29,ZURN-CH,FG_PRICE,318.63516


Unnamed: 0,date,P_SYMBOL,variable,value
2366,2015-01-02,0HSW-GB,FG_PRICE,
2367,2015-01-05,0HSW-GB,FG_PRICE,
2368,2015-01-06,0HSW-GB,FG_PRICE,
2369,2015-01-07,0HSW-GB,FG_PRICE,
2370,2015-01-08,0HSW-GB,FG_PRICE,
...,...,...,...,...
16437929,2019-12-19,ZURN-CH,FG_PRICE,407.19540
16437930,2019-12-20,ZURN-CH,FG_PRICE,410.77612
16437931,2019-12-23,ZURN-CH,FG_PRICE,409.71338
16437932,2019-12-27,ZURN-CH,FG_PRICE,411.02630


Unnamed: 0,date,P_SYMBOL,variable,value
3630,2020-01-02,0HSW-GB,FG_PRICE,
3631,2020-01-03,0HSW-GB,FG_PRICE,
3632,2020-01-06,0HSW-GB,FG_PRICE,
3633,2020-01-07,0HSW-GB,FG_PRICE,
3634,2020-01-08,0HSW-GB,FG_PRICE,
...,...,...,...,...
16439351,2025-08-19,ZURN-CH,FG_PRICE,730.04650
16439352,2025-08-20,ZURN-CH,FG_PRICE,741.92950
16439353,2025-08-21,ZURN-CH,FG_PRICE,737.82560
16439354,2025-08-22,ZURN-CH,FG_PRICE,739.81665


### Benchmark


In [None]:
# NOTE(review): MSCI_KOKUSAI_OUTPUT_DIR is not defined in the visible part of
# this notebook — confirm where it is set before running this cell.
excel_file = MSCI_KOKUSAI_OUTPUT_DIR / "D_Benchmark_Price.xlsx"
df_price_benchmark = pd.read_excel(excel_file, sheet_name="FG_PRICE")
df_date_benchmark = pd.read_excel(excel_file, sheet_name="date")

# Pair each benchmark's date column with its price column and stack everything
# into long format (date, P_SYMBOL, variable, value).
dfs = []
for symbol in tqdm(df_price_benchmark.columns.tolist()):
    df_slice = pd.DataFrame(
        {
            "date": df_date_benchmark[symbol].values,
            "value": df_price_benchmark[symbol].values,
        }
    )
    df_slice["P_SYMBOL"] = symbol
    df_slice["variable"] = "FG_PRICE"
    dfs.append(df_slice)

df_benchmark = pd.concat(dfs, ignore_index=True)
# Only rows where BOTH date and value are missing are removed (how="all").
df_benchmark = df_benchmark.dropna(
    subset=["date", "value"], how="all", ignore_index=True
)
df_benchmark = df_benchmark.reindex(columns=["date", "P_SYMBOL", "variable", "value"])
# Swap the benchmark codes for readable names.
df_benchmark = df_benchmark.replace(
    {
        "991200": "MSCI Kokusai Index (World ex Japan)",
        "SP50": "S&P 500",
    }
)
df_benchmark = df_benchmark.assign(date=lambda x: pd.to_datetime(x["date"]))
df_benchmark = df_benchmark.sort_values(["P_SYMBOL", "date"], ignore_index=True)
display(df_benchmark)
print(df_benchmark.shape)

df_benchmark.to_parquet(
    MSCI_KOKUSAI_OUTPUT_DIR / "Benchmark_Price.parquet",
    index=False,
)

# Drop the intermediate frames once the parquet file has been written.
del (
    df_benchmark,
    dfs,
    df_price_benchmark,
    df_date_benchmark,
)


100%|██████████| 2/2 [00:00<00:00, 100.00it/s]


Unnamed: 0,date,P_SYMBOL,variable,value
0,2005-08-25,MSCI Kokusai Index (World ex Japan),FG_PRICE,1164.848165
1,2005-08-26,MSCI Kokusai Index (World ex Japan),FG_PRICE,1159.250335
2,2005-08-29,MSCI Kokusai Index (World ex Japan),FG_PRICE,1161.959970
3,2005-08-30,MSCI Kokusai Index (World ex Japan),FG_PRICE,1157.563921
4,2005-08-31,MSCI Kokusai Index (World ex Japan),FG_PRICE,1171.923999
...,...,...,...,...
10057,2025-08-19,S&P 500,FG_PRICE,6411.370000
10058,2025-08-20,S&P 500,FG_PRICE,6395.780000
10059,2025-08-21,S&P 500,FG_PRICE,6370.170000
10060,2025-08-22,S&P 500,FG_PRICE,6466.910000


(10062, 4)
