# Get IHSG data from Yahoo Finance

In [1]:
import yfinance as yf

# Download daily bars for IHSG (Yahoo Finance symbol ^JKSE).
ihsg = yf.download(
    tickers="^JKSE",
    start="2025-01-10",
    end="2026-01-25",
    interval="1d"
)

# Fail loudly instead of carrying an empty frame into the CSV export below.
# NOTE(review): yfinance appears to signal a failed download with an empty
# frame rather than an exception — confirm for the installed version.
if ihsg.empty:
    raise RuntimeError("yfinance returned no rows for ^JKSE")

# The download may come back with two-level (field, ticker) columns or with
# flat columns depending on the yfinance version/options; only drop the
# ticker level when it is actually present so the cell works in both cases.
if ihsg.columns.nlevels > 1:
    ihsg.columns = ihsg.columns.droplevel(1)

cols = ["Open", "High", "Low", "Close", "Volume"]

# Round the price/volume fields to two decimals.
ihsg[cols] = ihsg[cols].round(2)

# Move the date index into a regular column.
ihsg = ihsg.reset_index()

# Tag every row with the ticker label used in the exported files.
ihsg["Ticker"] = "IHSG"

# Lower-case every column name (Date -> date, Close -> close, ...).
ihsg.columns = ihsg.columns.str.lower()

ihsg.head()


[*********************100%***********************]  1 of 1 completed


Price,date,close,high,low,open,volume,ticker
0,2025-01-10,7088.87,7121.63,7074.14,7076.9,166170500,IHSG
1,2025-01-13,7016.88,7089.57,6986.59,7076.7,149887800,IHSG
2,2025-01-14,6956.67,7042.33,6956.67,7014.8,151037100,IHSG
3,2025-01-15,7079.56,7084.56,6977.77,6992.85,173538400,IHSG
4,2025-01-16,7107.52,7190.61,7071.91,7165.18,144790100,IHSG


In [None]:
# Arrange the columns as date, ticker, then the OHLCV fields.
ihsg = ihsg.loc[
    :,
    ['date', 'ticker', 'open', 'high', 'low', 'close', 'volume'],
]

# Write the frame to disk without the positional index column.
ihsg.to_csv(
    path_or_buf="/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/ticker_daily_ohlcv/ihsg_2025-2026_ohlcv.csv",
    index=False,
)

# Show the exported frame as the cell output.
ihsg

Price,date,ticker,open,high,low,close,volume
0,2025-01-10,IHSG,7076.90,7121.63,7074.14,7088.87,166170500
1,2025-01-13,IHSG,7076.70,7089.57,6986.59,7016.88,149887800
2,2025-01-14,IHSG,7014.80,7042.33,6956.67,6956.67,151037100
3,2025-01-15,IHSG,6992.85,7084.56,6977.77,7079.56,173538400
4,2025-01-16,IHSG,7165.18,7190.61,7071.91,7107.52,144790100
...,...,...,...,...,...,...,...
240,2026-01-19,IHSG,9098.70,9133.87,9026.00,9133.87,541550600
241,2026-01-20,IHSG,9156.19,9174.47,9120.15,9134.70,606446200
242,2026-01-21,IHSG,9094.43,9105.23,8977.68,9010.33,553244200
243,2026-01-22,IHSG,9052.17,9109.71,8992.13,8992.18,661020800


# Concatenate last year's data for a specific ticker

## Concatenate all datasets from 2025

In [None]:
import pandas as pd
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

In [18]:
# Root folder that is searched recursively for the .xlsx summary files.
base_dir = Path("/Users/albert/Documents/Finances/data/raw/market_data/idx_summaries")


def read_file(file):
    """Load one Excel workbook into a DataFrame, tagged with its file name.

    Parameters
    ----------
    file : pathlib.Path
        Workbook to read; its ``.name`` is stored in a ``source_file`` column.

    Returns
    -------
    pandas.DataFrame or None
        The loaded frame, or ``None`` when anything goes wrong. The error is
        printed rather than raised so one bad file does not abort a batch.
    """
    try:
        sheet = pd.read_excel(file)
        sheet["source_file"] = file.name
    except Exception as err:
        print(f"Error reading {file}: {err}")
        return None
    else:
        return sheet

In [19]:
# Collect the workbooks in sorted order. rglob yields paths in a
# filesystem-dependent order, and the concatenation order decides which row
# the later drop_duplicates(keep='last') keeps, so sorting makes the result
# reproducible between runs.
# Names starting with "~$" are skipped: presumably Excel lock/temp files,
# which are not readable workbooks.
files = sorted(
    path
    for path in base_dir.rglob("*.xlsx")
    if not path.name.startswith("~$")
)

# executor.map returns results in the same order as `files`.
with ThreadPoolExecutor(max_workers=8) as executor:
    dfs = list(executor.map(read_file, files))

# Drop the files that failed to load (read_file returns None for those).
dfs = [d for d in dfs if d is not None]

# pd.concat raises an opaque ValueError on an empty list; raise the same
# exception type with a message that points at the actual problem.
if not dfs:
    raise ValueError(f"No readable .xlsx files found under {base_dir}")

# Concatenate all datasets into a single DataFrame.
final_df = pd.concat(dfs, ignore_index=True)

# Normalise the headers: lower-case, trimmed, spaces replaced by underscores.
final_df.columns = final_df.columns.str.lower().str.strip().str.replace(" ", "_")

In [20]:
# Parse the date column (values that cannot be parsed are coerced to NaT),
# then rename stockcode/openprice to ticker/open.
final_df = (
    final_df
    .assign(date=lambda frame: pd.to_datetime(frame["date"], errors='coerce'))
    .rename(columns={"stockcode": "ticker", "openprice": "open"})
)

In [21]:
# Keep only the date, ticker and OHLCV columns, as an independent copy.
cols_to_keep = [
    'date',
    'ticker',
    'open',
    'high',
    'low',
    'close',
    'volume',
]
stock_summary = final_df.loc[:, cols_to_keep].copy()

In [22]:
# Remove duplicates (keeping the last row per date/ticker pair), then sort
# by date and ticker and renumber the index from zero.
stock_summary = (
    stock_summary
    .drop_duplicates(subset=['date', 'ticker'], keep='last')
    .sort_values(by=["date", "ticker"])
    .reset_index(drop=True)
)

In [None]:
# Select the BUMI rows and export them without the index column.
stock_summary_bumi = stock_summary.loc[stock_summary['ticker'].eq('BUMI')]
stock_summary_bumi.to_csv(
    path_or_buf="/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/ticker_daily_ohlcv/bumi_2025-2026_ohlcv.csv",
    index=False,
)

In [None]:
# from concurrent.futures import ThreadPoolExecutor

# def read_file(file):
#     df = pd.read_excel(file)
#     df["source_file"] = file.name
#     df["source_month"] = file.parent.name
#     return df

# files = list(root_dir.rglob("*.xlsx"))

# with ThreadPoolExecutor(max_workers=8) as executor:
#     dfs = list(executor.map(read_file, files))

# final_df = pd.concat(dfs, ignore_index=True)

# final_df.columns = (
#     final_df.columns
#         .str.lower()
#         .str.strip()
#         .str.replace(" ", "_")
# )
# final_df["date"] = pd.to_datetime(final_df["date"])
# final_df = final_df.sort_values(
#     by=["date", "stockcode"]
# ).reset_index(drop=True)

# final_df.head()


# stock_summary = final_df[['date', 'stockcode', 'openprice', 'high', 'low', 'close', 'volume']]

# stock_summary = stock_summary.rename(columns={
#     "stockcode": "ticker",
#     "openprice": "open"
# })


# stock_summary.head()

## Concatenate all datasets from 2026

In [None]:
# root_dir = Path("/Users/albert/Documents/Finances/data/raw/market_data/idx_summaries/2026")


# def read_file(file):
#     df = pd.read_excel(file)
#     df["source_file"] = file.name
#     df["source_month"] = file.parent.name
#     return df

# files = list(root_dir.rglob("*.xlsx"))

# with ThreadPoolExecutor(max_workers=8) as executor:
#     dfs = list(executor.map(read_file, files))

# final_df = pd.concat(dfs, ignore_index=True)

# final_df.columns = (
#     final_df.columns
#         .str.lower()
#         .str.strip()
#         .str.replace(" ", "_")
# )

# final_df["date"] = pd.to_datetime(final_df["date"])
# final_df = final_df.sort_values(
#     by=["date", "stockcode"]
# ).reset_index(drop=True)

# final_df.head()

# stock_summary = final_df[['date', 'stockcode', 'openprice', 'high', 'low', 'close', 'volume']]

# stock_summary = stock_summary.rename(columns={
#     "stockcode": "ticker",
#     "openprice": "open"
# })


# stock_summary.head()

## OHLCV BUMI

In [17]:
# stock_summary_bumi = stock_summary[stock_summary['ticker'] == 'BUMI']
# stock_summary_bumi = stock_summary_bumi.reset_index(drop=True)

# stock_summary_bumi.to_csv(
#     "/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/CAMS/bumi_2025-2026_ohlcv.csv",
#     index=False
#     )

# stock_summary_bumi_2026 = stock_summary[stock_summary['ticker'] == 'BUMI']
# stock_summary_bumi_2026 = stock_summary_bumi_2026.reset_index(drop=True)


# stock_summary_bumi = pd.concat(
#     [stock_summary_bumi, stock_summary_bumi_2026],
#     ignore_index=True
#     )

# stock_summary_bumi.to_csv(
#     "/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/CAMS/bumi_2025-2026_ohlcv.csv", 
#     index=False
#     )

# stock_summary_bumi


# bum = pd.read_csv("/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/CAMS/bumi_2025-2026_ohlcv.csv")

# bum = bum.sort_values('date').drop_duplicates(subset=['date'], keep='last')

# bum.to_csv(
#     "/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/CAMS/bumi_2025-2026_ohlcv.csv",
#     index=False
#     )

# bum['date'].duplicated().sum()


Unnamed: 0,date,ticker,open,high,low,close,volume
0,2026-01-02,BUMI,372,424,370,420,11771094600
1,2026-01-05,BUMI,428,476,420,464,13048421800
2,2026-01-06,BUMI,466,484,452,464,8648116700
3,2026-01-07,BUMI,464,478,452,452,6102514400
4,2026-01-08,BUMI,450,460,442,460,3629921000
5,2026-01-09,BUMI,470,480,456,462,7430491000
6,2026-01-12,BUMI,468,472,394,436,9759299800
7,2026-01-13,BUMI,436,448,396,406,9716507600
8,2026-01-14,BUMI,414,434,410,422,6113687100
9,2026-01-15,BUMI,426,428,404,410,4173617100
