# Filter OHLCV data for a specific ticker

In [1]:
import pandas as pd
import os

# Configuration: folder holding the raw daily-trading .txt files, the folder
# the per-ticker CSV is written to, and the ticker to extract.
folder_path = "/Users/albert/Documents/Finances/data/raw/market_data/daily_trading"
output_dir = "/Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/ticker_daily_ohlcv"
target_ticker = "BUMI"

os.makedirs(output_dir, exist_ok=True)

df_list = []

# sorted() makes the read order deterministic; os.listdir() returns entries
# in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".txt"):
        continue

    file_path = os.path.join(folder_path, filename)

    # assumes each file is comma-separated with a header row that includes
    # "<ticker>" and "<date>" columns — TODO confirm against the raw files
    df = pd.read_csv(file_path)

    filtered = df[df["<ticker>"] == target_ticker]
    if not filtered.empty:
        df_list.append(filtered)

# Bind final_df before branching so the closing `final_df.head()` cannot raise
# NameError (or silently show a stale frame left in the kernel) when no file
# contains the requested ticker.
final_df = pd.DataFrame()

if not df_list:
    print(f"Tidak ada data untuk ticker {target_ticker}")
else:
    final_df = pd.concat(df_list, ignore_index=True)

    # Values that do not match MM/DD/YYYY become NaT (errors="coerce"); NaT
    # rows never satisfy the range comparison below, so they are dropped.
    final_df["<date>"] = pd.to_datetime(
        final_df["<date>"],
        format="%m/%d/%Y",
        errors="coerce"
    )

    # Latest date present in the data.
    last_date = final_df["<date>"].max()

    # Start date: the most recent 1 October on or before last_date. Using
    # last_date.year unconditionally would place the start AFTER last_date
    # whenever the data ends in January-September, yielding an empty result.
    start_year = last_date.year if last_date.month >= 10 else last_date.year - 1
    start_date = pd.Timestamp(year=start_year, month=10, day=1)

    # Keep rows from 1 October through the last available date.
    final_df = final_df[
        (final_df["<date>"] >= start_date) &
        (final_df["<date>"] <= last_date)
    ]

    final_df = final_df.sort_values("<date>")

    output_file = os.path.join(
        output_dir,
        f"{target_ticker}_daily_ohlcv.csv"
    )

    final_df.to_csv(output_file, index=False)
    print(f"Total data {target_ticker}: {len(final_df)}")
    print("File tersimpan di:", output_file)


final_df.head()

Total data BUMI: 45
File tersimpan di: /Users/albert/Documents/Finances/data/raw/ohlcv/ticker_data/ticker_daily_ohlcv/BUMI_daily_ohlcv.csv


Unnamed: 0,<date>,<ticker>,<open>,<high>,<low>,<close>,<volume>
34,2025-10-01,BUMI,149.0,164.0,148.0,161.0,8086835900
25,2025-10-02,BUMI,163.0,167.0,156.0,163.0,5836304600
14,2025-10-03,BUMI,164.0,168.0,160.0,161.0,3401965900
6,2025-10-06,BUMI,162.0,164.0,149.0,150.0,6865926600
11,2025-10-07,BUMI,151.0,155.0,143.0,144.0,6120027600


# Filter news for a specific ticker, from range 20250901_20260120

In [6]:
import json
import os

# Merged market-news file to filter.
# assumes the JSON root is a list of dicts — TODO confirm against the file
path = "/Users/albert/Documents/Finances/data/processed/filtered_merged_20250901_20260120_market_news.json"

with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

keywords = ["BUMI", "Bumi Resources Tbk."]

# Lowercase the keywords once instead of once per article.
lowered_keywords = [k.lower() for k in keywords]

# Keep articles whose title contains any keyword (case-insensitive substring).
# `or ""` covers titles that are present but null in the JSON, where
# `.get("title", "")` would return None and `.lower()` would raise.
# NOTE(review): a substring match on "bumi" also accepts titles that merely
# contain that word — confirm this is the intended matching rule.
filtered_data = [
    item for item in data
    if any(k in (item.get("title") or "").lower() for k in lowered_keywords)
]

# Keep only the fields needed downstream.
processed_data = [
    {
        "title": item.get("title"),
        "date": item.get("date"),
        "full_content": item.get("full_content"),
        "summary": item.get("summary"),
    }
    for item in filtered_data
]

output_path = f"/Users/albert/Documents/Finances/data/processed/filtered_emiten/filtered_{keywords[0]}_20250901_20260120_market_news.json"

# Create the target folder if needed so the write does not fail on a fresh
# checkout (the OHLCV cell does the same for its own output folder).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(processed_data, f, ensure_ascii=False, indent=2)


print(len(filtered_data))


41


# Filter news to only `keyword == BUMI`, narrowing the 20250901_20260120 source range to 20251001_20251130

In [8]:
import json
import os
from datetime import datetime

# Merged market-news file to filter.
# assumes the JSON root is a list of dicts — TODO confirm against the file
path = "/Users/albert/Documents/Finances/data/processed/filtered_merged_20250901_20260120_market_news.json"

with open(path, "r", encoding="utf-8") as f:
    data = json.load(f)

keywords = ["BUMI", "Bumi Resources Tbk."]

# Lowercase the keywords once instead of once per article.
lowered_keywords = [k.lower() for k in keywords]

# Filter by keyword in the title (case-insensitive substring).
# `or ""` covers titles that are present but null in the JSON, where
# `.get("title", "")` would return None and `.lower()` would raise.
filtered_data = [
    item for item in data
    if any(k in (item.get("title") or "").lower() for k in lowered_keywords)
]

# Date range: October 2025 → November 2025 (both ends inclusive).
start_date = datetime(2025, 10, 1)
end_date = datetime(2025, 11, 30)

# Filter by date range. Compare calendar dates rather than full datetimes:
# end_date is midnight at the START of 30 Nov, so an article stamped later
# that day would otherwise be excluded, and comparing a timezone-aware
# timestamp against these naive bounds would raise TypeError.
# NOTE(review): assumes every matched item has an ISO-8601 "date" string —
# confirm against the data.
date_filtered_data = [
    item for item in filtered_data
    if start_date.date()
    <= datetime.fromisoformat(item.get("date")).date()
    <= end_date.date()
]

# Keep only the fields needed downstream.
processed_data = [
    {
        "title": item.get("title"),
        "date": item.get("date"),
        "full_content": item.get("full_content"),
        "summary": item.get("summary"),
    }
    for item in date_filtered_data
]

# Build the output path.
output_path = f"/Users/albert/Documents/Finances/data/processed/filtered_emiten/filtered_{keywords[0]}_20251001_20251130_market_news.json"

# Create the target folder if needed so the write does not fail on a fresh
# checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(processed_data, f, ensure_ascii=False, indent=2)

print("Total data setelah filter:", len(processed_data))


Total data setelah filter: 10
