In [2]:
# Section 1: Import Required Libraries
import os
import pandas as pd
from dotenv import load_dotenv

# Section 2: Load Environment Variables and Data Files
load_dotenv(r"C:\Users\by003457\workspace\perfectdays\.env")
CAPIQ_DATA_DIR = os.environ["CAPIQ_DATA_DIR"]
compprice_file = os.path.join(CAPIQ_DATA_DIR, "capiq_price_equity_kr.parquet")
compinfo_file = os.path.join(CAPIQ_DATA_DIR, "comp_naics_code_common_stock_kr.parquet")
df_compprice = pd.read_parquet(compprice_file)
df_compinfo = pd.read_parquet(compinfo_file)

# Section 3: Extract Unique Company IDs
company_ids = df_compinfo['tradingItemId'].unique()

# Section 4: Calculate 5-Day Rolling Percentage Change for All Companies
# Assume price column is 'priceClose', date column is 'pricingDate'
# add pricingDate filter to focus on recent data
df_compprice = df_compprice[df_compprice['pricingDate'] > '2024-01-01']
df_compprice = df_compprice.sort_values(['tradingItemId', 'pricingDate'])
df_compprice['pct_change_5d'] = df_compprice.groupby('tradingItemId')['priceClose'].transform(lambda x: x.pct_change(periods=5))

# Section 5: Identify Big Price Jumps for Each Company
threshold = 0.10  # 10%
big_jumps = df_compprice[df_compprice['pct_change_5d'] > threshold]

In [3]:
# Section 6: Export Results to Excel for Tableau
big_jumps = big_jumps.merge(df_compinfo[['tradingItemId', 'companyName']], on='tradingItemId', how='left')
export_cols = ['pricingDate', 'tradingItemId', 'companyName', 'priceClose', 'volume', 'pct_change_5d']


ValueError: You are trying to merge on int64 and object columns for key 'tradingItemId'. If you wish to proceed you should use pd.concat

In [None]:
big_jumps[export_cols].to_excel('c:/Users/by003457/downloads/all_companies_pricejump.xlsx', index=False)
print('Exported big price jumps to Excel for Tableau.')