In [22]:
# Section 1: Import Required Libraries
import os
import pandas as pd
from dotenv import load_dotenv

# Section 2: Load Environment Variables and Data Files
load_dotenv(r"C:\Users\by003457\workspace\perfectdays\.env")
CAPIQ_DATA_DIR = os.environ["CAPIQ_DATA_DIR"]
compprice_file = os.path.join(CAPIQ_DATA_DIR, "capiq_price_equity_kr.parquet")
compinfo_file = os.path.join(CAPIQ_DATA_DIR, "comp_naics_code_common_stock_kr.parquet")
compmarketcap_file = os.path.join(CAPIQ_DATA_DIR, "capiq_marketcap_kr.parquet")
df_compprice = pd.read_parquet(compprice_file)
df_compinfo = pd.read_parquet(compinfo_file)
df_compmarketcap = pd.read_parquet(compmarketcap_file)

# Section 3: Extract Unique Company IDs
company_ids = df_compinfo['tradingItemId'].unique()

# Section 4: Calculate 5-Day Rolling Percentage Change for All Companies
# Assume price column is 'VWAP', date column is 'pricingDate'
# add pricingDate filter to focus on recent data
df_compprice = df_compprice[df_compprice['pricingDate'] > '2024-01-01']
df_compprice = df_compprice.sort_values(['tradingItemId', 'pricingDate'])
df_compprice['pct_change_5d'] = df_compprice.groupby('tradingItemId')['VWAP'].transform(lambda x: x.pct_change(periods=5))

# Section 5: Identify Big Price Jumps for Each Company
threshold = 0.10  # 10%
big_jumps = df_compprice[df_compprice['pct_change_5d'] > threshold]

  df_compprice['pct_change_5d'] = df_compprice.groupby('tradingItemId')['VWAP'].transform(lambda x: x.pct_change(periods=5))


In [23]:
big_jumps['tradingItemId'] = big_jumps['tradingItemId'].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  big_jumps['tradingItemId'] = big_jumps['tradingItemId'].astype(str)


In [28]:
df_compmarketcap['companyId'] = df_compmarketcap['companyId'].astype(str)

In [18]:
big_jumps.head(2)

Unnamed: 0,tradingItemId,pricingDate,priceOpen,priceHigh,priceLow,priceMid,priceClose,priceBid,priceAsk,volume,adjustmentFactor,VWAP,pct_change_5d
4144917,20108704,2024-07-11,14510.0,16700.0,14500.0,15605.0,15600.0,15600.0,15610.0,6543759.0,1.0,15752.0,0.1189089359283989
4144920,20108704,2024-07-16,15360.0,18890.0,15050.0,17665.0,17660.0,17660.0,17670.0,11949380.0,1.0,17379.0,0.2096471079557318


In [29]:
df_compinfo.head(2)

Unnamed: 0,tradingItemId,securityId,companyId,exchangeName,exchangeSymbol,tickerSymbol,ISOCode,isoCountry2,companyName,NAICS,...,desc_2,level_3,code_3,desc_3,level_4,code_4,desc_4,level_5,code_5,desc_5
0,1937019783,1870961429,26840462,KOSDAQ,KOSDAQ,A475430,KRW,KR,"KISTRON Co., Ltd.",331222,...,Primary Metal Manufacturing,3.0,3312,Steel Product Manufacturing from Purchased Steel,4.0,33122,Rolling and Drawing of Purchased Steel,5.0,331222,Steel Wire Drawing
1,1899874192,270184252,29513698,KOSDAQ,KOSDAQ,A177900,KRW,KR,3A Logics Inc.,334413,...,Computer and Electronic Product Manufacturing,3.0,3344,Semiconductor and Other Electronic Component M...,4.0,33441,Semiconductor and Other Electronic Component M...,5.0,334413,Semiconductor and Related Device Manufacturing


In [31]:
df_compmarketcap.shape

(7397243, 5)

In [32]:
# merge df_compinfo to df_compmarketcap
df_compmarketcap.merge(df_compinfo, on=['companyId'], how='left').head(2)

Unnamed: 0,companyId,pricingDate,marketCap,TEV,sharesOutstanding,tradingItemId,securityId,exchangeName,exchangeSymbol,tickerSymbol,...,desc_2,level_3,code_3,desc_3,level_4,code_4,desc_4,level_5,code_5,desc_5
0,7672534,2025-01-24,141108.6096,101429.02926,13065612,1860304045,272381524,KOSDAQ,KOSDAQ,A096250,...,Publishing Industries,3.0,5132,Software Publishers,4.0,51321,Software Publishers,5.0,513210,Software Publishers
1,7672534,2025-01-25,141108.6096,101429.02926,13065612,1860304045,272381524,KOSDAQ,KOSDAQ,A096250,...,Publishing Industries,3.0,5132,Software Publishers,4.0,51321,Software Publishers,5.0,513210,Software Publishers


In [20]:
# Section 6: Export Results to Excel for Tableau
big_jumps = big_jumps.merge(df_compinfo[['tradingItemId', 'companyName','tickerSymbol','indu_desc','desc_1','desc_2','desc_3','desc_4','desc_5']], on='tradingItemId', how='left')
export_cols = ['pricingDate', 'tradingItemId', 'companyName', 'tickerSymbol', 'VWAP', 'volume', 'pct_change_5d','indu_desc','desc_1','desc_2','desc_3','desc_4','desc_5']


In [21]:
big_jumps[export_cols].to_excel('c:/Users/by003457/downloads/all_companies_pricejump.xlsx', index=False)
print('Exported big price jumps to Excel for Tableau.')

Exported big price jumps to Excel for Tableau.
