In [1]:
import pandas as pd

df = pd.read_csv("~/Downloads/disclosure_events_sql.csv")

In [2]:
df

Unnamed: 0,id,stock_code,market,company_name,report_name,disclosed_at,summary_kr,raw,created_at,ret_1m,ret_3m,ret_10m,ret_60m
0,64684,,,삼성엔지니어링,투자판단관련주요경영사항,2023-10-31 23:00:01+00,,"2023.11.01 07:46:54\n기업명: 삼성엔지니어링(시가총액: 4조 6,5...",2025-07-08 17:09:01.716527,,,,
1,64685,KR7006280002,KOSPI,녹십자,투자판단관련주요경영사항,2023-10-31 23:00:02+00,,"2023.11.01 07:46:54\n기업명: 녹십자(시가총액: 1조 1,219억)...",2025-07-08 17:09:01.716527,,,,
2,64686,KR7051910008,KOSPI,LG화학,조회공시요구(풍문또는보도)에대한답변(미확정),2023-11-01 00:33:24+00,,"2023.11.01 09:33:22\n기업명: LG화학(시가총액: 30조 8,136...",2025-07-08 17:09:01.716527,,,,
3,64687,KR7018250001,KOSPI,애경산업,기업설명회(IR)개최(안내공시),2023-11-01 00:33:30+00,,"2023.11.01 09:33:27\n기업명: 애경산업(시가총액: 5,810억)\n...",2025-07-08 17:09:01.716527,,,,
4,64688,KR7046440004,KOSDAQ,KG모빌리언스,연결재무제표기준영업(잠정)실적(공정공시),2023-11-01 00:53:05+00,,"2023.11.01 09:53:04\n기업명: KG모빌리언스(시가총액: 1,862억...",2025-07-08 17:09:01.716527,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3412,68175,KR7230240004,KOSDAQ,에치에프알,타법인주식및출자증권취득결정,2023-12-29 06:56:21+00,,"2023.12.29 15:56:20\n기업명: 에치에프알(시가총액: 2,346억)\...",2025-07-08 17:09:03.053194,,,,
3413,68176,KR7009290008,KOSPI,광동제약,타법인주식및출자증권취득결정,2023-12-29 07:22:38+00,,"2023.12.29 16:22:37\n기업명: 광동제약(시가총액: 3,895억)\n...",2025-07-08 17:09:03.053194,,,,
3414,68177,,,효성첨단소재,주식등의대량보유상황보고서(일반),2023-12-29 07:43:03+00,,"2023.12.29 16:43:02\n기업명: 효성첨단소재(시가총액: 1조 7,89...",2025-07-08 17:09:03.053194,,,,
3415,68178,KR7012750006,KOSPI,에스원,단일판매ㆍ공급계약체결,2023-12-29 07:51:25+00,,"2023.12.29 16:51:24\n기업명: 에스원(시가총액: 2조 2,800억)...",2025-07-08 17:09:03.053194,,,,


In [3]:
# market 종류와 각각의 비율 분석
market_counts = df['market'].value_counts()
print("시장별 공시 분포:")
print(market_counts)
print(f"\n총 공시 건수: {len(df)}")
print("\n시장별 비율:")
for market, count in market_counts.items():
    ratio = count / len(df) * 100
    print(f"{market}: {count}건 ({ratio:.1f}%)")


시장별 공시 분포:
market
KOSDAQ           1502
KOSPI            1434
KOSDAQ GLOBAL     146
Name: count, dtype: int64

총 공시 건수: 3417

시장별 비율:
KOSDAQ: 1502건 (44.0%)
KOSPI: 1434건 (42.0%)
KOSDAQ GLOBAL: 146건 (4.3%)


In [4]:
# disclosed_at을 datetime으로 변환 (이미 timezone-aware 상태)
df['disclosed_at'] = pd.to_datetime(df['disclosed_at'])

# 이미 timezone-aware이므로 tz_convert만 사용하여 KST로 변환
df['disclosed_at_kst'] = df['disclosed_at'].dt.tz_convert('Asia/Seoul')

# 시간만 추출 (KST 기준)
df['time_only'] = df['disclosed_at_kst'].dt.time

# 장중 시간 정의 (9:00 ~ 15:30)
import datetime
market_open = datetime.time(9, 0)
market_close = datetime.time(15, 30)

# 장중/장외 분류
df['market_session'] = df['time_only'].apply(
    lambda x: '장중' if market_open <= x <= market_close else '장외'
)

# 결과 집계
session_counts = df['market_session'].value_counts()
print("공시 발표 시간대별 분포:")
print(session_counts)
print(f"\n장중 비율: {session_counts['장중'] / len(df) * 100:.1f}%")
print(f"장외 비율: {session_counts['장외'] / len(df) * 100:.1f}%")


공시 발표 시간대별 분포:
market_session
장외    2154
장중    1263
Name: count, dtype: int64

장중 비율: 37.0%
장외 비율: 63.0%


In [None]:
# ret_ 값들 중 하나라도 null이 아닌 row만 필터링
ret_columns = ['ret_1m', 'ret_3m', 'ret_10m', 'ret_60m']

# 각 ret_ 컬럼이 null이 아닌 조건들을 생성
not_null_conditions = [df[col].notna() for col in ret_columns]

# 하나라도 null이 아닌 조건 (OR 연산)
has_return_data = pd.concat(not_null_conditions, axis=1).any(axis=1)

# 필터링된 데이터
df_with_returns = df[has_return_data]

print(f"전체 공시 건수: {len(df)}")
print(f"수익률 데이터가 있는 공시 건수: {len(df_with_returns)}")
print(f"수익률 데이터 비율: {len(df_with_returns) / len(df) * 100:.1f}%")

# 수익률 데이터가 있는 공시들 확인
print("\n수익률 데이터가 있는 공시들:")


전체 공시 건수: 3417
수익률 데이터가 있는 공시 건수: 160
수익률 데이터 비율: 4.7%

수익률 데이터가 있는 공시들:


Unnamed: 0,company_name,disclosed_at_kst,ret_1m,ret_3m,ret_10m,ret_60m
2399,STX,2023-12-01 09:01:05+09:00,0.58,1.54,0.16,5.64
2401,HD현대일렉트릭,2023-12-01 10:08:26+09:00,0.23,0.12,-0.58,-0.93
2402,삼성전자,2023-12-01 10:08:49+09:00,0.00,0.00,0.00,0.00
2403,삼성전자,2023-12-01 10:09:59+09:00,-0.14,0.00,0.00,0.00
2404,동부건설,2023-12-01 11:08:27+09:00,7.36,9.17,0.00,-0.33
...,...,...,...,...,...,...
3359,에쓰씨엔지니어링,2023-12-28 13:10:54+09:00,0.11,-3.49,-1.75,-1.46
3363,SIMPAC,2023-12-28 14:25:48+09:00,0.13,0.25,0.00,
3364,한창제지,2023-12-28 14:42:24+09:00,-0.32,-0.54,-0.75,
3366,까뮤이앤씨,2023-12-28 14:53:08+09:00,0.67,0.24,,


In [7]:
df_with_returns = df_with_returns[['company_name', 'disclosed_at_kst', 'ret_1m', 'ret_3m', 'ret_10m', 'ret_60m']]

Unnamed: 0,company_name,disclosed_at_kst,ret_1m,ret_3m,ret_10m,ret_60m
2399,STX,2023-12-01 09:01:05+09:00,0.58,1.54,0.16,5.64
2401,HD현대일렉트릭,2023-12-01 10:08:26+09:00,0.23,0.12,-0.58,-0.93
2402,삼성전자,2023-12-01 10:08:49+09:00,0.00,0.00,0.00,0.00
2403,삼성전자,2023-12-01 10:09:59+09:00,-0.14,0.00,0.00,0.00
2404,동부건설,2023-12-01 11:08:27+09:00,7.36,9.17,0.00,-0.33
...,...,...,...,...,...,...
3359,에쓰씨엔지니어링,2023-12-28 13:10:54+09:00,0.11,-3.49,-1.75,-1.46
3363,SIMPAC,2023-12-28 14:25:48+09:00,0.13,0.25,0.00,
3364,한창제지,2023-12-28 14:42:24+09:00,-0.32,-0.54,-0.75,
3366,까뮤이앤씨,2023-12-28 14:53:08+09:00,0.67,0.24,,


In [8]:
df_with_returns.to_csv("~/Downloads/disclosure_events_with_returns.csv", index=False)