## Import

In [3]:
import pandas as pd
import numpy as np
import random
import os

from tqdm import tqdm
from statsmodels.tsa.arima.model import ARIMA

import warnings
warnings.filterwarnings("ignore")

In [4]:
def seed_everything(seed):
    """Seed the random number sources this notebook relies on.

    Seeds Python's ``random`` module and NumPy's legacy global generator, and
    stores the seed as a string in the ``PYTHONHASHSEED`` environment variable.

    NOTE(review): setting ``PYTHONHASHSEED`` here presumably only affects
    processes started afterwards, not the already-running interpreter - confirm.

    Args:
        seed: Value handed to each seeding function.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(42)  # fix the seed

## Data Load

In [6]:
# Load the training data from train.csv, resolved relative to the current working directory.
train = pd.read_csv('./train.csv')

In [7]:
# Preview the first rows to check the column layout of the loaded data.
train.head()

Unnamed: 0,일자,종목코드,종목명,거래량,시가,고가,저가,종가
0,20210601,A060310,3S,166690,2890,2970,2885,2920
1,20210601,A095570,AJ네트웍스,63836,5860,5940,5750,5780
2,20210601,A006840,AK홀딩스,103691,35500,35600,34150,34400
3,20210601,A054620,APS,462544,14600,14950,13800,14950
4,20210601,A265520,AP시스템,131987,29150,29150,28800,29050


## Model Definition, Training, and Inference

In [20]:
# Model settings: ARIMA (p, d, q) order and the number of future trading days to forecast.
ARIMA_ORDER = (2, 1, 2)
FORECAST_STEPS = 15

# One record per stock code is collected here and turned into a DataFrame after the loop.
results = []

# Distinct stock codes present in train, in order of first appearance.
unique_codes = train['종목코드'].unique()

# Fit one ARIMA model per stock code and forecast its closing price.
# A single groupby pass replaces re-filtering the whole frame once per code;
# sort=False keeps the groups in first-appearance order, matching unique_codes.
for code, code_rows in tqdm(train.groupby('종목코드', sort=False), total=len(unique_codes)):
    # Closing-price series indexed by trading date. It is built without assigning
    # into a filtered slice, and sorted so the model always sees the prices in
    # chronological order even if the CSV rows are not stored in date order.
    trade_dates = pd.to_datetime(code_rows['일자'], format='%Y%m%d')
    tc = code_rows.set_index(trade_dates)['종가'].sort_index(kind='stable')

    # Declare the model, fit it, and forecast the next FORECAST_STEPS trading days.
    model = ARIMA(tc, order=ARIMA_ORDER)
    model_fit = model.fit()
    predictions = model_fit.forecast(steps=FORECAST_STEPS)

    # Relative change between the first and the last forecast value
    # (the base is the first forecast, not the last observed close).
    final_return = (predictions.iloc[-1] - predictions.iloc[0]) / predictions.iloc[0]

    # Store the result for this stock code.
    results.append({'종목코드': code, 'final_return': final_return})

# Convert the collected records into a DataFrame.
results_df = pd.DataFrame(results)


100%|██████████| 2000/2000 [04:02<00:00,  8.26it/s]


In [21]:
# Preview the first per-code forecast returns.
results_df.head()

Unnamed: 0,종목코드,final_return
0,A060310,-0.056222
1,A095570,-0.002837
2,A006840,0.00304
3,A054620,0.001862
4,A265520,0.00209


In [25]:
# Rank stocks by forecast return in descending order (rank 1 = highest final_return).
# method='first' breaks ties by row order so every rank is unique.
# na_option='bottom' ranks a missing return last instead of leaving a NaN rank,
# which would make the integer cast below raise.
results_df['순위'] = (
    results_df['final_return']
    .rank(method='first', ascending=False, na_option='bottom')
    .astype('int')
)
results_df

Unnamed: 0,종목코드,final_return,순위
0,A060310,-0.056222,1992
1,A095570,-0.002837,1682
2,A006840,0.003040,350
3,A054620,0.001862,488
4,A265520,0.002090,447
...,...,...,...
1995,A189980,0.000630,728
1996,A000540,0.002514,394
1997,A003280,0.001430,548
1998,A037440,0.001889,483


## Submit

In [26]:
# Load the submission template; its stock-code column is reused below to build the submission.
sample_submission = pd.read_csv('./sample_submission.csv')
sample_submission

Unnamed: 0,종목코드,순위
0,A000020,1
1,A000040,2
2,A000050,3
3,A000070,4
4,A000080,5
...,...,...
1995,A375500,1996
1996,A378850,1997
1997,A383220,1998
1998,A383310,1999


In [27]:
# Attach each predicted rank to the stock codes, keeping the row order of sample_submission.
# validate='many_to_one' raises if a stock code appears more than once in results_df,
# which would otherwise silently duplicate submission rows.
baseline_submission = sample_submission[['종목코드']].merge(
    results_df[['종목코드', '순위']],
    on='종목코드',
    how='left',
    validate='many_to_one',
)

# A left join leaves NaN (and a float rank column) for any code without a forecast;
# fail here rather than write an incomplete submission file.
missing_rank = baseline_submission['순위'].isna()
assert not missing_rank.any(), f"{int(missing_rank.sum())} stock code(s) have no predicted rank"
baseline_submission

Unnamed: 0,종목코드,순위
0,A000020,497
1,A000040,1656
2,A000050,866
3,A000070,1075
4,A000080,1203
...,...,...
1995,A375500,750
1996,A378850,554
1997,A383220,618
1998,A383310,243


In [28]:
# Write the submission to baseline_submission.csv without the DataFrame index column.
baseline_submission.to_csv('baseline_submission.csv', index=False)