In [1]:
import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, HuberRegressor
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation and data-conversion
# warnings raised by the modelling libraries are hidden too — consider narrowing it.
warnings.filterwarnings(action='ignore')

In [2]:
# Locate the competition files and load the list of tickers to forecast.

path = './open'
list_name = 'Stock_List.csv'
sample_name = 'sample_submission_week4.csv'

stock_list_path = os.path.join(path, list_name)
stock_list = pd.read_csv(stock_list_path)
# Render every ticker code as a string left-padded with zeros to 6 characters.
stock_list['종목코드'] = [str(raw_code).zfill(6) for raw_code in stock_list['종목코드']]
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
371,더네이쳐홀딩스,298540,KOSDAQ
372,코엔텍,029960,KOSDAQ
373,원익홀딩스,030530,KOSDAQ
374,웹케시,053580,KOSDAQ


In [3]:
# Choose the date window and enumerate every business day (Mon-Fri) inside it.

start_date = '20210104'
end_date = '20210910'

first_day = pd.to_datetime(start_date)
last_day = pd.to_datetime(end_date)
start_weekday = first_day.weekday()     # 0 means Monday
max_weeknum = last_day.strftime('%V')   # ISO week number, kept as a zero-padded string
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}')
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 36
HOW MANY "Business_days" : (180, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


In [4]:
# Load the submission template; the forecasting cells write one column per ticker into it.

sample_name = 'sample_submission_week4.csv'
submission_template_path = os.path.join(path, sample_name)
sample_submission = pd.read_csv(submission_template_path)

In [5]:
# training, predict method
def get_prediction(x_close, y_close, x_close_public, estimators=None, weights=None):
    """Fit every model on one training window and blend their forecasts.

    Parameters
    ----------
    x_close : array-like, 2-D
        Training features, passed unchanged to each model's ``fit``.
    y_close : array-like
        Training targets, passed unchanged to each model's ``fit``.
    x_close_public : array-like, 1-D
        A single feature row; a leading batch axis is added before ``predict``.
    estimators : sequence, optional
        Objects exposing ``fit(X, y)`` and ``predict(X)``. When omitted, the
        notebook-level ``models`` list is used (the original behaviour).
    weights : sequence of float, optional
        One blending weight per estimator. When omitted, the notebook-level
        ``models_rate`` list is used (the original behaviour).

    Returns
    -------
    The weighted sum of the models' predictions for the single query row. Its
    shape is whatever the models' ``predict`` outputs broadcast to; it is the
    float ``0.0`` when there are no estimators.

    Raises
    ------
    ValueError
        If the number of estimators and the number of weights differ.
    """
    if estimators is None:
        estimators = models
    if weights is None:
        weights = models_rate
    if len(estimators) != len(weights):
        raise ValueError(
            f'got {len(estimators)} estimators but {len(weights)} weights'
        )

    # predict() expects a batch, so turn the single row into a 1-row matrix once.
    query = np.expand_dims(x_close_public, 0)

    prediction_close = 0.0
    for model, weight in zip(estimators, weights):
        # Every model is refitted from scratch on the window supplied by the caller.
        model.fit(x_close, y_close)
        prediction_close += model.predict(query) * weight
    return prediction_close

In [6]:
# public data

def NMAE(y_pred, y_train):
    """Return the mean absolute error relative to ``y_train``, as a percentage.

    NOTE(review): this function is handed to ``XGBRegressor`` as ``eval_metric``;
    confirm that the argument order xgboost uses for callable metrics matches
    ``(y_pred, y_train)``.
    """
    relative_error = np.abs(y_train - y_pred) / y_train
    return np.mean(relative_error) * 100

SEED = 42  # fixed seed so the random forest gives repeatable forecasts

model1 = LinearRegression(n_jobs=-1)
# NOTE(review): scikit-learn 1.0 renamed this criterion to "absolute_error" and
# later releases dropped "mae"; switch the string when running on a newer version.
model2 = RandomForestRegressor(criterion="mae", random_state=SEED)
# NOTE(review): get_prediction calls fit(X, y) without an eval_set, so this
# eval_metric is presumably never evaluated — confirm.
model3 = xgb.XGBRegressor(n_jobs=-1, eval_metric=NMAE)
models = [model1, model2, model3]

# Blending weights, one per model, in the same order as `models`.
model1_rate = 0.5
model2_rate = 0.4
model3_rate = 0.1
models_rate = [model1_rate, model2_rate, model3_rate]

for code in tqdm(stock_list['종목코드'].values):
    # Daily closes for this ticker, aligned to every business day in the window.
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.dt.weekday
    data['weeknum'] = data.Date.dt.strftime('%V')
    # Business days without a quote take the previous close; leading gaps take
    # the first available close.
    data['Close'] = data.Close.ffill().bfill()

    # One row per ISO week, one column per weekday.
    data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')
    assert data_close.shape[1] == 5, (
        f'{code}: expected 5 weekday columns, got {data_close.shape[1]}'
    )

    # Flatten back to one chronological series so it can be re-cut into windows.
    data_close_flatten = np.ravel(data_close.to_numpy())

    # Shifted copies so that each window width lines up with the day it targets.
    shifted_right = np.insert(data_close_flatten, 0, data_close_flatten[0])[:-1]
    shifted_left = np.append(data_close_flatten[1:], 0)  # trailing 0 is padding only

    # (windows, holdout) for forecast days 1..5. Day d uses windows of width d.
    # The model learns "window i -> last value of window i+1", and the query is
    # the window `holdout` rows from the end.
    # NOTE: the shifts and holdout counts are hand-tuned to the length of the
    # current date range; with the 180 business days configured for this round
    # the five targets are the last five flattened values.
    day_windows = [
        (data_close_flatten.reshape((-1, 1)), 6),
        (shifted_right.reshape((-1, 2)), 3),
        (shifted_left.reshape((-1, 3)), 3),
        (shifted_right.reshape((-1, 4)), 2),
        (data_close_flatten.reshape((-1, 5)), 2),
    ]

    predictions = []
    for windows, holdout in day_windows:
        x_close = windows[:-holdout]                 # training x
        y_close = windows[1:-(holdout - 1), -1]      # training y, always 1-D
        x_close_public = windows[-holdout]           # predict x

        prediction_close = get_prediction(x_close, y_close, x_close_public)
        # Store a plain scalar so the five forecasts form a regular 1-D sequence.
        predictions.append(np.ravel(prediction_close)[0])

    sample_submission.loc[:4, code] = predictions
sample_submission.isna().sum().sum()

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [06:19<00:00,  1.01s/it]


0

In [7]:
# private: extend the window's end date and rebuild the business-day calendar.

start_date = '20210104'
end_date = '20211001'

first_day = pd.to_datetime(start_date)
last_day = pd.to_datetime(end_date)
start_weekday = first_day.weekday()     # 0 means Monday
max_weeknum = last_day.strftime('%V')   # ISO week number, kept as a zero-padded string
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}')
display(Business_days)

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 39
HOW MANY "Business_days" : (195, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08
...,...
190,2021-09-27
191,2021-09-28
192,2021-09-29
193,2021-09-30


In [8]:
# private data

def NMAE(y_pred, y_train):
    """Return the mean absolute error relative to ``y_train``, as a percentage.

    Re-declares the same metric already defined in the public-data cell.
    """
    relative_error = np.abs(y_train - y_pred) / y_train
    return np.mean(relative_error) * 100

SEED = 42  # fixed seed so the random forest gives repeatable forecasts

model1 = LinearRegression(n_jobs=-1)
# NOTE(review): scikit-learn 1.0 renamed this criterion to "absolute_error" and
# later releases dropped "mae"; switch the string when running on a newer version.
model2 = RandomForestRegressor(criterion="mae", random_state=SEED)
# NOTE(review): get_prediction calls fit(X, y) without an eval_set, so this
# eval_metric is presumably never evaluated — confirm.
model3 = xgb.XGBRegressor(n_jobs=-1, eval_metric=NMAE)
models = [model1, model2, model3]

# Blending weights, one per model, in the same order as `models`.
model1_rate = 0.5
model2_rate = 0.4
model3_rate = 0.1
models_rate = [model1_rate, model2_rate, model3_rate]

for code in tqdm(stock_list['종목코드'].values):
    # Daily closes for this ticker, aligned to every business day in the window.
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.dt.weekday
    data['weeknum'] = data.Date.dt.strftime('%V')
    # Business days without a quote take the previous close; leading gaps take
    # the first available close.
    data['Close'] = data.Close.ffill().bfill()

    # One row per ISO week, one column per weekday.
    data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')
    assert data_close.shape[1] == 5, (
        f'{code}: expected 5 weekday columns, got {data_close.shape[1]}'
    )

    # Flatten back to one chronological series so it can be re-cut into windows.
    data_close_flatten = np.ravel(data_close.to_numpy())

    # (windows, holdout) for forecast days 1..5. Day d uses windows of width d.
    # The model learns "window i -> last value of window i+1", and the query is
    # the window `holdout` rows from the end.
    # NOTE: the head/tail trims are hand-tuned to the length of the current date
    # range; with the 195 business days configured for this round the five
    # targets are the last five flattened values.
    day_windows = [
        (data_close_flatten.reshape((-1, 1)), 6),
        (data_close_flatten[:-3].reshape((-1, 2)), 2),
        (data_close_flatten[1:-2].reshape((-1, 3)), 2),
        (data_close_flatten[2:-1].reshape((-1, 4)), 2),
        (data_close_flatten.reshape((-1, 5)), 2),
    ]

    predictions = []
    for windows, holdout in day_windows:
        x_close = windows[:-holdout]                 # x_train
        y_close = windows[1:-(holdout - 1), -1]      # y_train, always 1-D
        x_close_public = windows[-holdout]           # predict x

        prediction_close = get_prediction(x_close, y_close, x_close_public)
        # Store a plain scalar so the five forecasts form a regular 1-D sequence.
        predictions.append(np.ravel(prediction_close)[0])

    sample_submission.loc[5:,code] = predictions
sample_submission.isna().sum().sum()

100%|████████████████████████████████████████████████████████████████████████████████| 376/376 [06:27<00:00,  1.03s/it]


0

In [33]:
# Write the filled-in submission to disk without the row index.
submission_filename = 'BASELINE_Linear_Week4.csv'
sample_submission.to_csv(submission_filename, index=False)

In [32]:
# Inspect the filled submission: rows 0-4 are written by the public-data loop,
# rows 5 onward by the private-data loop.
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,...,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-09-06,29337.129718,34546.5431,64813.692605,170692.125696,93216.764122,17088.176491,53691.038972,85551.070518,107324.867155,...,48299.857508,52821.168545,62526.486012,36137.764415,35350.401403,57587.9416,282038.66791,283974.633027,29271.41735,17707.244452
1,2021-09-07,28797.841635,34590.626549,63840.940756,169396.691252,93005.286287,17491.237121,53467.148196,85673.774685,106590.616766,...,48109.89936,52420.690587,61868.053214,35769.168919,35543.009923,59002.094323,284002.879097,293816.761147,29716.330274,17540.310292
2,2021-09-08,28870.156139,34724.975559,63821.479534,171721.236976,94245.764369,17322.111714,54053.578245,84925.145493,107203.188337,...,47219.854881,51992.384914,63093.895446,34996.774255,35822.352523,57550.390647,282816.417498,287605.209801,29945.13337,17681.556299
3,2021-09-09,29614.108196,34614.313632,63456.570839,170940.833333,97938.84971,17373.288707,54309.280558,84256.763882,106389.922668,...,48392.088644,55907.230616,61757.564488,34886.408637,35018.519181,57223.514577,284402.677538,290903.88517,29648.539831,17926.670753
4,2021-09-10,28758.27103,34560.97854,63377.321809,170539.234807,90781.546,17950.062822,53046.954482,83956.431178,105273.655583,...,47688.619023,50422.149092,62437.298036,33330.464349,34498.982438,58198.803061,286131.490258,290505.926883,29352.052977,17748.69786
5,2021-09-27,29744.880132,33617.928115,63033.405021,167807.421634,96359.399732,16426.018963,52210.555293,83174.701558,104295.818833,...,46671.749987,49259.695877,67983.117307,32133.036735,29316.374845,58441.912512,266489.94991,290881.761032,28487.14827,19758.755721
6,2021-09-28,30105.592244,33210.196656,63762.458246,168496.51931,95467.500225,16760.432405,52594.806839,83398.560417,104922.981613,...,46234.41907,48701.90903,67337.232992,32155.248018,29873.63108,57475.387746,271699.829598,294322.202602,28515.5186,19852.483264
7,2021-09-29,29817.986941,33530.237322,63191.133929,169454.082803,96453.21423,16619.638344,54209.10227,83188.11887,105712.59527,...,45959.260342,49673.643985,67420.171715,31908.652242,29753.876738,57634.604502,269717.045819,291707.043686,28817.134038,21039.880129
8,2021-09-30,29822.561973,33137.170478,63485.665112,168369.958866,96262.307621,16766.807405,52728.055041,83309.98255,105728.891314,...,46544.461631,48545.438392,66854.246453,32194.140252,29221.6005,56759.84008,272465.225195,300292.637221,28522.965042,21047.456581
9,2021-10-01,30348.40763,33405.458587,63503.435694,168740.286804,96530.128378,17671.78089,52538.131255,84112.593714,104458.217404,...,46794.043511,47975.21998,65722.615546,31673.783279,30155.848179,55764.181421,274474.225278,293446.243007,28427.088998,20587.075985


In [None]:
# public data — needs to be revised on Saturday
# NOTE(review): scratch variant of the public-data loop for a single ticker.
# It rebinds the notebook-level `models` / `models_rate` that get_prediction reads,
# and it builds `predictions` without writing them into sample_submission.

model1 = LinearRegression()
model2 = RandomForestRegressor(criterion="mae", n_estimators=200)
models = [model1, model2]

model1_rate = 0.7
model2_rate = 0.3
models_rate = [model1_rate, model2_rate]

# Daily closes for ticker "000060", aligned to the current Business_days calendar.
data = fdr.DataReader("000060", start = start_date, end = end_date)[['Close']].reset_index()
data = pd.merge(Business_days, data, how = 'outer')
data['weekday'] = data.Date.apply(lambda x : x.weekday())
data['weeknum'] = data.Date.apply(lambda x : x.strftime('%V'))
# Fill business days without a quote from the previous close, then any leading gap
# from the first available close.
data.Close = data.Close.ffill()
data.Close = data.Close.bfill()

# One row per week number, one column per weekday.
data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')

# data flatten  # data flatten for reshape
data_close_flatten = np.ravel(data_close.to_numpy())

# NOTE(review): with n = len(data_close_flatten), the reshapes below need
# n + 1 divisible by 2, n + 2 divisible by 3, n + 5 divisible by 4 and n divisible
# by 5. These hold together for n = 175 but not for the 180- or 195-day ranges set
# earlier in this notebook, so this cell looks stale — confirm before running.

predictions = []


# predict day 1

# close
data_close_day1 = data_close_flatten.reshape((-1,1))

x_close = data_close_day1[:-6]  # training x
y_close = data_close_day1[1:-5]  # training y
x_close_public = data_close_day1[-6]  # predict x

prediction_close = get_prediction(x_close, y_close, x_close_public)
predictions.append(prediction_close)

# predict day 1 finish


# predict day 2

# close
data_close_day2 = np.append(data_close_flatten, 0)  # padding
data_close_day2 = data_close_day2.reshape((-1,2))

x_close = data_close_day2[:-4]
y_close = data_close_day2[1:-3][:, 1]  # train on and predict only the 2nd day
x_close_public = data_close_day2[-4]   

prediction_close = get_prediction(x_close, y_close, x_close_public)
predictions.append(prediction_close)

# predict day 2 finish


# predict day 3

# close
# Pad one 0 at the end and duplicate the first value at the front before cutting
# into rows of 3.
data_close_day3 = np.append(data_close_flatten, 0)
data_close_day3 = np.insert(data_close_day3, 0, data_close_day3[0])
data_close_day3 = data_close_day3.reshape((-1, 3))

x_close = data_close_day3[:-3]
y_close = data_close_day3[1:-2][:, 2]
x_close_public = data_close_day3[-3]

prediction_close = get_prediction(x_close, y_close, x_close_public)
predictions.append(prediction_close)

# predict day 3 finish


# predict day 4

# close
# Duplicate the first value twice at the front and pad three 0s at the end before
# cutting into rows of 4.
data_close_day4 = np.insert(data_close_flatten, 0, data_close_flatten[0])
data_close_day4 = np.insert(data_close_day4, 0, data_close_day4[0])
data_close_day4 = np.append(data_close_day4, 0)
data_close_day4 = np.append(data_close_day4, 0)
data_close_day4 = np.append(data_close_day4, 0)
data_close_day4 = data_close_day4.reshape((-1, 4))

x_close = data_close_day4[:-3]
y_close = data_close_day4[1:-2][:, 3]
x_close_public = data_close_day4[-3]

prediction_close = get_prediction(x_close, y_close, x_close_public)
predictions.append(prediction_close)

# predict day 4 finish

# predict day 5

# close
data_close_day5 = data_close_flatten.reshape((-1, 5))

x_close = data_close_day5[:-2]
y_close = data_close_day5[1:-1][:, 4]
x_close_public = data_close_day5[-2]

prediction_close = get_prediction(x_close, y_close, x_close_public)
predictions.append(prediction_close)

# predict day 5 finish
