In [1]:
import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore", category=np.VisibleDeprecationWarning)

In [2]:
# Load the list of target stocks and normalise the stock codes.

path = './open'
list_name = 'Stock_List.csv'
sample_name = 'sample_submission_week3.csv'

stock_list = pd.read_csv(os.path.join(path, list_name))
# Render every code as text and left-pad it with zeros to 6 characters.
stock_list['종목코드'] = [str(raw_code).zfill(6) for raw_code in stock_list['종목코드']]
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
371,더네이쳐홀딩스,298540,KOSDAQ
372,코엔텍,029960,KOSDAQ
373,원익홀딩스,030530,KOSDAQ
374,웹케시,053580,KOSDAQ


In [3]:
# Select the start/end dates of the public period and build its business-day calendar.

start_date = '20210104'
end_date = '20210903'

# Weekday of the first date (0 = Monday) and ISO week number of the last date.
start_weekday = pd.to_datetime(start_date).weekday()
max_weeknum = pd.to_datetime(end_date).strftime('%V')
# One row per Monday-Friday date in the range (freq='B'); holidays are not removed.
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(
    f'WEEKDAY of "start_date" : {start_weekday}',
    f'NUM of WEEKS to "end_date" : {max_weeknum}',
    f'HOW MANY "Business_days" : {Business_days.shape}',
    sep='\n',
)
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 35
HOW MANY "Business_days" : (175, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


In [4]:
# Load the submission template; the prediction loops fill it column by column.

sample_name = 'sample_submission_week3.csv'
sample_submission = pd.read_csv(
    os.path.join(path, sample_name),
)

In [5]:
# training, predict method

def get_prediction(x_close, y_close, x_close_public):
    """Fit every model in ``models`` and return their weighted blended forecast.

    Relies on two module-level lists that must be defined before the call:
    ``models`` (the estimators) and ``models_rate`` (one blending weight per
    estimator, in the same order).

    Parameters
    ----------
    x_close : array-like, shape (n_samples, n_features)
        Training features.
    y_close : array-like, shape (n_samples,) or (n_samples, 1)
        Training targets. Flattened to 1-D before fitting, so both layouts
        give the same result.
    x_close_public : array-like, shape (n_features,)
        The single feature row to forecast from.

    Returns
    -------
    numpy.ndarray, shape (1,)
        Sum over models of ``prediction * weight``.

    Raises
    ------
    ValueError
        If ``models`` and ``models_rate`` differ in length.
    """
    # NOTE(review): "mse"/"mae" are the criterion names accepted by the
    # scikit-learn version this notebook was run with; newer releases name
    # them "squared_error"/"absolute_error" - confirm against the installed
    # version before re-running.
    param_grid = {
        'max_depth': [None, 50, 80, 110],
        'criterion': ["mse", "mae"],
        'n_estimators': [25, 50, 100, 150, 200]
    }

    if len(models) != len(models_rate):
        raise ValueError("models and models_rate must have the same length")

    # Use the same 1-D target for every estimator. Previously only the forest
    # branch flattened y, so a column-vector target made the linear model
    # return shape (1, 1) and the blended result changed shape between calls.
    y_train = np.ravel(y_close)
    # predict() expects a 2-D batch; wrap the single feature row.
    x_query = np.expand_dims(x_close_public, 0)

    prediction_close = 0.0
    for model, rate in zip(models, models_rate):
        if hasattr(model, "max_depth"):  # tree ensemble (random forest): tune it
            grid_search = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
            grid_search.fit(x_close, y_train)
            fitted = grid_search.best_estimator_
        else:
            model.fit(x_close, y_train)
            fitted = model
        prediction_close = prediction_close + np.ravel(fitted.predict(x_query)) * rate

    return prediction_close

In [6]:
# public data: forecast the five closes of the final week in the date range
# and write them to the first five rows of the submission.

model1 = LinearRegression()
model2 = RandomForestRegressor(criterion="mae", n_estimators=200)
models = [model1, model2]

model1_rate = 0.7
model2_rate = 0.3
models_rate = [model1_rate, model2_rate]

# Number of trailing business days that form the week being forecast.
FORECAST_DAYS = 5

for code in tqdm(stock_list['종목코드'].values):
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    # Align on the business-day calendar so every stock has the same dates.
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.dt.weekday
    data['weeknum'] = data.Date.dt.strftime('%V')
    # Fill dates without a quote from the previous close, then from the next one.
    data['Close'] = data['Close'].ffill().bfill()

    # weeks x weekdays table, flattened row by row into one series.
    data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')
    data_close_flatten = np.ravel(data_close.to_numpy())

    # The final week is the one being forecast; it is never used as a
    # feature or as a label.
    known_close = data_close_flatten[:-FORECAST_DAYS]

    predictions = []
    for horizon in range(1, FORECAST_DAYS + 1):
        # Cut the known series into non-overlapping windows of `horizon`
        # closes, aligned so the last window ends on the last known close.
        # If the length is not a multiple of `horizon`, the front is padded
        # by repeating the first close (replaces the hand-tuned
        # np.insert/np.append padding that only fit one specific date range).
        front_pad = -known_close.size % horizon
        windows = np.pad(known_close, (front_pad, 0), mode='edge').reshape((-1, horizon))

        x_close = windows[:-1]          # training x: every window but the last
        y_close = windows[1:, -1]       # training y: last close of the following window
        x_close_public = windows[-1]    # predict x: the most recent window

        prediction_close = get_prediction(x_close, y_close, x_close_public)
        # Store a plain scalar so the list assigned below is never ragged.
        predictions.append(np.ravel(prediction_close)[0])

    sample_submission.loc[:4,code] = predictions
sample_submission.isna().sum().sum()

100%|██████████████████████████████████████████████████████████████████████████████| 376/376 [1:09:54<00:00, 11.16s/it]


0

In [7]:
# Select the start/end dates of the private period and rebuild the business-day calendar.
start_date = '20210104'
end_date = '20210917'

# Weekday of the first date (0 = Monday) and ISO week number of the last date.
start_weekday = pd.to_datetime(start_date).weekday()
max_weeknum = pd.to_datetime(end_date).strftime('%V')
# One row per Monday-Friday date in the range (freq='B'); holidays are not removed.
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(
    f'WEEKDAY of "start_date" : {start_weekday}',
    f'NUM of WEEKS to "end_date" : {max_weeknum}',
    f'HOW MANY "Business_days" : {Business_days.shape}',
    sep='\n',
)
display(Business_days)

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 37
HOW MANY "Business_days" : (185, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08
...,...
180,2021-09-13
181,2021-09-14
182,2021-09-15
183,2021-09-16


In [8]:
# private data: forecast the five closes of the final week in the date range
# and write them to the submission rows from index 5 onward.

model1 = LinearRegression()
model2 = RandomForestRegressor(criterion="mae", n_estimators=200)
models = [model1, model2]

model1_rate = 0.7
model2_rate = 0.3
models_rate = [model1_rate, model2_rate]

# Number of trailing business days that form the week being forecast.
FORECAST_DAYS = 5

for code in tqdm(stock_list['종목코드'].values):
    data = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    # Align on the business-day calendar so every stock has the same dates.
    data = pd.merge(Business_days, data, how = 'outer')
    data['weekday'] = data.Date.dt.weekday
    data['weeknum'] = data.Date.dt.strftime('%V')
    # Fill dates without a quote from the previous close, then from the next one.
    data['Close'] = data['Close'].ffill().bfill()

    # weeks x weekdays table, flattened row by row into one series.
    data_close = pd.pivot_table(data = data, values = 'Close', columns = 'weekday', index = 'weeknum')
    data_close_flatten = np.ravel(data_close.to_numpy())

    # The final week is the one being forecast; it is never used as a
    # feature or as a label.
    known_close = data_close_flatten[:-FORECAST_DAYS]

    predictions = []
    for horizon in range(1, FORECAST_DAYS + 1):
        # Cut the known series into non-overlapping windows of `horizon`
        # closes, aligned so the last window ends on the last known close.
        # If the length is not a multiple of `horizon`, the front is padded
        # by repeating the first close (replaces the hand-tuned np.append
        # padding that only fit one specific date range).
        front_pad = -known_close.size % horizon
        windows = np.pad(known_close, (front_pad, 0), mode='edge').reshape((-1, horizon))

        x_close = windows[:-1]          # x_train: every window but the last
        y_close = windows[1:, -1]       # y_train: last close of the following window
        x_close_public = windows[-1]    # predict x: the most recent window

        prediction_close = get_prediction(x_close, y_close, x_close_public)
        # Store a plain scalar so the list assigned below is never ragged.
        predictions.append(np.ravel(prediction_close)[0])

    sample_submission.loc[5:,code] = predictions
sample_submission.isna().sum().sum()

100%|██████████████████████████████████████████████████████████████████████████████| 376/376 [1:10:19<00:00, 11.22s/it]


0

In [9]:
# Write the filled submission to disk without the row index.
sample_submission.to_csv(
    'BASELINE_Linear_Week3.csv',
    index=False,
)

In [10]:
# Display the final submission frame as the cell output for a visual check.
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,...,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-08-30,25210.154522,32839.964777,61129.651679,169906.983879,89370.378087,17076.747809,54697.877553,82291.211531,103985.447647,...,46001.190141,51570.526911,58996.053269,34833.966695,31521.354783,52817.498667,282853.478244,287790.668024,30314.820677,17050.719346
1,2021-08-31,25348.258077,33285.186874,61194.061559,170547.421564,89787.948688,17350.728908,54969.676025,82245.216508,103836.670406,...,45632.123148,51885.925308,59410.334944,34727.211478,31335.720327,53210.286453,284815.895519,290718.347478,30157.567557,17130.045021
2,2021-09-01,25124.89162,33228.148825,61687.520984,170036.220549,90135.029045,17679.224129,56941.931355,82484.564035,104291.439238,...,45740.338638,51089.758239,58944.90078,33778.525074,31081.167493,53652.243797,287753.136231,287050.109941,30058.529693,17439.325542
3,2021-09-02,25759.766791,33370.546,61227.826748,170599.071988,90327.976199,17496.515305,55249.052284,83104.671087,103695.915155,...,45652.971741,52109.624875,59763.139989,34274.155179,31498.273208,54532.50099,288647.347416,296873.090195,29974.853862,17279.987976
4,2021-09-03,24418.280767,32820.550799,61623.236274,170925.245514,92991.478697,18003.654787,57470.657136,83143.526828,102150.657913,...,47837.018821,48013.730463,60052.859636,33279.872263,31401.999121,54946.893025,288740.702925,295966.491852,29525.423787,16303.800795
5,2021-09-13,33315.711265,34571.648485,64127.5881,170352.686056,90616.327126,16945.276265,52469.928155,84317.171086,104958.83989,...,50145.243492,50088.051588,62657.258608,33140.755967,31002.210466,60775.770532,269105.98939,282068.902041,29048.759474,17157.522571
6,2021-09-14,33068.290565,34718.243301,64333.874473,170899.462842,91149.527867,16984.999018,52278.889886,83195.407987,105071.701254,...,50192.974744,49044.387511,62117.460112,34141.942871,31563.50769,60800.125641,272468.916389,283260.912022,29012.956117,17161.076195
7,2021-09-15,32685.713371,34832.51571,63508.376962,170852.869442,92230.43712,17115.794929,52915.561231,82088.080814,104468.368148,...,49233.224147,49574.277167,61590.448579,33482.279318,31681.939745,60519.99809,270519.917877,284141.225448,29491.336581,16914.565438
8,2021-09-16,32741.287504,34490.736569,64227.139002,171816.045942,92520.113423,17434.506052,52693.394457,84174.978725,104384.916406,...,49675.819482,52124.205587,62289.517432,34336.341679,31741.896058,61011.840473,275868.573042,283285.605105,29432.568948,17761.589897
9,2021-09-17,33104.693414,34503.521015,64305.709038,171458.248722,91040.864301,17985.902971,53261.513619,87167.796398,105540.317803,...,49021.983405,49433.505555,61986.242576,32646.864373,31892.554196,60858.153263,275245.403908,285286.858558,29643.272278,18478.923431
