In [None]:
!pip install finance-datareader

In [2]:
import os
import numpy as np
import pandas as pd
import FinanceDataReader as fdr

from sklearn.linear_model import LinearRegression
from tqdm import tqdm

## Get Stock List

In [3]:
# Directory holding the competition files, and the two file names used in this notebook.
path = '/content/drive/MyDrive/gh/kaggle/dacon/stock_price/data'
list_name, sample_name = 'stock_list.csv', 'sample_submission.csv'

# Load the list of tickers to model.
stock_list = pd.read_csv(os.path.join(path, list_name))

# Re-pad every ticker code to six characters with leading zeros.
stock_list['종목코드'] = [str(code).zfill(6) for code in stock_list['종목코드']]
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
365,맘스터치,220630,KOSDAQ
366,다날,064260,KOSDAQ
367,제이시스메디칼,287410,KOSDAQ
368,크리스에프앤씨,110790,KOSDAQ


## Get Data & Modeling

In [4]:
# Analysis window, given as YYYYMMDD strings.
start_date, end_date = '20210104', '20211105'

# Weekday of the first day (Monday is 0) and ISO week number of the last day.
start_weekday = pd.to_datetime(start_date).weekday()
max_weeknum = pd.to_datetime(end_date).strftime('%V')

# Every Monday-to-Friday date in the window; freq='B' does not remove exchange holidays.
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}')
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 44
HOW MANY "Business_days" : (220, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


In [5]:
# Work through the pipeline on a single ticker first: the first row of the stock list.
sample_code = stock_list.loc[0, '종목코드']

# Daily closing prices for that ticker over the analysis window.
sample = fdr.DataReader(sample_code, start=start_date, end=end_date)
sample = sample[['Close']].reset_index()

# Outer-join onto the business-day calendar: business days without a quote
# become rows with a missing close, which is then carried forward from the
# previous available close.
sample = Business_days.merge(sample, how='outer')
sample['weekday'] = sample['Date'].dt.weekday
sample['weeknum'] = sample['Date'].dt.strftime('%V')
sample['Close'] = sample['Close'].ffill()

# Reshape to one row per ISO week and one column per weekday (0-4).
sample = sample.pivot_table(values='Close', index='weeknum', columns='weekday')
sample.head()

weekday,0,1,2,3,4
weeknum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,83000.0,83900.0,82200.0,82900.0,88800.0
2,91000.0,90600.0,89700.0,89700.0,88000.0
3,85000.0,87000.0,87200.0,88100.0,86800.0
4,89400.0,86700.0,85600.0,83700.0,82000.0
5,83000.0,84400.0,84600.0,82500.0,83500.0


In [6]:
# Linear regressor; the sample cells below refit this one instance once per target weekday.
model = LinearRegression()

In [7]:
# Features: every weekly row except the last two. Each row is later paired
# with the following week as its target.
x = sample.iloc[:-2].to_numpy()
x.shape

(42, 5)

In [8]:
# Targets: the weekly table shifted forward by one row, so row i of `y`
# is the week that follows row i of `x`.
y = sample.iloc[1:-1].to_numpy()

# One target vector per weekday column (column 0 through column 4).
y_values = [y[:, weekday] for weekday in range(5)]
y_0, y_1, y_2, y_3, y_4 = y_values

In [9]:
# Second-to-last weekly row: the input whose following week (the last row) is predicted below.
x_public = sample.iloc[-2].to_numpy()

In [10]:
# Fit the regressor separately for each weekday target and collect the
# forecast for the week following `x_public`.
predictions = []
for y_value in y_values:
    # The model expects a 2-D input, so the single feature row gets a leading axis.
    prediction = model.fit(x, y_value).predict(x_public[np.newaxis, :])
    predictions += [prediction[0]]
predictions

[70206.67660106532,
 69631.42785252717,
 69062.32129096359,
 69258.21096883612,
 68846.00977524316]

In [11]:
# Closing prices of the last weekly row, i.e. the week the predictions above target.
sample.iloc[-1].to_numpy()

array([69900., 71500., 70400., 70600., 70200.])

## 전체 모델링

In [12]:
# Load the submission template; its ticker columns are filled with predictions further down.
# NOTE: `sample_name` is assigned the same value earlier, next to `path`.
sample_name = 'sample_submission.csv'
sample_submission = pd.read_csv(os.path.join(path,sample_name))

In [13]:
def _weekly_close_table(code, business_days, start, end):
    """Return one ticker's closing prices as a week-by-weekday table.

    Parameters
    ----------
    code : str
        Ticker code passed to ``fdr.DataReader``.
    business_days : pandas.DataFrame
        Calendar frame with a single ``Date`` column of Monday-Friday dates.
    start, end : str
        First and last date of the window, forwarded to ``fdr.DataReader``.

    Returns
    -------
    pandas.DataFrame
        Rows are ISO week numbers (``'%V'`` strings), columns are weekdays,
        values are closing prices.
    """
    # Presumably the reader returns a Date-indexed frame with a 'Close' column;
    # that is what the column selection and reset_index below rely on.
    prices = fdr.DataReader(code, start=start, end=end)[['Close']].reset_index()

    # Outer-join onto the calendar so business days without a quote become
    # rows with a missing close, then carry the previous close forward.
    daily = pd.merge(business_days, prices, how='outer')
    daily['weekday'] = daily['Date'].dt.weekday
    daily['weeknum'] = daily['Date'].dt.strftime('%V')
    daily['Close'] = daily['Close'].ffill()

    return pd.pivot_table(data=daily, values='Close', columns='weekday', index='weeknum')


def _predict_next_week(weekly, model):
    """Predict the week that follows the second-to-last row of ``weekly``.

    Each weekday column is fitted as its own target: week *i* (all weekday
    closes) is the input and the given weekday's close in week *i + 1* is the
    output. The last row is left out of training.

    Returns
    -------
    list of float
        One predicted close per weekday column, in column order.
    """
    features = weekly.iloc[0:-2].to_numpy()
    targets = weekly.iloc[1:-1].to_numpy()
    # Double brackets keep the row 2-D, which is the shape predict() expects.
    latest_week = weekly.iloc[[-2]].to_numpy()

    forecasts = []
    for target in targets.T:  # one column per weekday
        model.fit(features, target)
        forecasts.append(model.predict(latest_week)[0])
    return forecasts


model = LinearRegression()
for code in tqdm(stock_list['종목코드'].values):
    # With the window configured above: inputs cover 2021-01-04 to 2021-10-22,
    # targets cover 2021-01-11 to 2021-10-29, and the predicted week is
    # 2021-11-01 to 2021-11-05.
    weekly = _weekly_close_table(code, Business_days, start_date, end_date)
    predictions = _predict_next_week(weekly, model)

    # The same weekly forecast is repeated for every week in the submission.
    # The repeat count comes from the template length rather than a literal
    # `* 2` (list repetition that read like arithmetic doubling).
    n_weeks = len(sample_submission) // len(predictions)
    sample_submission.loc[:, code] = np.tile(predictions, n_weeks)

# Number of cells still missing after filling; 0 means every ticker was written.
sample_submission.isna().sum().sum()

100%|██████████| 370/370 [06:54<00:00,  1.12s/it]


0

In [14]:
# Inspect the column labels of the filled-in submission (a 'Day' column followed by ticker codes).
sample_submission.columns

Index(['Day', '000060', '000080', '000100', '000120', '000150', '000240',
       '000250', '000270', '000660',
       ...
       '330860', '336260', '336370', '347860', '348150', '348210', '352820',
       '357780', '363280', '950130'],
      dtype='object', length=371)

In [15]:
# Relabel the submission columns: the first column is named 'Day' and every
# other label is rendered as a six-character, zero-padded ticker string.
columns = ['Day']
columns.extend(str(label).zfill(6) for label in sample_submission.columns[1:])

sample_submission.columns = columns

In [None]:
# Write the filled-in submission to the working directory, without the row index.
sample_submission.to_csv('BASELINE_Linear.csv',index=False)

In [16]:
# Display the filled-in submission table.
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,000670,000720,000810,000880,000990,001230,001440,001450,001740,002380,002790,003000,003090,003380,003410,003490,003670,003800,004000,004020,004170,004370,004490,004800,004990,005250,005290,005300,005380,005385,...,272290,273130,278280,278530,282330,285130,287410,290510,290650,292150,293490,293780,294090,294870,298000,298020,298050,298380,299030,299660,299900,307950,314130,316140,319400,319660,321550,323990,326030,330590,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-11-01,27919.530611,34687.673458,60773.779528,142621.815394,104901.698658,16669.447967,47219.595113,85236.83317,103490.352393,690833.776141,50608.517303,237263.538679,34034.674854,57893.438176,17025.323281,2225.505721,26266.558909,5094.280802,328236.605306,49753.544422,13628.714811,31259.722365,9667.113863,7853.568107,30476.119968,145414.530997,47710.122653,85002.108239,44704.138686,249051.995178,285145.860714,82930.255665,103138.716096,32955.111852,29808.904546,34967.536448,147544.524471,209546.723955,96842.267858,...,41762.934054,106288.779582,286529.154325,12846.315739,161730.681424,181860.643597,7180.723244,7174.533233,34407.665139,13413.322949,79037.338507,49126.852192,52645.835387,25575.906455,284001.394987,603434.766009,709297.42818,20860.673495,54723.321055,70839.068455,27124.841285,116936.831486,37104.495707,13232.112864,3462.162648,38897.399454,18242.086113,67178.399306,94416.390435,5647.319175,49749.405974,51984.322942,84384.021784,36846.592704,25721.026664,53328.350326,336697.743579,262257.538308,27176.08509,17382.219194
1,2021-11-02,28750.750484,35032.651375,60312.130021,143012.927861,107216.342323,17001.594758,46672.351191,85360.327648,102788.687368,694357.607135,50760.18054,236510.356775,34526.198608,57809.78502,17311.490454,2259.170432,26465.064492,5142.527468,330851.115113,49866.137582,13731.344563,32405.316597,9633.518674,7831.761311,30449.44253,145316.262523,49093.785149,85579.601582,45043.820372,251759.843143,287030.385665,83929.784768,102997.278269,33270.781098,29327.755956,34527.741215,152186.925769,209302.264708,96796.209371,...,40157.848296,106159.267737,282016.978244,12804.314052,164926.272981,180775.713108,7172.534157,7194.82504,34284.494575,13325.025021,78309.497631,49637.073961,51197.91245,26374.913621,284798.925527,610662.960773,699009.826053,20849.846866,54547.22124,70040.400968,27079.95456,116522.742088,36807.009328,13396.197668,3388.393992,38301.220896,18175.749848,68204.295768,94506.028474,5630.088053,48923.253693,51539.56413,84739.157977,35258.227509,25330.52806,53645.466661,335662.149461,264562.230652,27415.549191,17338.522537
2,2021-11-03,28858.095631,34995.888574,60241.47041,145626.792237,111192.733424,17665.577952,45757.683516,85665.326378,102943.419081,694827.202108,50596.013731,234280.033838,34129.464791,58104.951715,17234.8186,2245.086944,26338.545166,5173.518369,328934.025155,50223.673421,13688.80014,32060.189467,9610.962131,7839.782996,30364.057367,149152.562966,48050.384365,85430.587779,44885.403954,250800.226652,286120.928746,83566.02193,103669.597561,33162.569257,29271.435795,34294.536311,154002.307421,208574.18041,96764.043484,...,39434.845888,106019.07134,282186.974336,12767.143799,164514.051335,178267.792981,7036.641965,7215.058888,34165.681853,13226.270003,77853.29654,49205.783022,52259.149146,25838.476183,286759.470513,612224.424643,691370.129662,20483.53137,54650.48103,71755.113819,27817.495081,116453.800867,37902.994878,13510.141668,3416.103903,37753.518026,18043.378263,65758.000414,97335.266225,5616.633336,48834.428543,49341.153729,85450.80606,35320.479243,25623.131825,55136.12148,329167.718872,264623.119599,27466.446666,17286.577226
3,2021-11-04,28901.301911,34866.098057,59701.569734,145351.536595,109530.741544,17846.022961,46380.15235,85494.864447,99958.476851,688281.852061,50551.637343,230973.448553,33921.763141,58495.140974,17238.604612,2234.132431,26384.259775,5206.594805,328556.46616,50030.369371,13309.767872,32647.123786,9587.939751,7798.041681,30346.101706,149398.740182,47464.822349,85208.109405,44998.394494,253128.475096,287592.756386,83489.183094,103787.428025,33176.524891,29584.105076,34234.768276,154678.387112,208781.238048,97326.657306,...,39758.779837,106161.069318,283996.725855,12785.239886,165917.165306,180194.33423,7062.703656,7270.073959,34146.68156,13162.818238,80730.454573,48069.374015,52718.104309,26248.727506,293741.541632,617095.75446,697496.128623,20121.939941,55091.119613,73514.621678,27990.535454,117048.190663,38841.404051,13536.127644,3397.884814,37724.109694,18087.730398,70814.174111,95947.091314,5621.703211,48954.93564,48626.256108,84823.889868,34343.016848,25878.11514,55964.135511,329482.609718,261821.140588,27594.351745,17247.876124
4,2021-11-05,28152.93854,34873.93908,59968.39316,143505.827198,108761.777883,18078.266972,46975.701291,84943.135732,100294.829339,685402.4236,50348.659384,233688.045215,33643.680824,58191.586195,17192.256594,2236.630157,25921.679527,5216.702491,327745.546412,50501.097623,13378.024398,32872.866374,9501.589094,7800.150194,30279.052101,149416.844286,47145.706758,85731.558816,44828.752651,254156.414269,287638.084087,83559.850886,103112.470363,33162.056551,29015.400135,33867.449804,152944.903061,209140.389323,97780.940236,...,40432.100709,106121.973127,282122.850831,12789.420322,166460.106106,181139.11733,7065.441252,7196.216243,34048.214752,13228.653546,75711.921502,49061.452753,53628.032621,26138.268111,294423.364642,616453.553939,700608.156637,18355.542676,56090.585285,74028.329752,28523.68506,117919.486428,38392.174899,13576.63585,3434.621123,37920.82663,18303.427202,70757.778479,96616.252157,5641.797195,49427.019462,47063.105078,86397.651814,34062.808374,26472.657621,55323.587424,321108.356663,264131.897754,27408.36665,17492.773824
5,2021-11-29,27919.530611,34687.673458,60773.779528,142621.815394,104901.698658,16669.447967,47219.595113,85236.83317,103490.352393,690833.776141,50608.517303,237263.538679,34034.674854,57893.438176,17025.323281,2225.505721,26266.558909,5094.280802,328236.605306,49753.544422,13628.714811,31259.722365,9667.113863,7853.568107,30476.119968,145414.530997,47710.122653,85002.108239,44704.138686,249051.995178,285145.860714,82930.255665,103138.716096,32955.111852,29808.904546,34967.536448,147544.524471,209546.723955,96842.267858,...,41762.934054,106288.779582,286529.154325,12846.315739,161730.681424,181860.643597,7180.723244,7174.533233,34407.665139,13413.322949,79037.338507,49126.852192,52645.835387,25575.906455,284001.394987,603434.766009,709297.42818,20860.673495,54723.321055,70839.068455,27124.841285,116936.831486,37104.495707,13232.112864,3462.162648,38897.399454,18242.086113,67178.399306,94416.390435,5647.319175,49749.405974,51984.322942,84384.021784,36846.592704,25721.026664,53328.350326,336697.743579,262257.538308,27176.08509,17382.219194
6,2021-11-30,28750.750484,35032.651375,60312.130021,143012.927861,107216.342323,17001.594758,46672.351191,85360.327648,102788.687368,694357.607135,50760.18054,236510.356775,34526.198608,57809.78502,17311.490454,2259.170432,26465.064492,5142.527468,330851.115113,49866.137582,13731.344563,32405.316597,9633.518674,7831.761311,30449.44253,145316.262523,49093.785149,85579.601582,45043.820372,251759.843143,287030.385665,83929.784768,102997.278269,33270.781098,29327.755956,34527.741215,152186.925769,209302.264708,96796.209371,...,40157.848296,106159.267737,282016.978244,12804.314052,164926.272981,180775.713108,7172.534157,7194.82504,34284.494575,13325.025021,78309.497631,49637.073961,51197.91245,26374.913621,284798.925527,610662.960773,699009.826053,20849.846866,54547.22124,70040.400968,27079.95456,116522.742088,36807.009328,13396.197668,3388.393992,38301.220896,18175.749848,68204.295768,94506.028474,5630.088053,48923.253693,51539.56413,84739.157977,35258.227509,25330.52806,53645.466661,335662.149461,264562.230652,27415.549191,17338.522537
7,2021-12-01,28858.095631,34995.888574,60241.47041,145626.792237,111192.733424,17665.577952,45757.683516,85665.326378,102943.419081,694827.202108,50596.013731,234280.033838,34129.464791,58104.951715,17234.8186,2245.086944,26338.545166,5173.518369,328934.025155,50223.673421,13688.80014,32060.189467,9610.962131,7839.782996,30364.057367,149152.562966,48050.384365,85430.587779,44885.403954,250800.226652,286120.928746,83566.02193,103669.597561,33162.569257,29271.435795,34294.536311,154002.307421,208574.18041,96764.043484,...,39434.845888,106019.07134,282186.974336,12767.143799,164514.051335,178267.792981,7036.641965,7215.058888,34165.681853,13226.270003,77853.29654,49205.783022,52259.149146,25838.476183,286759.470513,612224.424643,691370.129662,20483.53137,54650.48103,71755.113819,27817.495081,116453.800867,37902.994878,13510.141668,3416.103903,37753.518026,18043.378263,65758.000414,97335.266225,5616.633336,48834.428543,49341.153729,85450.80606,35320.479243,25623.131825,55136.12148,329167.718872,264623.119599,27466.446666,17286.577226
8,2021-12-02,28901.301911,34866.098057,59701.569734,145351.536595,109530.741544,17846.022961,46380.15235,85494.864447,99958.476851,688281.852061,50551.637343,230973.448553,33921.763141,58495.140974,17238.604612,2234.132431,26384.259775,5206.594805,328556.46616,50030.369371,13309.767872,32647.123786,9587.939751,7798.041681,30346.101706,149398.740182,47464.822349,85208.109405,44998.394494,253128.475096,287592.756386,83489.183094,103787.428025,33176.524891,29584.105076,34234.768276,154678.387112,208781.238048,97326.657306,...,39758.779837,106161.069318,283996.725855,12785.239886,165917.165306,180194.33423,7062.703656,7270.073959,34146.68156,13162.818238,80730.454573,48069.374015,52718.104309,26248.727506,293741.541632,617095.75446,697496.128623,20121.939941,55091.119613,73514.621678,27990.535454,117048.190663,38841.404051,13536.127644,3397.884814,37724.109694,18087.730398,70814.174111,95947.091314,5621.703211,48954.93564,48626.256108,84823.889868,34343.016848,25878.11514,55964.135511,329482.609718,261821.140588,27594.351745,17247.876124
9,2021-12-03,28152.93854,34873.93908,59968.39316,143505.827198,108761.777883,18078.266972,46975.701291,84943.135732,100294.829339,685402.4236,50348.659384,233688.045215,33643.680824,58191.586195,17192.256594,2236.630157,25921.679527,5216.702491,327745.546412,50501.097623,13378.024398,32872.866374,9501.589094,7800.150194,30279.052101,149416.844286,47145.706758,85731.558816,44828.752651,254156.414269,287638.084087,83559.850886,103112.470363,33162.056551,29015.400135,33867.449804,152944.903061,209140.389323,97780.940236,...,40432.100709,106121.973127,282122.850831,12789.420322,166460.106106,181139.11733,7065.441252,7196.216243,34048.214752,13228.653546,75711.921502,49061.452753,53628.032621,26138.268111,294423.364642,616453.553939,700608.156637,18355.542676,56090.585285,74028.329752,28523.68506,117919.486428,38392.174899,13576.63585,3434.621123,37920.82663,18303.427202,70757.778479,96616.252157,5641.797195,49427.019462,47063.105078,86397.651814,34062.808374,26472.657621,55323.587424,321108.356663,264131.897754,27408.36665,17492.773824
