In [15]:
import pandas as pd
import numpy as np
import os
import FinanceDataReader as fdr
from sklearn.linear_model import LinearRegression
from tqdm import tqdm

In [7]:
# Directory holding the competition files, and the file names read from it.
path = './open'
list_name = 'Stock_list.csv'
# Submission template file name; spelled 'submission' so it matches the file
# that is actually loaded later in this notebook.
sample_name = 'sample_submission.csv'

stock_list = pd.read_csv(os.path.join(path, list_name))
# Ticker codes are used as 6-character strings downstream (presumably the CSV
# parser reads them as numbers and drops leading zeros), so left-pad with '0'.
stock_list['종목코드'] = stock_list['종목코드'].astype(str).str.zfill(6)
stock_list

Unnamed: 0,종목명,종목코드,상장시장
0,삼성전자,005930,KOSPI
1,SK하이닉스,000660,KOSPI
2,NAVER,035420,KOSPI
3,카카오,035720,KOSPI
4,삼성바이오로직스,207940,KOSPI
...,...,...,...
371,더네이쳐홀딩스,298540,KOSDAQ
372,코엔텍,029960,KOSDAQ
373,원익홀딩스,030530,KOSDAQ
374,웹케시,053580,KOSDAQ


In [8]:
# Inclusive analysis window, written as YYYYMMDD strings.
start_date = '20210104'
end_date = '20210813'

window_start = pd.to_datetime(start_date)
window_end = pd.to_datetime(end_date)

start_weekday = window_start.weekday()      # 0 = Monday
max_weeknum = window_end.strftime('%V')     # ISO 8601 week number, zero-padded string
# One row per Mon-Fri date in the window; freq='B' knows nothing about exchange holidays.
Business_days = pd.DataFrame({'Date': pd.date_range(start_date, end_date, freq='B')})

print(f'WEEKDAY of "start_date" : {start_weekday}')
print(f'NUM of WEEKS to "end_date" : {max_weeknum}')
print(f'HOW MANY "Business_days" : {Business_days.shape}')
display(Business_days.head())

WEEKDAY of "start_date" : 0
NUM of WEEKS to "end_date" : 32
HOW MANY "Business_days" : (160, 1)


Unnamed: 0,Date
0,2021-01-04
1,2021-01-05
2,2021-01-06
3,2021-01-07
4,2021-01-08


In [9]:
# Build the week x weekday closing-price table for the first ticker in the list.
sample_code = stock_list.loc[0, '종목코드']

# Daily closes for the window, with the date moved from the index into a 'Date' column.
sample_daily = fdr.DataReader(sample_code, start=start_date, end=end_date)[['Close']].reset_index()

sample = (
    # The outer merge onto the business-day calendar adds a row for every weekday
    # the price feed has no quote for, so those days can be forward-filled.
    pd.merge(Business_days, sample_daily, how='outer')
    .assign(
        # Vectorised datetime accessors instead of a Python-level apply per row.
        weekday=lambda df: df['Date'].dt.weekday,            # 0 = Monday
        weeknum=lambda df: df['Date'].dt.strftime('%V'),     # ISO week number as a string
        # Days without a quote inherit the previous available close.
        Close=lambda df: df['Close'].ffill(),
    )
    # One row per week, one column per weekday; each cell holds a single close.
    .pivot_table(values='Close', columns='weekday', index='weeknum')
)
sample.head()

weekday,0,1,2,3,4
weeknum,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,83000.0,83900.0,82200.0,82900.0,88800.0
2,91000.0,90600.0,89700.0,89700.0,88000.0
3,85000.0,87000.0,87200.0,88100.0,86800.0
4,89400.0,86700.0,85600.0,83700.0,82000.0
5,83000.0,84400.0,84600.0,82500.0,83500.0


In [10]:
# Linear regression estimator; this one instance is refit for each weekday target below.
model = LinearRegression()

In [11]:
# Training inputs: every weekly row except the last two
# (the second-to-last is the prediction input, the last is held out).
x = sample.to_numpy()[:-2]
x.shape

(30, 5)

In [12]:
# Training targets: the weekly rows shifted by one, so row i of `y` is the week
# that follows row i of `x`.
y = sample.to_numpy()[1:-1]

# One target vector per weekday column (0 = Monday ... 4 = Friday).
y_0, y_1, y_2, y_3, y_4 = (y[:, day] for day in range(5))

y_values = [y_0, y_1, y_2, y_3, y_4]

In [13]:
# Model input for the forecast: the second-to-last weekly row of `sample`.
# With end_date = '20210813' this is ISO week 31 (the week starting 2021-08-02),
# i.e. the week immediately before the final row.
x_public = sample.iloc[-2].to_numpy()

In [16]:
# Fit one regression per weekday target and collect its forecast for the week
# that follows `x_public`.
predictions = []
x_query = x_public.reshape(1, -1)   # predict() wants a 2-D (n_samples, n_features) array
for y_value in y_values:
    prediction = model.fit(x, y_value).predict(x_query)
    predictions.append(prediction[0])
predictions

[81651.47003256956,
 80637.66402016714,
 80063.78403724232,
 80261.05815274538,
 80566.37823375824]

In [17]:
# Closing prices in the final weekly row (the week after `x_public`), shown for
# comparison with the forecasts above.
sample.iloc[-1].values

array([81500., 80200., 78500., 77000., 74400.])

# 전체 모델링

In [18]:
# Load the submission template that the forecasts will be written into.
sample_name = 'sample_submission.csv'
submission_path = os.path.join(path, sample_name)
sample_submission = pd.read_csv(submission_path)

In [19]:
def load_weekly_close(code):
    """Download one ticker's closing prices and arrange them as a week x weekday table.

    Parameters
    ----------
    code : str
        Ticker code passed to ``fdr.DataReader``.

    Returns
    -------
    pandas.DataFrame
        Rows are ISO week numbers (``'%V'`` strings), columns are weekday numbers
        (0 = Monday), values are closing prices. Weekdays in ``Business_days``
        without a quote carry the previous available close forward.
    """
    daily = fdr.DataReader(code, start = start_date, end = end_date)[['Close']].reset_index()
    # The outer merge onto the business-day calendar adds a row for every weekday
    # the feed has no quote for, so those days can be forward-filled below.
    daily = pd.merge(Business_days, daily, how = 'outer')
    # Vectorised datetime accessors instead of a Python-level apply per row.
    daily['weekday'] = daily['Date'].dt.weekday
    # NOTE: '%V' alone does not identify the year, so the pivot's row order is only
    # chronological while start_date..end_date stays inside a single ISO year.
    daily['weeknum'] = daily['Date'].dt.strftime('%V')
    daily['Close'] = daily['Close'].ffill()
    return pd.pivot_table(data = daily, values = 'Close', columns = 'weekday', index = 'weeknum')


def predict_next_week(weekly, model):
    """Forecast the five daily closes of the week after ``weekly.iloc[-2]``.

    Each week is regressed on the week before it: rows ``[:-2]`` are the inputs,
    rows ``[1:-1]`` the targets, and row ``-2`` is the input used for the forecast.
    The last row is never used for fitting.

    Parameters
    ----------
    weekly : pandas.DataFrame
        Week x weekday table as returned by ``load_weekly_close``.
    model : estimator with ``fit`` / ``predict``
        Refit in place once per weekday.

    Returns
    -------
    list
        Five forecasts, in weekday-column order.
    """
    features = weekly.iloc[:-2].to_numpy()
    targets = weekly.iloc[1:-1].to_numpy()
    latest = weekly.iloc[-2].to_numpy().reshape(1, -1)

    # Values before a ticker's first quote cannot be forward-filled and stay NaN.
    # Drop every training pair that contains one instead of letting fit() raise
    # and abort the whole run; fully populated tables are unaffected.
    complete = ~(np.isnan(features).any(axis = 1) | np.isnan(targets).any(axis = 1))
    features, targets = features[complete], targets[complete]

    forecasts = []
    for day in range(5):
        model.fit(features, targets[:, day])
        forecasts.append(model.predict(latest)[0])
    return forecasts


model = LinearRegression()
for code in tqdm(stock_list['종목코드'].values):
    data = load_weekly_close(code)
    predictions = predict_next_week(data, model)
    # List repetition: the five forecasts are written twice, which fills a
    # 10-row submission column with the same values for both 5-day blocks.
    sample_submission.loc[:,code] = predictions * 2
# Total number of missing cells left in the submission (0 means every cell is filled).
sample_submission.isna().sum().sum()

100%|██████████| 376/376 [01:05<00:00,  5.77it/s]


0

In [20]:
# Write the filled-in submission to the working directory, without the row index.
sample_submission.to_csv('BASELINE_Linear.csv', index = False)

In [21]:
# Display the filled-in submission table for a visual check.
sample_submission

Unnamed: 0,Day,000060,000080,000100,000120,000150,000240,000250,000270,000660,...,330860,336260,336370,347860,348150,348210,352820,357780,363280,950130
0,2021-08-09,25088.774605,34714.183395,62354.858793,174148.357879,99645.99144,18450.666819,57349.368469,85569.646903,118405.569322,...,54361.462752,50296.960691,62560.506714,41039.206014,31038.382064,60263.374643,308513.364845,334624.895895,31542.191165,27997.595012
1,2021-08-10,25310.344434,34906.631547,62421.045224,175344.854125,99456.81102,18545.155882,57910.759392,85674.60676,117799.663627,...,53713.801608,50024.815124,61815.110556,38727.25993,31501.793919,60965.999742,308613.184035,329380.415292,31296.168187,27805.80861
2,2021-08-11,25477.476177,34760.123065,62327.310315,179012.58717,98170.364318,18504.002732,58975.654433,85955.600532,117706.256291,...,54992.782414,50420.069668,61470.855325,38131.961839,31712.51667,61642.918332,307185.993759,329177.608851,31421.097728,28045.7526
3,2021-08-12,26173.790699,34920.540019,62012.861722,179203.939916,94559.29081,18559.460101,59168.801436,85695.685306,116628.596503,...,53216.691798,50168.091037,62373.488049,37203.504309,31618.857532,62218.118154,308168.046414,332080.961423,31095.70033,27760.830942
4,2021-08-13,26294.61519,35050.362444,62206.564371,176981.309028,94130.395799,18470.399872,58578.363991,85917.695693,117312.379453,...,53570.43976,50379.607826,62252.456624,37036.563545,31900.015153,61979.602842,306452.057782,332925.043789,31474.534603,27778.484687
5,2021-08-30,25088.774605,34714.183395,62354.858793,174148.357879,99645.99144,18450.666819,57349.368469,85569.646903,118405.569322,...,54361.462752,50296.960691,62560.506714,41039.206014,31038.382064,60263.374643,308513.364845,334624.895895,31542.191165,27997.595012
6,2021-08-31,25310.344434,34906.631547,62421.045224,175344.854125,99456.81102,18545.155882,57910.759392,85674.60676,117799.663627,...,53713.801608,50024.815124,61815.110556,38727.25993,31501.793919,60965.999742,308613.184035,329380.415292,31296.168187,27805.80861
7,2021-09-01,25477.476177,34760.123065,62327.310315,179012.58717,98170.364318,18504.002732,58975.654433,85955.600532,117706.256291,...,54992.782414,50420.069668,61470.855325,38131.961839,31712.51667,61642.918332,307185.993759,329177.608851,31421.097728,28045.7526
8,2021-09-02,26173.790699,34920.540019,62012.861722,179203.939916,94559.29081,18559.460101,59168.801436,85695.685306,116628.596503,...,53216.691798,50168.091037,62373.488049,37203.504309,31618.857532,62218.118154,308168.046414,332080.961423,31095.70033,27760.830942
9,2021-09-03,26294.61519,35050.362444,62206.564371,176981.309028,94130.395799,18470.399872,58578.363991,85917.695693,117312.379453,...,53570.43976,50379.607826,62252.456624,37036.563545,31900.015153,61979.602842,306452.057782,332925.043789,31474.534603,27778.484687
