In [4]:
%load_ext autoreload
%autoreload
import time, datetime
import os
import pandas as pd

from utils import get_volumeKospiHistory, get_volumeKosdaqHistory, get_priceHistory

# Show full cell contents when displaying frames. `None` is the documented
# "no limit" value (pandas >= 1.0); the legacy `-1` sentinel is deprecated.
pd.set_option('display.max_colwidth', None)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
# Collect the names of every CSV file found directly inside the KOSPI data folder.
path = "./Kospi/"
file_list = os.listdir(path)
kospi_file_list_csv = [fname for fname in file_list if fname.endswith(".csv")]
# NOTE(review): file names presumably look like '<6-char code><sep><name>.csv'
# (an earlier debug loop printed v[:6] + v[7:-4] for each entry) — TODO confirm.

In [6]:
# Collect the names of every CSV file found directly inside the KOSDAQ data folder.
# Note: this rebinds the module-level `path` and `file_list` names.
path = "./Kosdaq/"
file_list = os.listdir(path)
kosdaq_file_list_csv = [fname for fname in file_list if fname.endswith(".csv")]
# NOTE(review): file names presumably look like '<6-char code><sep><name>.csv'
# (an earlier debug loop printed v[:6] + v[7:-4] for each entry) — TODO confirm.

In [7]:
# Parameters of this run: which stock to analyse and over which date window.
# `code` and `name` are passed to the history helpers and reused in output file names.
code, name = "005930", "삼성전자"
start_date, end_date = "2010-01-01", "2019-12-31"

def getKospiMergedTab(code, name, start_date, end_date):
    """Return volume and price history for one stock, joined side by side.

    Both frames are fetched with the same arguments, their missing values are
    replaced by 0, and they are concatenated column-wise keeping only index
    labels present in both (inner join).

    Args:
        code: Stock code forwarded to both history helpers.
        name: Stock name forwarded to both history helpers.
        start_date: Start of the requested period, forwarded unchanged.
        end_date: End of the requested period, forwarded unchanged.

    Returns:
        DataFrame with the volume columns followed by the price columns.
        NOTE(review): if both helpers return a column with the same name,
        ``pd.concat`` keeps both, so the result may contain duplicate column
        labels — confirm against the helpers in ``utils``.
    """
    frames = [
        get_volumeKospiHistory(code, name, start_date, end_date).fillna(0),
        get_priceHistory(code, name, start_date, end_date).fillna(0),
    ]
    return pd.concat(frames, axis=1, join='inner')

# Merged volume + price table for the stock selected by `code`/`name` above.
# NOTE(review): the variable name says 008500 although `code` is set to "005930";
# the name is kept because a later cell reads it.
df008500merged = getKospiMergedTab(code, name, start_date, end_date)

In [8]:
def accumConsecutiveCount(df):
    """Add an ``accum`` column with the running length of positive streaks.

    For each row, ``accum`` is the number of consecutive rows, ending at and
    including that row, whose ``NetBuyAmount_Inst`` is strictly greater than
    zero. Rows that are zero, negative or NaN get 0 and restart the count.
    Streaks follow row order only, so the result does not depend on the type
    or spacing of the index, and an empty frame is handled.

    Args:
        df: DataFrame containing a ``NetBuyAmount_Inst`` column. It is
            modified in place: the ``accum`` column is added or overwritten.

    Returns:
        The same ``df`` object that was passed in.
    """
    is_net_buy = df['NetBuyAmount_Inst'] > 0
    # Every non-positive row bumps this counter, so each streak of positive
    # rows (together with the non-positive row preceding it) shares one id.
    streak_id = (~is_net_buy).cumsum().to_numpy()
    # Within a group the cumulative sum of the 0/1 flags is 0 for the leading
    # non-positive row and 1, 2, 3, ... for the positive rows that follow.
    # Plain arrays are used for the key and the assignment so that duplicate
    # index labels cannot trigger label alignment.
    df['accum'] = is_net_buy.astype(int).groupby(streak_id).cumsum().to_numpy()
    return df

# accumConsecutiveCount adds the `accum` column to the frame it receives and returns
# that same frame, so `df008500` and `df008500merged` refer to one object.
df008500 = accumConsecutiveCount(df008500merged)

In [12]:
def computeRevenue(df):
    """Tabulate 10-row percentage returns after each row with ``accum >= 5``.

    For every row whose ``accum`` is at least 5, the following row is taken as
    the entry row and its ``Open`` as the base price. The percentage change
    from the base price to ``Close`` is then recorded for the entry row and
    the nine rows after it, under the columns '6th' ... '15th'.

    Rows are collected in a list and turned into a DataFrame once, instead of
    growing the result with ``DataFrame.append`` (removed in pandas 2.0).
    Signal rows are located by position, so duplicate index labels are fine.

    Args:
        df: DataFrame with ``accum``, ``date``, ``Open`` and ``Close`` columns.
            It is only read, never modified.

    Returns:
        DataFrame with columns '매수일', '6th', ..., '15th', one row per
        qualifying signal; empty (columns only) when nothing qualifies.
    """
    columns = ['매수일', '6th', '7th', '8th', '9th', '10th',
               '11th', '12th', '13th', '14th', '15th']
    total_rows = df.shape[0]
    records = []
    for position, is_signal in enumerate((df['accum'] >= 5).tolist()):
        if not is_signal:
            continue
        next_day = position + 1
        # NOTE(review): the strict '<' also skips a window that would end exactly
        # on the last row (next_day + 10 == total_rows); kept as in the original.
        if not (next_day + 10) < total_rows:
            continue
        # NOTE(review): when the signal sits at position 4, next_day - 6 is -1 and
        # iloc wraps around to the LAST row. The `.at['date'][0]` lookup is kept
        # verbatim because the layout of the 'date' entry is not visible here.
        base_day = df.iloc[next_day - 6].at['date'][0]
        base_price = df.iloc[next_day]['Open']
        record = [base_day]
        for offset in range(10):
            close_price = df.iloc[next_day + offset]['Close']
            record.append(((close_price - base_price) * 100) / base_price)
        records.append(record)
    return pd.DataFrame(records, columns=columns)
# Per-signal return table for the stock prepared in the cells above.
df = computeRevenue(df008500)

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs('./output', exist_ok=True)
file_path = './output/raw_' + str(code) + "_" + name + ".csv"
df.to_csv(file_path, index = False, header=True)

In [13]:
def getSummary(code, name, df):
    """Summarise the per-signal return table into one row per statistic.

    Produces five rows — 'MAX', 'MIN', 'Mean', 'Median' and 'Probability' —
    each computed column by column over '6th' ... '15th' of ``df``.
    'Probability' is the fraction of rows whose value is strictly positive.

    The stock name is written under '종목명' and the stock code under
    '종목코드'; the two values used to be stored under each other's header.

    Args:
        code: Stock code, stored as given in the '종목코드' column.
        name: Stock name, stored as given in the '종목명' column.
        df: DataFrame with the columns '6th' ... '15th'.

    Returns:
        DataFrame with columns '종목명', '종목코드', '항목' followed by
        '6일째 수익률' ... '15일째 수익률', one row per statistic.
    """
    days = range(6, 16, 1)
    source_columns = [str(day) + 'th' for day in days]
    columns = ['종목명', '종목코드', '항목'] + [str(day) + '일째 수익률' for day in days]
    row_count = df.shape[0]

    def positive_share(series):
        # Share of rows with a strictly positive value; NaN for an empty table
        # instead of a ZeroDivisionError.
        if row_count == 0:
            return float('nan')
        return int((series > 0).sum()) / row_count

    statistics = [
        ('MAX', lambda series: series.max()),
        ('MIN', lambda series: series.min()),
        ('Mean', lambda series: series.mean()),
        ('Median', lambda series: series.median()),
        ('Probability', positive_share),
    ]
    # Build every row first and create the frame once; DataFrame.append was
    # removed in pandas 2.0.
    records = [
        [name, code, label] + [compute(df[column]) for column in source_columns]
        for label, compute in statistics
    ]
    return pd.DataFrame(records, columns=columns)

# Summary statistics (max / min / mean / median / share of positive returns)
# of the return table computed above.
output_df = getSummary(code, name, df)

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
os.makedirs('./output', exist_ok=True)
file_path = './output/summary_' + str(code) + "_" + name + ".csv"
output_df.to_csv(file_path, index = False, header=True)