In [1]:
import os
from pathlib import Path

import pandas as pd
from datetime import datetime, timedelta

import sqlite3

from pykrx import stock

In [2]:
# Set Path
# Resolve the data directory in a way that survives re-running this cell:
# after the chdir below, Path.cwd() already *is* DataStock, so appending
# 'DataStock' again would point at the non-existent DataStock/DataStock.
_cwd = Path.cwd()
folderPath = _cwd if _cwd.name == 'DataStock' else _cwd.joinpath('DataStock')
os.chdir(folderPath)

## **Stock List DB**

### **1. Definition**

In [4]:
def createTickerDf(day, market="KOSPI") :
    """Build the ticker table of one market for a given day.

    Parameters
    ----------
    day : str
        Reference day handed to ``stock.get_market_ticker_list``
        (the notebook passes 'YYYYMMDD' strings).
    market : str, default "KOSPI"
        Market to list: KOSPI / KOSDAQ / KONEX / ALL.

    Returns
    -------
    pandas.DataFrame
        One row per ticker with columns ``ticker``, ``name`` and ``key``,
        where ``key`` is the ticker and the name concatenated.
    """
    tickers = stock.get_market_ticker_list(day, market=market)
    df = pd.DataFrame(tickers, columns=['ticker'])
    df = df.assign(name=df['ticker'].apply(stock.get_market_ticker_name))
    # Column-wise concatenation: unlike a row-wise apply(axis=1) it still yields a
    # Series when the ticker list is empty, and it avoids a Python call per row.
    df = df.assign(key=df['ticker'] + df['name'])
    return df

In [5]:
def filterDfs(filteringDf, filteredDf) :
    """Return the rows of ``filteredDf`` whose 'key' does not occur in ``filteringDf``.

    Both frames must have a 'key' column. The surviving rows keep their
    original index labels and column layout.
    """
    # True for every row of filteredDf that filteringDf does not know about
    isUnknown = ~filteredDf['key'].isin(filteringDf['key'].tolist())
    return filteredDf.loc[isUnknown]

In [6]:
def differenceDf(oldDay, newDay) :
    """Compare the ticker listings of two days.

    Returns a tuple ``(delistedDf, listedDf)``: the rows present on
    ``oldDay`` but missing on ``newDay``, and the rows that appear on
    ``newDay`` without having been present on ``oldDay``. Rows are matched
    on the 'key' column produced by ``createTickerDf``.
    """
    before, after = createTickerDf(oldDay), createTickerDf(newDay)
    # old minus new -> delisted ; new minus old -> newly listed
    return filterDfs(after, before), filterDfs(before, after)

In [7]:
def creatingTickerInfoDf(day, market="KOSPI") :
    """Build the initial listing table for ``day``.

    Parameters
    ----------
    day : str
        Reference day handed to ``stock.get_market_ticker_list``
        (the notebook passes 'YYYYMMDD' strings). It is also written
        into both ``firstDay`` and ``endDay`` of every row.
    market : str, default "KOSPI"
        Market to list: KOSPI / KOSDAQ / KONEX / ALL.

    Returns
    -------
    pandas.DataFrame
        Columns ``ticker``, ``name``, ``key`` (ticker + name), ``firstDay``
        and ``endDay``, in that order.
    """
    tickers = stock.get_market_ticker_list(day, market=market)
    df = pd.DataFrame(tickers, columns=['ticker'])
    df = df.assign(name=df['ticker'].apply(stock.get_market_ticker_name))
    # Column-wise concatenation: unlike a row-wise apply(axis=1) it still yields a
    # Series when the ticker list is empty, and it avoids a Python call per row.
    df = df.assign(key=df['ticker'] + df['name'], firstDay=day, endDay=day)
    return df

In [8]:
def updateTickerInfoDf(stockListDf, delistedDf, listedDf, oldDay, newDay) :
    """Roll the listing table forward from ``oldDay`` to ``newDay``.

    Parameters
    ----------
    stockListDf : pandas.DataFrame
        Current table; needs the columns 'key' and 'endDay'. It is not modified.
    delistedDf : pandas.DataFrame
        Rows (with a 'key' column) that disappeared between the two days.
    listedDf : pandas.DataFrame
        Rows that newly appeared on ``newDay``.
    oldDay, newDay
        Day markers, in the same representation as the 'endDay' column.

    Returns
    -------
    pandas.DataFrame
        A new frame with a fresh 0..n-1 index in which
        * every row whose 'endDay' equals ``oldDay`` and whose key is not
          delisted has 'endDay' moved to ``newDay``;
        * the rows of ``listedDf`` are appended with 'firstDay' and 'endDay'
          both set to ``newDay``.
    """
    # Work on a copy so the caller's frame is not silently edited as a side effect.
    stockListDf = stockListDf.copy()

    stillListed = ~stockListDf['key'].isin(delistedDf['key'].tolist())
    activeOnOldDay = stockListDf['endDay'].isin([oldDay])
    # Only rows that were alive on oldDay are extended; rows that ended earlier stay closed.
    stockListDf.loc[stillListed & activeOnOldDay, 'endDay'] = newDay

    listedDf = listedDf.assign(firstDay=newDay, endDay=newDay)

    return pd.concat([stockListDf, listedDf]).reset_index(drop=True)

In [9]:
def getDayList(startDay, endDay):
    """List every calendar day from ``startDay`` to ``endDay``, both inclusive.

    Both arguments and every returned element are 'YYYYMMDD' strings.
    The result is empty when ``endDay`` lies before ``startDay``.
    """
    dayFormat = "%Y%m%d"
    first = datetime.strptime(startDay, dayFormat)
    last = datetime.strptime(endDay, dayFormat)
    spanInDays = (last - first).days

    return [
        (first + timedelta(days=offset)).strftime(dayFormat)
        for offset in range(spanInDays + 1)
    ]

### **2. Creating DB : The First Day**

#### 2.1. Ready for Stage 

In [12]:
dayList = getDayList('20100101', '20211008')

# Seed the table with the first day: every ticker starts with firstDay == endDay == that day.
print(dayList[0])
stockListDf = creatingTickerInfoDf(dayList[0])

# Carry the previous day's listing forward instead of calling differenceDf():
# differenceDf() fetches both days on every iteration, so each day's listing
# would be requested twice over the whole range.
prevDf = stockListDf[['ticker', 'name', 'key']]
for oldDay, newDay in zip(dayList, dayList[1:]):
    print(newDay)
    newDf = createTickerDf(newDay)
    delistedDf = filterDfs(newDf, prevDf)   # on oldDay but gone on newDay
    listedDf = filterDfs(prevDf, newDf)     # on newDay but absent on oldDay
    stockListDf = updateTickerInfoDf(stockListDf, delistedDf, listedDf, oldDay, newDay)
    prevDf = newDf

# str (format 20211127) -> datetime -> str (format 2021-11-27)
for col in ('firstDay', 'endDay'):
    stockListDf[col] = pd.to_datetime(stockListDf[col], format="%Y%m%d").dt.strftime('%Y-%m-%d')
stockListDf

20100101
20100102
20100103
20100104
20100105
20100106
20100107
20100108
20100109
20100110
20100111
20100112
20100113
20100114
20100115
20100116
20100117
20100118
20100119
20100120
20100121
20100122
20100123
20100124
20100125
20100126
20100127
20100128
20100129
20100130
20100131
20100201
20100202
20100203
20100204
20100205
20100206
20100207
20100208
20100209
20100210
20100211
20100212
20100213
20100214
20100215
20100216
20100217
20100218
20100219
20100220
20100221
20100222
20100223
20100224
20100225
20100226
20100227
20100228
20100301
20100302
20100303
20100304
20100305
20100306
20100307
20100308
20100309
20100310
20100311
20100312
20100313
20100314
20100315
20100316
20100317
20100318
20100319
20100320
20100321
20100322
20100323
20100324
20100325
20100326
20100327
20100328
20100329
20100330
20100331
20100401
20100402
20100403
20100404
20100405
20100406
20100407
20100408
20100409
20100410
20100411
20100412
20100413
20100414
20100415
20100416
20100417
20100418
20100419
20100420
20100421
2

Unnamed: 0,ticker,name,key,firstDay,endDay
0,004560,현대비앤지스틸,004560현대비앤지스틸,2010-01-01,2021-10-08
1,004565,현대비앤지스틸우,004565현대비앤지스틸우,2010-01-01,2021-10-08
2,001460,BYC,001460BYC,2010-01-01,2021-10-08
3,001465,BYC우,001465BYC우,2010-01-01,2021-10-08
4,084680,이월드,084680이월드,2010-01-01,2021-10-08
...,...,...,...,...,...
1180,139990,아주스틸,139990아주스틸,2021-08-20,2021-10-08
1181,377190,디앤디플랫폼리츠,377190디앤디플랫폼리츠,2021-08-27,2021-10-08
1182,271940,일진하이솔루스,271940일진하이솔루스,2021-09-01,2021-10-08
1183,395400,SK리츠,395400SK리츠,2021-09-14,2021-10-08


In [13]:
# Display the data directory that was resolved (and chdir'ed into) at the top of the notebook.
folderPath

WindowsPath('c:/Users/ajcltm/PycharmProjects/DataAPI/DataStock')

In [15]:
# Parse the 'YYYY-MM-DD' day strings into real datetimes before writing the staging snapshot.
for dayCol in ('firstDay', 'endDay'):
    stockListDf[dayCol] = pd.to_datetime(stockListDf[dayCol], format='%Y-%m-%d')  # str (format 2021-11-27) -> datetime

stockListDf.to_parquet(folderPath.joinpath('stockListDf.parquet'))

#### 2.2. Creating SQLite DB

In [19]:
# create DB
# The file name is spelled 'StockListDB.db' exactly as in every later cell; on a
# case-sensitive filesystem 'stockListDB.db' would be a different, orphaned file.
# CREATE TABLE raises sqlite3.OperationalError if the table already exists.
conn = sqlite3.connect("StockListDB.db")
try:
    conn.execute(
        'CREATE TABLE StockListDB (id INTEGER PRIMARY KEY AUTOINCREMENT, ticker TEXT, name TEXT, firstDay TIMESTAMP, endDay TIMESTAMP)'
    )
    conn.commit()
finally:
    # Release the file handle even when the statement fails.
    conn.close()

#### 2.3. insert data into DB

In [17]:
# see the Staged df row

# Normalise both day columns to 'YYYY-MM-DD' strings. Going through to_datetime
# first keeps the cell re-runnable: it accepts datetimes as well as strings that
# are already in that format (a plain x.strftime would fail on the second run).
for col in ('firstDay', 'endDay'):
    stockListDf[col] = pd.to_datetime(stockListDf[col], format='%Y-%m-%d').dt.strftime('%Y-%m-%d')

# Pick the columns by name so the printout does not depend on the column order.
for row in stockListDf[['ticker', 'name', 'key', 'firstDay', 'endDay']].itertuples(index=False, name=None):
    print(*row, sep='/')

004560/현대비앤지스틸/004560현대비앤지스틸/2010-01-01/2021-10-08
004565/현대비앤지스틸우/004565현대비앤지스틸우/2010-01-01/2021-10-08
001460/BYC/001460BYC/2010-01-01/2021-10-08
001465/BYC우/001465BYC우/2010-01-01/2021-10-08
084680/이월드/084680이월드/2010-01-01/2021-10-08
001040/CJ/001040CJ/2010-01-01/2021-10-08
079160/CJ CGV/079160CJ CGV/2010-01-01/2021-10-08
001049/CJ3우B/001049CJ3우B/2010-01-01/2010-01-12
001045/CJ우/001045CJ우/2010-01-01/2021-10-08
097950/CJ제일제당/097950CJ제일제당/2010-01-01/2021-10-08
097959/CJ제일제당 3우B/097959CJ제일제당 3우B/2010-01-01/2010-01-12
097955/CJ제일제당 우/097955CJ제일제당 우/2010-01-01/2021-10-08
069730/DSR제강/069730DSR제강/2010-01-01/2021-10-08
017940/E1/017940E1/2010-01-01/2021-10-08
007700/F&F홀딩스/007700F&F홀딩스/2010-01-01/2021-10-08
114090/GKL/114090GKL/2010-01-01/2021-10-08
078930/GS/078930GS/2010-01-01/2021-10-08
006360/GS건설/006360GS건설/2010-01-01/2021-10-08
001250/GS글로벌/001250GS글로벌/2010-01-01/2021-10-08
078935/GS우/078935GS우/2010-01-01/2021-10-08
035000/지투알/035000지투알/2010-01-01/2021-10-08
001500/현대차증권/001500현대차증권/20

In [22]:
# Insert data into DB
# Select the stored columns by name: positional access (row[4], row[5]) silently
# writes the wrong fields as soon as the column order of stockListDf changes.
rows = stockListDf[['ticker', 'name', 'firstDay', 'endDay']].copy()
# Store the days as 'YYYY-MM-DD' text whether the frame currently holds strings
# in that format or datetimes (the later cells parse them with '%Y-%m-%d').
for col in ('firstDay', 'endDay'):
    rows[col] = pd.to_datetime(rows[col], format='%Y-%m-%d').dt.strftime('%Y-%m-%d')

sql = "insert into StockListDB (ticker, name, firstDay, endDay) values (?, ?, ?, ?)"
connect = sqlite3.connect('./StockListDB.db')
try:
    connect.executemany(sql, rows.itertuples(index=False, name=None))
    connect.commit()
finally:
    # Close the connection even if an insert fails; uncommitted rows are discarded.
    connect.close()

#### 2.4. Save DB in Parquet Format

In [None]:
# Parquet stores typed columns, so turn the 'YYYY-MM-DD' strings into datetimes before writing.
stockListDf = stockListDf.assign(
    firstDay=pd.to_datetime(stockListDf['firstDay'], format='%Y-%m-%d'),  # str (format 2021-11-27) -> datetime
    endDay=pd.to_datetime(stockListDf['endDay'], format='%Y-%m-%d'),      # str (format 2021-11-27) -> datetime
)
stockListDf.to_parquet('./stockListDB.parquet')

### **3. Update DB**

#### 3.1. Load The Old df (from DB)

In [14]:
# query the existing DB and get the python dataframe
conn = sqlite3.connect('./StockListDB.db', isolation_level=None)
try:
    stockListDf = pd.read_sql_query('SELECT * FROM StockListDB', conn)
finally:
    # Close the connection even if the query fails.
    conn.close()

# Rebuild the matching key (ticker + name). Column-wise concatenation also works
# on an empty table, where a row-wise apply(axis=1) would return a DataFrame.
stockListDf = stockListDf.assign(key=stockListDf['ticker'] + stockListDf['name'])

# str (format 2021-11-27) -> datetime -> str (format 20211127)
for col in ('firstDay', 'endDay'):
    stockListDf[col] = pd.to_datetime(stockListDf[col], format='%Y-%m-%d').dt.strftime('%Y%m%d')

# The autoincrement id is regenerated when the table is rewritten, so it is not carried along.
stockListDf = stockListDf.drop('id', axis=1)

stockListDf

Unnamed: 0,ticker,name,firstDay,endDay,key
0,004560,현대비앤지스틸,20100101,20211008,004560현대비앤지스틸
1,004565,현대비앤지스틸우,20100101,20211008,004565현대비앤지스틸우
2,001460,BYC,20100101,20211008,001460BYC
3,001465,BYC우,20100101,20211008,001465BYC우
4,084680,이월드,20100101,20211008,084680이월드
...,...,...,...,...,...
1180,139990,아주스틸,20210820,20211008,139990아주스틸
1181,377190,디앤디플랫폼리츠,20210827,20211008,377190디앤디플랫폼리츠
1182,271940,일진하이솔루스,20210901,20211008,271940일진하이솔루스
1183,395400,SK리츠,20210914,20211008,395400SK리츠


#### 3.2. Update the Df 

In [15]:
dayList = getDayList('20211008', '20211011')  # ex) if the last day of the existing df is 20211008 -> getDayList('20211008', '20211011')

# updateTickerInfoDf only extends rows whose endDay equals the previous day, so a
# start day that matches no stored endDay would silently extend nothing.
if not (stockListDf['endDay'] == dayList[0]).any():
    raise ValueError(
        "no row of stockListDf ends on " + dayList[0]
        + "; the first day of the range must be the last day already stored (format YYYYMMDD)"
    )

# Carry the previous day's listing forward instead of calling differenceDf(),
# which fetches both days on every iteration (each listing would be requested twice).
prevDf = None
for oldDay, newDay in zip(dayList, dayList[1:]):
    print(newDay)
    if prevDf is None:
        prevDf = createTickerDf(oldDay)
    newDf = createTickerDf(newDay)
    delistedDf = filterDfs(newDf, prevDf)   # on oldDay but gone on newDay
    listedDf = filterDfs(prevDf, newDf)     # on newDay but absent on oldDay
    stockListDf = updateTickerInfoDf(stockListDf, delistedDf, listedDf, oldDay, newDay)
    prevDf = newDf

# str (format 20211127) -> datetime -> str (format 2021-11-27)
for col in ('firstDay', 'endDay'):
    stockListDf[col] = pd.to_datetime(stockListDf[col], format="%Y%m%d").dt.strftime('%Y-%m-%d')
stockListDf

20211009
20211010
20211011


Unnamed: 0,ticker,name,firstDay,endDay,key
0,004560,현대비앤지스틸,2010-01-01,2021-10-11,004560현대비앤지스틸
1,004565,현대비앤지스틸우,2010-01-01,2021-10-11,004565현대비앤지스틸우
2,001460,BYC,2010-01-01,2021-10-11,001460BYC
3,001465,BYC우,2010-01-01,2021-10-11,001465BYC우
4,084680,이월드,2010-01-01,2021-10-11,084680이월드
...,...,...,...,...,...
1180,139990,아주스틸,2021-08-20,2021-10-11,139990아주스틸
1181,377190,디앤디플랫폼리츠,2021-08-27,2021-10-11,377190디앤디플랫폼리츠
1182,271940,일진하이솔루스,2021-09-01,2021-10-11,271940일진하이솔루스
1183,395400,SK리츠,2021-09-14,2021-10-11,395400SK리츠


In [16]:
# 1. drop the existing table -> 2. create it again -> 3. insert the data
# Select the stored columns by name: positional access (row[3], row[4]) silently
# writes the wrong fields as soon as the column order of stockListDf changes.
rows = stockListDf[['ticker', 'name', 'firstDay', 'endDay']].copy()
# Store the days as 'YYYY-MM-DD' text whether the frame currently holds strings
# in that format or datetimes.
for col in ('firstDay', 'endDay'):
    rows[col] = pd.to_datetime(rows[col], format='%Y-%m-%d').dt.strftime('%Y-%m-%d')

sql = "insert into StockListDB (ticker, name, firstDay, endDay) values (?, ?, ?, ?)"
conn = sqlite3.connect("StockListDB.db")
try:
    # sqlite3 opens a transaction implicitly only before INSERT/UPDATE/DELETE/REPLACE,
    # so without an explicit BEGIN the DROP/CREATE would take effect immediately and a
    # failed insert would leave the table empty. One transaction makes the rebuild atomic.
    conn.execute('BEGIN')
    conn.execute('DROP TABLE IF EXISTS StockListDB')  # tolerate a missing table
    conn.execute(
        'CREATE TABLE StockListDB (id INTEGER PRIMARY KEY AUTOINCREMENT, ticker TEXT, name TEXT, firstDay TIMESTAMP, endDay TIMESTAMP)'
    )
    conn.executemany(sql, rows.itertuples(index=False, name=None))
    conn.commit()
except Exception:
    # Restore the previous table contents, then surface the error.
    conn.rollback()
    raise
finally:
    conn.close()

#### 3.3. Save DB in Parquet Format

In [17]:
# Parse both day columns ('YYYY-MM-DD' strings) into datetimes, then write the Parquet copy.
parsedDays = {
    dayCol: pd.to_datetime(stockListDf[dayCol], format='%Y-%m-%d')  # str (format 2021-11-27) -> datetime
    for dayCol in ('firstDay', 'endDay')
}
stockListDf['firstDay'] = parsedDays['firstDay']
stockListDf['endDay'] = parsedDays['endDay']
stockListDf.to_parquet('./stockListDB.parquet')

### **4. Query DB**

In [18]:
# Read the whole table back and expose the day columns as datetimes.
conn = sqlite3.connect('./StockListDB.db', isolation_level=None)
try:
    stockListDf = pd.read_sql_query('SELECT * FROM StockListDB', conn)
finally:
    # Close the connection even if the query fails.
    conn.close()

for col in ('firstDay', 'endDay'):
    stockListDf[col] = pd.to_datetime(stockListDf[col], format='%Y-%m-%d')  # str (format 2021-11-27) -> datetime
stockListDf

Unnamed: 0,id,ticker,name,firstDay,endDay
0,1,004560,현대비앤지스틸,2010-01-01,2021-10-11
1,2,004565,현대비앤지스틸우,2010-01-01,2021-10-11
2,3,001460,BYC,2010-01-01,2021-10-11
3,4,001465,BYC우,2010-01-01,2021-10-11
4,5,084680,이월드,2010-01-01,2021-10-11
...,...,...,...,...,...
1180,1181,139990,아주스틸,2021-08-20,2021-10-11
1181,1182,377190,디앤디플랫폼리츠,2021-08-27,2021-10-11
1182,1183,271940,일진하이솔루스,2021-09-01,2021-10-11
1183,1184,395400,SK리츠,2021-09-14,2021-10-11
