In [1]:
import time
import xml.etree.ElementTree as elemTree
from urllib.parse import quote

import mysql.connector
import pandas as pd
from sqlalchemy import create_engine
from tqdm.auto import tqdm

In [2]:
# (instrument code, CSV path) pairs for every minute-bar file to load.
# Paths use forward slashes so they resolve on Windows, Linux and macOS alike
# (backslash-separated paths only work on Windows).
csv_paths = [
  ('069500', '../data/kodex_200.csv'),
  ('114800', '../data/kodex_inverse.csv'),
  ('226490', '../data/kodex_kospi.csv'),
  ('001', '../data/kospi.csv'),
  ('201', '../data/kospi200.csv'),
]

In [3]:
# Load every CSV listed in csv_paths, normalise it to a common layout and
# stack the results into a single frame (whole_df).
dfs = []
for st_code, csv_path in tqdm(csv_paths):
  # Read the timestamp column as text so it can be parsed with an explicit
  # format below; keep only the trade time, OHLC and volume columns.
  df = pd.read_csv(csv_path, dtype={'체결시간': str})[
    ['체결시간', '시가', '고가', '저가', '현재가', '거래량']
  ]
  df.columns = ['dt', 'open', 'high', 'low', 'close', 'volume']
  df['dt'] = pd.to_datetime(df['dt'], format='%Y%m%d%H%M%S')
  df['st_code'] = st_code
  # Strip the sign from all price/volume columns in one step.
  # NOTE(review): presumably the source data carries a sign on these values — confirm.
  df[['open', 'high', 'low', 'close', 'volume']] = (
    df[['open', 'high', 'low', 'close', 'volume']].abs()
  )
  dfs.append(df)
whole_df = pd.concat(dfs, ignore_index=True)

  0%|          | 0/5 [00:00<?, ?it/s]

# Insert 속도 테스트

In [4]:
# Read the MySQL connection settings from the XML config file: the
# ./DBMS/mysql node is expected to hold host, user, password and database
# child elements, whose text values are collected into config_db.
tree = elemTree.parse(r'../config/.config.xml')
root = tree.getroot()
node_mysql = root.find('./DBMS/mysql')
config_db = {}
for tag in ('host', 'user', 'password', 'database'):
  config_db[tag] = node_mysql.find(tag).text

In [9]:
# Build the SQLAlchemy engine for the MySQL database described by config_db.
# The user name and password are percent-encoded (safe='' also encodes '/')
# so that characters such as '@', ':', '/' or '%' in the credentials cannot
# corrupt the connection URL; SQLAlchemy decodes them again when parsing it.
db_engine = create_engine(
  'mysql+mysqlconnector://'
  f'{quote(config_db["user"], safe="")}:{quote(config_db["password"], safe="")}'
  f'@{config_db["host"]}/{config_db["database"]}'
)

## SQLAlchemy + MySQL 8.0 + pd.DataFrame.to_sql()

SQLAlchemy + MySQL 8.0 + pd.DataFrame.to_sql()
- Measured insert time: 2m 9s (1m 54s, 2m 2s, 2m 3s)

In [10]:
# Remove any previous copy of the table so the run starts from an empty table.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# statement is run on an explicit connection inside a transaction block;
# exec_driver_sql() hands the SQL string to the driver unchanged.
with db_engine.begin() as conn:
  conn.exec_driver_sql('DROP TABLE IF EXISTS data_in_minute')

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x16d405e0>

In [11]:
# DDL for the `data_in_minute` table: one row per (instrument code, timestamp).
# - (`st_code`, `dt`) is the composite primary key.
# - open/high/low/close/volume are nullable int columns defaulting to 0.
# The column/table COMMENT strings are Korean and are part of the SQL that is
# sent to the server, so they are left untouched.
# NOTE(review): utf8mb3 is a deprecated character set in MySQL 8.0 — consider utf8mb4.
table_query ='''
CREATE TABLE `data_in_minute` (
  `st_code` char(7) NOT NULL COMMENT '종목코드 (예: 005930)',
  `dt` datetime NOT NULL COMMENT '타임스탬프 (타임존 무시)',
  `open` int DEFAULT '0' COMMENT '시가',
  `high` int DEFAULT '0' COMMENT '고가',
  `low` int DEFAULT '0' COMMENT '저가',
  `close` int DEFAULT '0' COMMENT '종가',
  `volume` int DEFAULT '0' COMMENT '거래량',
  PRIMARY KEY (`st_code`,`dt`) COMMENT '종목코드, dt의 클러스터형 인덱스'
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3 COMMENT='분봉 데이터 (static)'
'''

In [12]:
# Create the empty data_in_minute table from the DDL in table_query.
# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# statement is run on an explicit connection inside a transaction block;
# exec_driver_sql() hands the SQL string to the driver unchanged.
with db_engine.begin() as conn:
  conn.exec_driver_sql(table_query)

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x66f2c88>

In [13]:
# Bulk-insert the combined frame through pandas/SQLAlchemy. 'append' writes
# into the existing data_in_minute table, and the DataFrame index is not
# written as a column.
whole_df.to_sql(
  name='data_in_minute',
  con=db_engine,
  if_exists='append',
  index=False,
)

-1

## mysql-connector-python

Measured insert times: 2m 56s, 3m 11s, 3m 10s, 3m 9s

In [14]:
# Open a direct mysql-connector-python connection, passing the user,
# password, host and database entries of config_db as keyword arguments.
cnx = mysql.connector.connect(
  **{key: config_db[key] for key in ('user', 'password', 'host', 'database')}
)

In [15]:
# Cursor on the raw connector connection; used below for the DDL and the inserts.
cursor = cnx.cursor()

In [16]:
# Recreate data_in_minute from scratch: drop any existing table, then run the
# CREATE TABLE statement held in table_query.
for ddl in ('DROP TABLE IF EXISTS data_in_minute', table_query):
  cursor.execute(ddl)

In [17]:
# Parameterized single-row INSERT for data_in_minute; the seven %s
# placeholders are filled in by the driver for each execute() call.
add_minute_data = ' '.join([
  'INSERT INTO data_in_minute',
  '(st_code, dt, open, high, low, close, volume)',
  'VALUES (%s, %s, %s, %s, %s, %s, %s)',
])

In [18]:
# Insert whole_df one row at a time with the parameterized INSERT and commit
# once at the end.
# NOTE(review): cursor.executemany() would likely be faster, but it would
# change what this speed test measures, so the row-by-row form is kept.
try:
  for _, row in whole_df.iterrows():
    cursor.execute(
      add_minute_data,
      (
        row['st_code'],
        row['dt'].to_pydatetime(),  # pandas Timestamp -> datetime.datetime for the driver
        row['open'],
        row['high'],
        row['low'],
        row['close'],
        row['volume'],
      ),
    )
  cnx.commit()
except BaseException:
  # Also covers a kernel interrupt during the long loop: discard the partial,
  # uncommitted batch so the connection is not left mid-transaction.
  cnx.rollback()
  raise
finally:
  # Release the cursor whether or not the inserts succeeded.
  cursor.close()

True