In [2]:
from sqlalchemy import create_engine
from tqdm import tqdm
import pandas as pd
import numpy as np
import pymysql

In [5]:
def sqlalchemy_connect_ip(ip_address, db_name, user="admin", password="big15", port=3306):
    """Open a SQLAlchemy connection to a MySQL schema through the PyMySQL driver.

    Args:
        ip_address: Host name or IP address of the MySQL server.
        db_name: Schema to select on connect.
        user: MySQL account name. Defaults to the account this notebook has
            always used.
        password: Password for ``user``. Defaults to the previous literal.
        port: TCP port of the MySQL server.

    Returns:
        The object returned by ``Engine.connect()``. The caller is responsible
        for closing it.
    """
    # NOTE(review): the default credentials are still embedded in source for
    # backward compatibility; prefer passing them in from the environment or
    # getpass and removing these defaults.
    from urllib.parse import quote_plus

    # Credentials are URL-escaped so characters such as '@' or '/' in a
    # password cannot corrupt the connection URL. With the defaults the URL is
    # identical to the one that was previously hard-coded.
    url = "mysql+pymysql://{user}:{password}@{host}:{port}/{db}?charset=utf8".format(
        user=quote_plus(user),
        password=quote_plus(password),
        host=ip_address,
        port=port,
        db=db_name,
    )

    # The former `encoding='utf8'` keyword is omitted: it matched the default,
    # is deprecated in SQLAlchemy 1.4 and is rejected by SQLAlchemy 2.0.
    engine = create_engine(url)
    return engine.connect()

def get_pymysql_connection(ip_address, db_name, user="admin", password="big15", port=3306):
    """Open a raw PyMySQL connection to a MySQL schema.

    Args:
        ip_address: Host name or IP address of the MySQL server.
        db_name: Schema to select on connect.
        user: MySQL account name. Defaults to the account this notebook has
            always used.
        password: Password for ``user``. Defaults to the previous literal.
        port: TCP port of the MySQL server.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    # NOTE(review): the default credentials are still embedded in source for
    # backward compatibility; prefer passing them in from the environment or
    # getpass and removing these defaults.
    return pymysql.connect(
        host=ip_address,
        port=port,
        user=user,
        password=password,
        database=db_name,  # `db=` is only a deprecated alias of `database=`
        charset="utf8",
    )

def get_pymysql_stock_list(conn, db_name):
    """Return the names of all tables in the ``db_name`` schema.

    Args:
        conn: DB-API style connection that can be used as a context manager
            and whose ``cursor()`` is a context manager as well.
        db_name: Schema whose tables are listed.

    Returns:
        list: First column (``TABLE_NAME``) of every row returned by the
        ``INFORMATION_SCHEMA.TABLES`` query.
    """
    # The schema name is passed as a bound parameter instead of being
    # formatted into the statement, so quotes in `db_name` cannot alter the SQL.
    sql = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = %s"

    # NOTE(review): what leaving `with conn:` does is defined by the connection
    # class; recent PyMySQL releases presumably close the connection here, so
    # callers should not reuse `conn` afterwards -- confirm for the installed version.
    with conn:
        # The cursor context manager releases the cursor on exit, so no
        # explicit cur.close() is needed.
        with conn.cursor() as cur:
            cur.execute(sql, (db_name,))
            return [row[0] for row in cur.fetchall()]
               
def get_empty_day_df(sqlalchemy_conn):
    """Build the date-only frame that later merges use as their left-hand side.

    Reads every row of ``investing_data.aedkrw내역`` whose ``날짜`` lies strictly
    between 20210101 and 20211231 (the query excludes both bounds) and discards
    that table's six price/volume columns.

    Args:
        sqlalchemy_conn: Object whose ``execute(sql)`` returns a result with
            ``fetchall()``. It is not closed here. Rows are expected to carry
            their column names (e.g. named-tuple style rows) so that the
            resulting frame has a ``날짜`` column.

    Returns:
        pandas.DataFrame with a ``날짜`` column. Any column other than the six
        dropped ones comes back as a null-mask (False where the source value
        was null, NaN otherwise), exactly as before.

    Raises:
        KeyError: If ``날짜`` or one of the six dropped columns is missing,
            which is also what happens when the query returns no rows.
    """
    sql = "SELECT * FROM investing_data.aedkrw내역 where 날짜 > 20210101 and 날짜 < 20211231"
    rows = sqlalchemy_conn.execute(sql).fetchall()

    reference_columns = ['AEDKRW내역_종가', 'AEDKRW내역_오픈', 'AEDKRW내역_고가',
                         'AEDKRW내역_저가', 'AEDKRW내역_거래량', 'AEDKRW내역_변동']

    # Drop the reference table's own columns first: masking them would be
    # wasted work because they are discarded anyway.
    day_df = pd.DataFrame(rows).set_index('날짜').drop(columns=reference_columns)

    # np.nan rather than np.NaN: the capitalised alias was removed in NumPy 2.0.
    day_df = day_df.notnull().replace(True, np.nan)

    return day_df.reset_index()


def get_sqlalchemy_investing_df(empty_day_df, investing_table_list, conn=None):
    """Left-join every investing table onto the date frame and fill the gaps.

    Args:
        empty_day_df: Frame with a ``날짜`` column; it is copied, not modified.
        investing_table_list: Names of tables in the ``investing_data`` schema.
        conn: Optional open connection exposing ``execute(sql)``. When given it
            is used for every query and left open. When omitted, a single
            connection is opened with ``sqlalchemy_connect_ip`` and closed
            before returning (previously one connection was opened per table
            and none was closed).

    Returns:
        pandas.DataFrame with ``날짜`` as int plus the columns of every table.
        Columns whose name ends in ``_거래량`` or ``_변동`` have nulls replaced
        by 0; every other column is back-filled.

    Raises:
        KeyError: If a table returns no rows or has no ``날짜`` column.
    """
    # Copy first: pd.DataFrame(frame) may share data with `frame`, and the
    # dtype conversion below must not leak into the caller's object.
    investing_df = pd.DataFrame(empty_day_df).copy()
    investing_df['날짜'] = investing_df['날짜'].astype(str).astype(int)

    tables = list(investing_table_list)
    # Only open a connection of our own when there is something to query.
    owns_conn = conn is None and bool(tables)
    if owns_conn:
        conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')

    try:
        for table in tables:
            sql = "SELECT * FROM investing_data.`{0}` where 날짜 > 20210101 and 날짜 < 20211231".format(table)
            table_df = pd.DataFrame(conn.execute(sql).fetchall())
            # Normalise the join key to int on both sides of the merge.
            table_df['날짜'] = table_df['날짜'].astype(str).astype(int)
            investing_df = pd.merge(investing_df, table_df, on='날짜', how='left')
    finally:
        if owns_conn:
            conn.close()

    for column in list(investing_df.columns):
        if column.split('_')[-1] in ('거래량', '변동'):
            investing_df[column] = investing_df[column].fillna(0)
        else:
            # .bfill() replaces fillna(method='bfill'), which pandas deprecated.
            investing_df[column] = investing_df[column].bfill()

    return investing_df

def get_sqlalchemy_stock_investing_merge_df(conn, stock_table_list, investing_df):
    """Join each stock table with the investing frame on ``날짜`` and stack the results.

    Args:
        conn: Open connection exposing ``execute(sql)`` and ``close()``. It is
            always closed before this function returns, including on error.
        stock_table_list: Iterable of table names in the ``stock_info`` schema.
            A single table name given as a string is treated as a one-element
            list instead of being iterated character by character.
        investing_df: Frame with a ``날짜`` column; it is copied, not modified.

    Returns:
        pandas.DataFrame holding the inner-merged rows of every table, stacked
        in input order with each table's own row index kept. An empty frame is
        returned when no tables are given.

    Raises:
        KeyError: If a table returns no rows or has no ``날짜`` column.
    """
    if isinstance(stock_table_list, str):
        stock_table_list = [stock_table_list]

    # Convert the join key on a private copy so the caller's frame is untouched.
    investing_df = investing_df.copy()
    investing_df['날짜'] = investing_df['날짜'].astype(str).astype(int)

    merged_frames = []
    try:
        for table in tqdm(stock_table_list):
            sql = "SELECT * FROM stock_info.`{0}` where 날짜 > 20210101 and 날짜 < 20211231".format(table)
            stock_df = pd.DataFrame(conn.execute(sql).fetchall())
            # Normalise the join key to int on both sides of the merge.
            stock_df['날짜'] = stock_df['날짜'].astype(str).astype(int)
            merged_frames.append(pd.merge(stock_df, investing_df, on='날짜'))
    finally:
        conn.close()

    # pd.concat rejects an empty list, so keep the old "no tables" result.
    if not merged_frames:
        return pd.DataFrame()

    # One concat at the end instead of re-copying the accumulated frame on
    # every iteration.
    return pd.concat(merged_frames, axis=0)


In [6]:
# List every table in the investing_data schema over a raw PyMySQL connection.
pymysql_conn = get_pymysql_connection('192.168.50.123', 'investing_data')
investing_table_list = get_pymysql_stock_list(pymysql_conn, 'investing_data')

# Build the date-only frame, then release the SQLAlchemy connection: it is not
# used again in this cell and was previously left open.
sqlalchemy_conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')
try:
    empty_day_df = get_empty_day_df(sqlalchemy_conn)
finally:
    sqlalchemy_conn.close()

# Left-join every investing table onto the date frame.
investing_df = get_sqlalchemy_investing_df(empty_day_df, investing_table_list)

In [7]:
# investing_df.to_csv('../cor/investing_df_update.csv', encoding='utf-8-sig')

In [8]:
# NOTE(review): pymysql_conn is opened here but is not used by the active code
# in this cell (only by the commented-out lines below).
pymysql_conn = get_pymysql_connection('192.168.50.123', 'investing_data')
# Fresh SQLAlchemy connection; get_sqlalchemy_stock_investing_merge_df closes it
# once it has finished.
sqlalchemy_conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')
# Stock tables to merge; the trailing comment lists further codes that were left out.
stock_table_list = ['005930'] # ,'373220','207940','000660','051910','247540','091990','066970','293490','028300'
# Inner-join each listed stock table with investing_df on the date column and stack the results.
complete_df = get_sqlalchemy_stock_investing_merge_df(sqlalchemy_conn, stock_table_list, investing_df)
# for code in stock_table_list:
#     pymysql_conn = get_pymysql_connection('192.168.50.123', 'investing_data')
#     sqlalchemy_conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')
#     complete_df = get_sqlalchemy_stock_investing_merge_df(sqlalchemy_conn, code, investing_df)
#     complete_df.to_pickle(f'../colume_pickle/6개월_choice_10개_대형주_update_{code}.pkl')
    
# ['003070','006220','014580','118990','027410','000060','008560','003520','067290','064350']
# get_pymysql_stock_list(pymysql_conn, 'stock_info')

100%|██████████| 1/1 [00:06<00:00,  6.86s/it]


In [9]:
# Write the merged frame to CSV. 'utf-8-sig' prepends a UTF-8 byte-order mark,
# and the row index is written too because `index` is left at its default.
# NOTE(review): to_csv does not create missing directories, so ./data/csv must
# already exist.
complete_df.to_csv('./data/csv/005930.csv', encoding='utf-8-sig')