In [9]:
from sqlalchemy import create_engine
from tqdm import tqdm
import pymysql
import pandas as pd
import numpy as np
import pickle
import csv
import os
import time

In [10]:
def sqlalchemy_connect_ip(ip_address, db_name):
    """Open a SQLAlchemy connection to a MySQL database via the PyMySQL driver.

    The user name and password default to the values this notebook has always
    used, but they can be overridden through the ``MYSQL_USER`` and
    ``MYSQL_PASSWORD`` environment variables so that real credentials do not
    have to live in the notebook.

    Args:
        ip_address: Host name or IP address of the MySQL server (port 3306).
        db_name: Name of the schema to connect to.

    Returns:
        The object returned by ``engine.connect()``. The caller is responsible
        for closing it.
    """
    from urllib.parse import quote_plus

    # NOTE(security): the fallback literals are kept only for backward
    # compatibility; prefer setting the environment variables instead.
    user = os.environ.get("MYSQL_USER", "admin")
    password = os.environ.get("MYSQL_PASSWORD", "big15")

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the connection URL.
    url = "mysql+pymysql://{0}:{1}@{2}:3306/{3}?charset=utf8".format(
        quote_plus(user), quote_plus(password), ip_address, db_name)

    # The legacy ``encoding`` keyword is deliberately not passed: it is
    # deprecated in SQLAlchemy 1.4 and rejected by 2.0; the ``charset=utf8``
    # URL option above is what configures the driver.
    engine = create_engine(url)

    return engine.connect()


def get_pymysql_connection(ip_address, db_name):
    """Open a raw PyMySQL connection to ``db_name`` on ``ip_address``.

    The user name and password default to the values this notebook has always
    used, but they can be overridden through the ``MYSQL_USER`` and
    ``MYSQL_PASSWORD`` environment variables.

    Args:
        ip_address: Host name or IP address of the MySQL server.
        db_name: Name of the schema to use as the default database.

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for closing it.
    """
    # NOTE(security): the fallback literals are kept only for backward
    # compatibility; prefer setting the environment variables instead.
    conn = pymysql.connect(
        host=ip_address,
        user=os.environ.get("MYSQL_USER", "admin"),
        password=os.environ.get("MYSQL_PASSWORD", "big15"),
        database=db_name,  # ``db`` is only a deprecated alias of ``database``
        charset='utf8',
    )

    return conn

def get_pymysql_stock_list(conn, db_name):
    """Return the names of all tables that exist in schema ``db_name``.

    Args:
        conn: A DB-API style connection that works as a context manager and
            whose ``cursor()`` also works as a context manager.
        db_name: Schema name matched against ``INFORMATION_SCHEMA.TABLES``.

    Returns:
        A list with the first column (``TABLE_NAME``) of every returned row.

    Note:
        ``conn`` is used in a ``with`` block, so whatever its ``__exit__`` does
        runs before this function returns. NOTE(review): in PyMySQL 1.x this
        closes the connection, so it presumably cannot be reused afterwards —
        confirm for the installed version.
    """
    # Extract the tables (stocks) that currently exist in the DB. The schema
    # name is passed as a bound parameter rather than formatted into the SQL.
    sql = "SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = %s"

    with conn:
        # The cursor context manager closes the cursor; no explicit close needed.
        with conn.cursor() as cur:
            cur.execute(sql, (db_name,))
            return [row[0] for row in cur.fetchall()]
               

def get_empty_day_df(sqlalchemy_conn, start_date=20220430, end_date=20221122):
    """Build the date frame that other tables are merged onto, and pickle it.

    Rows of ``investing_data.aedkrw내역`` whose ``날짜`` lies strictly between
    ``start_date`` and ``end_date`` are read, the six ``AEDKRW내역_*`` value
    columns are dropped, and the result is written to
    ``./pickle/empty_day_df.pkl``.

    Args:
        sqlalchemy_conn: Object with an ``execute(sql)`` method whose result
            offers ``keys()`` and ``fetchall()``.
        start_date: Exclusive lower bound compared against ``날짜``.
        end_date: Exclusive upper bound compared against ``날짜``.

    Returns:
        The frame that was pickled: a fresh integer index, the ``날짜`` column,
        and any column not in the drop list below (masked to NaN/False).
    """
    sql = "SELECT * FROM investing_data.aedkrw내역 where 날짜 > {0} and 날짜 < {1}".format(
        int(start_date), int(end_date))

    result = sqlalchemy_conn.execute(sql)
    # Take the column names from the result itself so that they are present
    # even when no rows match or the row type exposes no field names.
    column_names = list(result.keys())
    rows = [tuple(row) for row in result.fetchall()]
    empty_day_df = pd.DataFrame(rows, columns=column_names)

    empty_day_df = empty_day_df.set_index('날짜')
    # Blank out the values: non-null cells become NaN, null cells become False.
    # ``np.nan`` is used because the ``np.NaN`` alias no longer exists in NumPy 2.
    empty_day_df = empty_day_df.notnull().replace(True, np.nan)

    empty_day_df = empty_day_df.reset_index()
    empty_day_df = empty_day_df.drop(columns=['AEDKRW내역_종가','AEDKRW내역_오픈','AEDKRW내역_고가'
                                      ,'AEDKRW내역_저가','AEDKRW내역_거래량','AEDKRW내역_변동'])

    # Make sure the target directory exists before pickling.
    os.makedirs('./pickle', exist_ok=True)
    empty_day_df.to_pickle('./pickle/empty_day_df.pkl')

    return empty_day_df

def get_sqlalchemy_investing_data(day_df, conn, investing_table_list,
                                  start_date=20220430, end_date=20221122):
    """Left-merge every listed ``investing_data`` table onto ``day_df`` by date.

    Each table is queried for rows whose ``날짜`` lies strictly between
    ``start_date`` and ``end_date`` and merged onto the running frame on
    ``날짜``. Missing values are then filled column by column: columns whose
    name ends in ``_거래량`` or ``_변동`` get 0, every other column is
    back-filled from the next available row (gaps after the last available
    row stay NaN).

    Args:
        day_df: Frame containing a ``날짜`` column; it is not modified.
        conn: Object with an ``execute(sql)`` method whose result offers
            ``keys()`` and ``fetchall()``. All queries run on this connection;
            no extra connections are opened and it is left open.
        investing_table_list: Table names inside the ``investing_data`` schema.
        start_date: Exclusive lower bound compared against ``날짜``.
        end_date: Exclusive upper bound compared against ``날짜``.

    Returns:
        A new DataFrame: ``day_df`` plus the columns of every table, filled
        as described above.
    """
    # Copy so the fill step below can never write through to the caller's frame.
    investing_df = pd.DataFrame(day_df).copy()

    for table in investing_table_list:
        # Identifiers cannot be bound as parameters; double any backtick so the
        # table name cannot break out of the quoted identifier.
        sql = "SELECT * FROM investing_data.`{0}` where 날짜 > {1} and 날짜 < {2}".format(
            str(table).replace('`', '``'), int(start_date), int(end_date))
        result = conn.execute(sql)
        # Explicit column names keep the merge key present for empty tables.
        column_names = list(result.keys())
        rows = [tuple(row) for row in result.fetchall()]
        table_df = pd.DataFrame(rows, columns=column_names)
        investing_df = pd.merge(investing_df, table_df, on='날짜', how='left')

    for c in list(investing_df.columns):
        if c.split('_')[-1] in ('거래량', '변동'):
            investing_df[c] = investing_df[c].fillna(0)
        else:
            # ``bfill()`` replaces the deprecated ``fillna(method='bfill')``.
            investing_df[c] = investing_df[c].bfill()

    return investing_df
        
def get_sqlalchemy_stock_data(conn, stock_table_list, investing_df,
                              limit=10, start_date=20220430, end_date=20221122):
    """Load per-stock tables, attach the investing data by date, and stack them.

    For each of the first ``limit`` tables in ``stock_table_list`` the rows of
    ``stock_info.<table>`` whose ``날짜`` lies strictly between ``start_date``
    and ``end_date`` are read and left-merged with ``investing_df`` on
    ``날짜``. The per-table results are concatenated row-wise.

    Args:
        conn: Object with ``execute(sql)`` (result offering ``keys()`` and
            ``fetchall()``) and ``close()``. It is closed before returning,
            even if a query fails.
        stock_table_list: Table names inside the ``stock_info`` schema.
        investing_df: Frame with a ``날짜`` column; it is not modified.
        limit: Number of leading tables to process (default 10, matching the
            previous hard-coded slice); ``None`` processes all of them.
        start_date: Exclusive lower bound compared against ``날짜``.
        end_date: Exclusive upper bound compared against ``날짜``.

    Returns:
        The row-wise concatenation of all merged frames (each table keeps its
        own row index), or an empty DataFrame when no table was processed.

    NOTE(review): nothing here adds a column identifying the source table, so
    rows of different stocks are distinguishable only if the tables themselves
    carry such a column — confirm.
    """
    # Normalise the join key to int on a copy so the caller's frame is untouched.
    investing_df = investing_df.copy()
    investing_df['날짜'] = investing_df['날짜'].astype(str).astype(int)

    tables = stock_table_list if limit is None else stock_table_list[:limit]
    merged_frames = []
    try:
        for table in tqdm(tables):
            # Identifiers cannot be bound as parameters; double any backtick so
            # the table name cannot break out of the quoted identifier.
            sql = "SELECT * FROM stock_info.`{0}` where 날짜 > {1} and 날짜 < {2}".format(
                str(table).replace('`', '``'), int(start_date), int(end_date))
            # tqdm.write prints without corrupting the progress bar.
            tqdm.write(str(table))
            result = conn.execute(sql)
            # Explicit column names keep the merge key present for empty tables.
            column_names = list(result.keys())
            rows = [tuple(row) for row in result.fetchall()]
            table_df = pd.DataFrame(rows, columns=column_names)
            table_df['날짜'] = table_df['날짜'].astype(str).astype(int)
            merged_frames.append(pd.merge(table_df, investing_df, on='날짜', how='left'))
    finally:
        conn.close()

    if not merged_frames:
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(merged_frames, axis=0)


In [11]:
# Connect to the `investing_data` schema twice (raw PyMySQL + SQLAlchemy),
# list the tables in that schema, and build the date frame that the
# per-table data is merged onto.
pymysql_conn = get_pymysql_connection('192.168.50.123', 'investing_data')
sqlalchemy_conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')
investing_table_list = get_pymysql_stock_list(pymysql_conn, 'investing_data')
empty_day_df = get_empty_day_df(sqlalchemy_conn)

In [12]:
# Left-merge every investing_data table onto the date frame and fill the gaps.
investing_df = get_sqlalchemy_investing_data(empty_day_df, sqlalchemy_conn, investing_table_list)

In [13]:
# Display the merged investing frame.
investing_df

Unnamed: 0,날짜,AEDKRW내역_종가,AEDKRW내역_오픈,AEDKRW내역_고가,AEDKRW내역_저가,AEDKRW내역_거래량,AEDKRW내역_변동,ATX내역_종가,ATX내역_오픈,ATX내역_고가,...,헝가리BUX내역_고가,헝가리BUX내역_저가,헝가리BUX내역_거래량,헝가리BUX내역_변동,호주S&PASX내역_종가,호주S&PASX내역_오픈,호주S&PASX내역_고가,호주S&PASX내역_저가,호주S&PASX내역_거래량,호주S&PASX내역_변동
0,20220502,345.04,344.01,345.96,343.82,0,0.35,3236.28,3286.05,3286.05,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
1,20220503,343.09,345.11,345.43,343.07,0,-0.57,3291.52,3236.16,3304.86,...,43592.87,42714.09,0,2.08,7316.2,7347.0,7355.2,7299.3,580710000.0,-0.42
2,20220504,340.69,343.30,345.08,340.48,0,-0.70,3256.74,3292.30,3306.64,...,44126.28,43048.23,0,-0.97,7304.7,7316.2,7366.8,7299.9,726550000.0,-0.16
3,20220505,345.50,340.73,346.95,340.42,0,1.41,3151.27,3259.12,3283.49,...,43788.52,42507.45,0,-1.52,7364.7,7304.7,7369.1,7304.7,808190000.0,0.82
4,20220506,345.72,345.64,347.52,344.95,0,0.06,3120.26,3152.09,3168.10,...,42542.52,41543.95,0,-0.96,7205.6,7364.7,7364.7,7161.4,861010000.0,-2.16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,20221115,358.30,360.53,361.42,356.14,0,-0.58,3224.43,3206.24,3229.65,...,44596.27,43916.77,0,0.34,7141.6,7146.3,7146.3,7114.2,937490000.0,-0.07
142,20221116,363.86,358.43,364.57,358.05,0,1.55,3187.11,3224.01,3225.53,...,44374.29,43499.97,0,-0.07,7122.2,7141.6,7142.5,7102.9,737800000.0,-0.27
143,20221117,365.50,363.99,368.27,362.21,0,0.45,3175.19,3187.70,3207.76,...,44666.81,43589.84,0,-0.54,7135.7,7122.2,7146.9,7107.0,730950000.0,0.19
144,20221118,364.70,365.63,366.16,363.68,0,-0.22,3225.43,3175.07,3239.01,...,44512.99,43739.37,0,1.04,7151.8,7135.7,7166.2,7135.7,762720000.0,0.23


In [14]:
# Open fresh connections, list the tables of the `stock_info` schema, and merge
# the stock tables with the investing frame by date.
pymysql_conn = get_pymysql_connection('192.168.50.123', 'investing_data')
sqlalchemy_conn = sqlalchemy_connect_ip('192.168.50.123', 'investing_data')
stock_table_list = get_pymysql_stock_list(pymysql_conn, 'stock_info')
# NOTE(review): looks like a leftover hand-picked subset of table names; unused.
# ['000060','008560','003520','067290','064350']
# get_sqlalchemy_stock_data closes sqlalchemy_conn before it returns.
complete_df = get_sqlalchemy_stock_data(sqlalchemy_conn, stock_table_list, investing_df)

  0%|          | 0/10 [00:00<?, ?it/s]

000020


 10%|█         | 1/10 [00:12<01:54, 12.76s/it]

000040


 20%|██        | 2/10 [00:27<01:51, 13.88s/it]

000050


 30%|███       | 3/10 [00:45<01:50, 15.83s/it]

000060


 40%|████      | 4/10 [00:59<01:30, 15.11s/it]

000070


 50%|█████     | 5/10 [01:11<01:09, 13.90s/it]

000100


 60%|██████    | 6/10 [01:23<00:53, 13.32s/it]

000120


 70%|███████   | 7/10 [01:38<00:41, 13.77s/it]

000140


 80%|████████  | 8/10 [01:52<00:27, 13.92s/it]

000150


 90%|█████████ | 9/10 [02:07<00:14, 14.36s/it]

000180


100%|██████████| 10/10 [02:19<00:00, 13.98s/it]


In [16]:
# Display the stacked stock + investing frame.
complete_df

Unnamed: 0,날짜,시간,시가,고가,저가,종가,거래량,거래대금,누적체결매수수량,누적체결매도수량,...,헝가리BUX내역_고가,헝가리BUX내역_저가,헝가리BUX내역_거래량,헝가리BUX내역_변동,호주S&PASX내역_종가,호주S&PASX내역_오픈,호주S&PASX내역_고가,호주S&PASX내역_저가,호주S&PASX내역_거래량,호주S&PASX내역_변동
0,20220502,901,12450.0,12550.0,12450.0,12550.0,3049.0,37980000.0,3049.0,0.0,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
1,20220502,902,12500.0,12600.0,12500.0,12500.0,662.0,8300000.0,3539.0,172.0,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
2,20220502,903,12550.0,12600.0,12500.0,12600.0,392.0,4920000.0,3890.0,213.0,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
3,20220502,904,12600.0,12600.0,12550.0,12600.0,1092.0,13760000.0,4700.0,495.0,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
4,20220502,905,12600.0,12600.0,12600.0,12600.0,113.0,1420000.0,4813.0,495.0,...,43468.19,42571.10,0,-1.76,7347.0,7435.0,7435.0,7301.6,565050000.0,-1.18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52573,20221121,1517,2060.0,2070.0,2060.0,2070.0,0.0,0.0,24646.0,49781.0,...,44778.83,44034.23,0,0.22,7139.3,7151.8,7174.4,7131.7,598630000.0,-0.17
52574,20221121,1518,2070.0,2070.0,2070.0,2070.0,211.0,440000.0,24857.0,49781.0,...,44778.83,44034.23,0,0.22,7139.3,7151.8,7174.4,7131.7,598630000.0,-0.17
52575,20221121,1519,2070.0,2070.0,2070.0,2070.0,0.0,0.0,24857.0,49781.0,...,44778.83,44034.23,0,0.22,7139.3,7151.8,7174.4,7131.7,598630000.0,-0.17
52576,20221121,1520,2070.0,2070.0,2070.0,2070.0,0.0,0.0,24857.0,49781.0,...,44778.83,44034.23,0,0.22,7139.3,7151.8,7174.4,7131.7,598630000.0,-0.17


In [17]:
# Persist the merged frame; the `./pickle` directory must already exist.
# (The file-name suffix reads "6 months, 10 items".)
complete_df.to_pickle('./pickle/test_complete_df_6개월_10개.pkl')

In [1]:
# List the compute devices TensorFlow reports for this machine.
# NOTE(review): the execution count (In [1]) suggests this cell was run in a
# different kernel session than the cells above — confirm before relying on
# Run All. `tensorflow.python.*` is an internal module path; the public
# equivalent is presumably `tf.config.list_physical_devices()` — verify.
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 15751963213133421779
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 3667263488
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 4189665415739355941
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.6"
 xla_global_id: 416903419]