In [4]:
from datetime import date

def convert_number(text) -> float:
    negative = 1
    if text[0] == '+':
        text = text[1:]
        negative = 1
    elif text[0] == '-':
        text = text[1:]
        negative = -1

    if text.endswith('%'):
        text = text[:-1]

    text = text.replace(',', '')
    return float(text) * negative

print(convert_number('12,123,123'))

def convert_updown_number(text) -> float:
    if text.find('보합') == 0:
        return 0.0
    
    tokens = text.split()
    conv = convert_number(tokens[1])
    
    if tokens[0] == '하락':
        return conv * -1
    else :
        return conv

tx = '''하락
				1,425'''
print(convert_updown_number(tx))



12123123.0
-1425.0


In [5]:
from typing import Optional, List
from bs4 import BeautifulSoup, Tag
import requests

class DataCompFor:
    def __init__(self, date, end_value, delta, updown_per, volume, comp_buy, for_buy, for_cont, for_per):
        self.date = date
        self.end_value = end_value
        self.delta = delta
        self.updown_per = updown_per
        self.volume = volume
        self.comp_buy = comp_buy
        self.for_buy = for_buy
        self.for_cont = for_cont
        self.for_per = for_per

    def __str__(self):
        return "date: " + self.date + ", end_value: " + self.end_value

def get_stock_data_list(url) -> List[DataCompFor]:
    req = requests.get(url, headers={'User-agent': 'Mozilla/5.0'})
    dom = BeautifulSoup(req.text, "html.parser")
    sel = dom.select('table.type2 > tr > td.tc')

    sel_c: List[DataCompFor] = []
    for trd in sel:
        tr = trd.parent
        
        try :
            sel_c.append(DataCompFor(
                tr.select_one('td:nth-child(1)').text,
                convert_number(tr.select_one('td:nth-child(2)').text),
                convert_updown_number(tr.select_one('td:nth-child(3)').text.strip()),
                convert_number(tr.select_one('td:nth-child(4)').text.strip()),
                convert_number(tr.select_one('td:nth-child(5)').text),
                convert_number(tr.select_one('td:nth-child(6)').text),
                convert_number(tr.select_one('td:nth-child(7)').text),
                convert_number(tr.select_one('td:nth-child(8)').text),
                convert_number(tr.select_one('td:nth-child(9)').text)
            ))
        except e:
            print('-- parsing error -->' + tr.select_one('td:nth-child(3)').text.strip())
            # print(e)
            
    return sel_c

data = get_stock_data_list(url = 'https://finance.naver.com/item/frgn.naver?code=233740&page=3')

for e in data:
    print(e.__dict__)
    

{'date': '2024.06.20', 'end_value': 10855.0, 'delta': -125.0, 'updown_per': -1.14, 'volume': 14889410.0, 'comp_buy': -1097939.0, 'for_buy': -228344.0, 'for_cont': 1811444.0, 'for_per': 1.67}
{'date': '2024.06.19', 'end_value': 10980.0, 'delta': 35.0, 'updown_per': 0.32, 'volume': 15754828.0, 'comp_buy': -491188.0, 'for_buy': 203355.0, 'for_cont': 2039788.0, 'for_per': 1.94}
{'date': '2024.06.18', 'end_value': 10945.0, 'delta': 0.0, 'updown_per': 0.0, 'volume': 15857105.0, 'comp_buy': 167830.0, 'for_buy': 8639.0, 'for_cont': 1936433.0, 'for_per': 1.9}
{'date': '2024.06.17', 'end_value': 10945.0, 'delta': -25.0, 'updown_per': -0.23, 'volume': 14217299.0, 'comp_buy': 235073.0, 'for_buy': -333963.0, 'for_cont': 1927794.0, 'for_per': 1.84}
{'date': '2024.06.14', 'end_value': 10970.0, 'delta': -330.0, 'updown_per': -2.92, 'volume': 14509296.0, 'comp_buy': -1291706.0, 'for_buy': -284855.0, 'for_cont': 2261757.0, 'for_per': 2.1}
{'date': '2024.06.13', 'end_value': 11300.0, 'delta': -50.0, 'upd

In [7]:
import pymysql
def crawl2db(stockCode, page):
    targetUrl = 'https://finance.naver.com/item/frgn.naver?code=%s&page=%s' % (stockCode, page)
    print(targetUrl)
    data = get_stock_data_list(url = targetUrl)

    conn = pymysql.connect(host='horusa', user='root', password='root', db='zio', charset='utf8mb4')
    cur = conn.cursor()

    sql = "INSERT INTO CRWAL_INVESTOR_STOCK_TYPE(TYPE_CODE, DATEON, END_VALUE, DELTA, UPDOWN_PER, VOLUME, COMP_BUY, FOR_BUY, FOR_CONT, FOR_PER) values ('233740', %s, %s, %s, %s, %s, %s, %s, %s, %s)"

    for row in data:
        try:
            print(row.__dict__)
            cur.execute(sql, (row.date, row.end_value, row.delta, row.updown_per, row.volume,  row.comp_buy, row.for_buy, row.for_cont, row.for_per))
        except:
            print('error > duplicated key - {}'.format(row.date))

    conn.commit()
    conn.close()
    
crawl2db('233740', 1)
    

https://finance.naver.com/item/frgn.naver?code=233740&page=1
{'date': '2024.08.16', 'end_value': 9655.0, 'delta': 370.0, 'updown_per': 3.98, 'volume': 16413173.0, 'comp_buy': 5421589.0, 'for_buy': 35080.0, 'for_cont': 773321.0, 'for_per': 0.51}
{'date': '2024.08.14', 'end_value': 9285.0, 'delta': 230.0, 'updown_per': 2.54, 'volume': 16664766.0, 'comp_buy': 3288490.0, 'for_buy': 2949.0, 'for_cont': 738241.0, 'for_per': 0.48}
{'date': '2024.08.13', 'end_value': 9055.0, 'delta': -125.0, 'updown_per': -1.36, 'volume': 23539151.0, 'comp_buy': -1675166.0, 'for_buy': 188.0, 'for_cont': 635292.0, 'for_per': 0.4}
{'date': '2024.08.12', 'end_value': 9180.0, 'delta': 105.0, 'updown_per': 1.16, 'volume': 15511602.0, 'comp_buy': 1177707.0, 'for_buy': -38305.0, 'for_cont': 535104.0, 'for_per': 0.34}
{'date': '2024.08.09', 'end_value': 9075.0, 'delta': 490.0, 'updown_per': 5.71, 'volume': 30441582.0, 'comp_buy': 4745422.0, 'for_buy': 41395.0, 'for_cont': 473409.0, 'for_per': 0.29}
{'date': '2024.08.0

In [8]:
import time

for i in range(1, 2):
    print("turn #{}".format(i))
    crawl2db('233740', i)
    time.sleep(1)

turn #1
https://finance.naver.com/item/frgn.naver?code=233740&page=1
{'date': '2024.08.16', 'end_value': 9655.0, 'delta': 370.0, 'updown_per': 3.98, 'volume': 16413173.0, 'comp_buy': 5421589.0, 'for_buy': 35080.0, 'for_cont': 773321.0, 'for_per': 0.51}
error > duplicated key - 2024.08.16
{'date': '2024.08.14', 'end_value': 9285.0, 'delta': 230.0, 'updown_per': 2.54, 'volume': 16664766.0, 'comp_buy': 3288490.0, 'for_buy': 2949.0, 'for_cont': 738241.0, 'for_per': 0.48}
error > duplicated key - 2024.08.14
{'date': '2024.08.13', 'end_value': 9055.0, 'delta': -125.0, 'updown_per': -1.36, 'volume': 23539151.0, 'comp_buy': -1675166.0, 'for_buy': 188.0, 'for_cont': 635292.0, 'for_per': 0.4}
error > duplicated key - 2024.08.13
{'date': '2024.08.12', 'end_value': 9180.0, 'delta': 105.0, 'updown_per': 1.16, 'volume': 15511602.0, 'comp_buy': 1177707.0, 'for_buy': -38305.0, 'for_cont': 535104.0, 'for_per': 0.34}
error > duplicated key - 2024.08.12
{'date': '2024.08.09', 'end_value': 9075.0, 'delta'

In [18]:
import time

print('Sleep 5 sec')
time.sleep(5)
print('wake up')

Sleep 5 sec
wake up


In [62]:
import re

def check_number(text):
    regex = re.compile(r'((-)?\d{1,3}(,\d{3})*(\.\d+)?)')

    search = regex.search(text)
    if search:
        print('%s is a number!!' % search.group(1))
    else:
        print('%s is not a number!!' % text)


print(check_number('11,893,377'))

11,893,377 is a number!!
None


In [2]:
## mysql ##
import pymysql
conn = pymysql.connect(host='localhost', user='root', password='root', db='zio', charset='utf8mb4')
cur = conn.cursor()

cur.execute("select * from USER_TABLE")

while(True):
    row = cur.fetchone()
    if row == None:
        break
    data1 = row[0]
    data2 = row[1]
    data3 = row[2]
    print("%5s %15s %d"%(data1, data2, data3))

uuid-test-1234            jeff 27
uuid-test-1235         tompson 37
