In [39]:
import abc
import io
import random
import re
import time
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

import rceptnoInfo
import stockInfo

In [24]:
class Nord2Parser:
    parameterKeyLst = ['text', 'id', 'rcpNo', 'dcmNo', 
                'eleId', 'offset', 'length', 'dtd', 'tocNo']


    def get_parameterValue(self, key):
        return re.findall(r'node2\[\'' + key + '\'\]\s*=\s*(".*?")', self.html)

    def get_parameterDic(self):
        parameterDic = {}
        for key in self.parameterKeyLst :
            parameterDic[key] = self.get_parameterValue(key)
        return parameterDic
    
    def parse_nord(self, parser):
        stripedDf = pd.DataFrame(self.parameterDic).applymap(lambda x:x.strip('"'))
        con = stripedDf.text.str.contains(parser)
        dic = stripedDf.loc[con].to_dict('records')
        return dic

    def __init__(self, reportHtml):
        self.html = reportHtml
        self.parameterDic = self.get_parameterDic()
        


In [25]:
class Nord1Parser:
    parameterKeyLst = ['text', 'id', 'rcpNo', 'dcmNo', 
                'eleId', 'offset', 'length', 'dtd', 'tocNo']


    def get_parameterValue(self, key):
        return re.findall(r'node1\[\'' + key + '\'\]\s*=\s*(".*?")', self.html)

    def get_parameterDic(self):
        parameterDic = {}
        for key in self.parameterKeyLst :
            parameterDic[key] = self.get_parameterValue(key)
        return parameterDic
    
    def parse_nord(self, parser):
        stripedDf = pd.DataFrame(self.parameterDic).applymap(lambda x:x.strip('"'))
        con = stripedDf.text.str.contains(parser)
        dic = stripedDf.loc[con].to_dict('records')
        return dic

    def __init__(self, reportHtml):
        self.html = reportHtml
        self.parameterDic = self.get_parameterDic()

In [26]:
class ParseNordABC(metaclass=abc.ABCMeta):
    """Abstract base pairing a node parser with the pattern it should look for.

    Attributes:
        nord: Object exposing ``parse_nord(pattern)``.
        parser_format: Regular expression handed to ``nord.parse_nord``.
    """

    def __init__(self, nord, parser_format):
        self.nord = nord
        self.parser_format = parser_format

    # ``operation`` takes ``self`` and is overridden as an instance method, so
    # it is declared with ``abstractmethod``; the deprecated
    # ``abstractclassmethod`` would also have wrapped it in ``classmethod``.
    @abc.abstractmethod
    def operation(self):
        """Run the parse step; concrete subclasses must implement this."""

In [27]:
class Parser(ParseNordABC):
    """Concrete parse step that forwards the stored pattern to the node parser."""

    def operation(self):
        """Return ``nord.parse_nord(parser_format)`` for the stored pair."""
        nodeParser, pattern = self.nord, self.parser_format
        return nodeParser.parse_nord(pattern)

In [74]:
class Handler:
    """One link in a chain of handlers that tries to fetch a report section's tables.

    Each handler asks its ``parser`` for request parameters. If it gets any,
    it downloads the section page and returns the tables found there;
    otherwise (or if the page has no table) it defers to ``successor``.

    Args:
        parser: Object exposing ``operation()`` that returns a list of
            parameter dicts (possibly empty).
        successor: Next ``Handler`` to try, or ``None`` at the end of the chain.
        timeout: Seconds to wait for the HTTP request before giving up.
    """

    def __init__(self, parser, successor=None, timeout=10):
        self.parser = parser
        self.successor = successor
        self.timeout = timeout

    def _delegate(self):
        """Pass the request to the successor, or return ``None`` if there is none."""
        if self.successor is not None:
            print('successor')
            return self.successor.handle_request()
        print('None')
        return None

    def inner_handler(self, detailReportParameter):
        """Download the section page and return its tables.

        Args:
            detailReportParameter: Non-empty list of parameter dicts; only the
                first one is sent as the query string.

        Returns:
            The list of DataFrames from ``pd.read_html`` when the page contains
            at least one ``<table>``; otherwise whatever the successor chain
            returns (``None`` if it is exhausted).
        """
        print('inner_handler')
        url = 'http://dart.fss.or.kr/report/viewer.do?'
        # Without a timeout a stalled connection would block the kernel forever.
        r = requests.get(url, params=detailReportParameter[0], timeout=self.timeout)
        html = r.text
        soup = BeautifulSoup(html, 'html.parser')
        if soup.select('table'):
            print('got table')
            # Wrap the markup in a file-like object: handing a literal HTML
            # string to read_html is deprecated in recent pandas releases.
            return pd.read_html(io.StringIO(html))
        return self._delegate()

    def handle_request(self):
        """Run this handler, falling back to the successor when nothing is found.

        Returns:
            A list of DataFrames, or ``None`` when no handler in the chain
            produced any table.
        """
        detailReportParameter = self.parser.operation()
        print('handler')
        if detailReportParameter:
            print('got params')
            print(detailReportParameter)
            return self.inner_handler(detailReportParameter)
        return self._delegate()

In [29]:
# Backup folder on the current user's Desktop, handed to StockInfo below.
path = Path.home() / 'Desktop' / 'dataBackUp(211021)'
commonStockProvider = stockInfo.commonStockProvider()
stockinfo = stockInfo.StockInfo(path, commonStockProvider)

In [None]:
# Load the stock info mapping in one batch call.
# To inspect a single ticker instead, use:
#   stockInfoDic = stockinfo.get_stockInfo('000020')
#   print(stockInfoDic)
stockInfoDic = stockinfo.get_batch_stockInfo()

In [87]:
# Display the whole mapping (keyed by ticker; each value holds ticker, name and corp_code).
stockInfoDic

{'000020': {'ticker': '000020', 'name': '동화약품', 'corp_code': '00119195'},
 '078420': {'ticker': '078420', 'name': '동북아1호', 'corp_code': '00475286'},
 '002840': {'ticker': '002840', 'name': '미원상사', 'corp_code': '00121932'},
 '078930': {'ticker': '078930', 'name': 'GS', 'corp_code': '00500254'},
 '002820': {'ticker': '002820', 'name': 'SUN&L', 'corp_code': '00132211'},
 '002810': {'ticker': '002810', 'name': '삼영무역', 'corp_code': '00127167'},
 '079160': {'ticker': '079160', 'name': 'CJ CGV', 'corp_code': '00303873'},
 '079430': {'ticker': '079430', 'name': '현대리바트', 'corp_code': '00300548'},
 '002790': {'ticker': '002790', 'name': '아모레G', 'corp_code': '00154462'},
 '002870': {'ticker': '002870', 'name': '신풍제지', 'corp_code': '00137368'},
 '079660': {'ticker': '079660', 'name': '사조해표', 'corp_code': '00503899'},
 '080030': {'ticker': '080030', 'name': '동북아2호', 'corp_code': '00502474'},
 '002780': {'ticker': '002780', 'name': '진흥기업', 'corp_code': '00150828'},
 '080180': {'ticker': '080180', 'n

In [118]:
# Choose one ticker at random (unseeded, so each run may pick a different one)
# and remember its corp_code for the cells that follow.
tickerLst = [*stockInfoDic]
ticker = random.choice(tickerLst)
selectedStockInfo = stockInfoDic[ticker]
corp_code = selectedStockInfo['corp_code']
print(selectedStockInfo)

{'ticker': '006740', 'name': '영풍제지', 'corp_code': '00141404'}


In [119]:
# Presumably the first and last day (YYYYMMDD) of the lookup window — confirm
# against rceptnoInfo.RceptnoInfo.get_rceptnoInfo.
firstDay, lastDay = '20100101', '20211130'

preprocessor = rceptnoInfo.PreprocessorRceptnoInfo()
rc = rceptnoInfo.RceptnoInfo(preprocessor)
print(corp_code)
rceptnoInfoDic = rc.get_rceptnoInfo(corp_code, firstDay, lastDay)

00141404


In [120]:
# Keep only the receipt numbers whose add_info field is empty.
rceptnoInfoDf = pd.DataFrame(rceptnoInfoDic[corp_code])
con = rceptnoInfoDf['add_info'].eq('')
rcept_noLst = rceptnoInfoDf.loc[con, 'rcept_no'].tolist()
len(rcept_noLst)

48

In [127]:
# Pick one receipt number at random and download that report's main page.
rcept_no = random.choice(rcept_noLst)
print(f'rcept_no : {rcept_no}')
url = f'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcept_no}'
# Bound the wait so a stalled connection cannot hang the kernel.
r = requests.get(url, timeout=10)
reportHtml = r.text

# Both node2-based parsers read the same HTML, so scan it only once.
nord2 = Nord2Parser(reportHtml)
parser1 = Parser(nord = nord2, parser_format = r'.*연결재무제표$')
parser2 = Parser(nord = Nord1Parser(reportHtml), parser_format = r'.*재무제표 등$')
parser3 = Parser(nord = nord2, parser_format = r'^[^가-힣]*재무제표$')

# Chain of fallbacks, tried in order: parser1 -> parser2 -> parser3.
successor2 = Handler(parser3)
successor1 = Handler(parser2, successor2)
# Despite its name, this holds the handler chain's result: a list of
# DataFrames, or None when no handler found a table.
detailReportParameter = Handler(parser1, successor1).handle_request()
print(detailReportParameter)

rcept_no : 20160330004182
handler
got params
[{'text': '2. 연결재무제표', 'id': '13', 'rcpNo': '20160330004182', 'dcmNo': '5029404', 'eleId': '13', 'offset': '159753', 'length': '118', 'dtd': 'dart3.xsd', 'tocNo': '13'}]
inner_handler
successor
handler
successor
handler
got params
[{'text': '4. 재무제표', 'id': '15', 'rcpNo': '20160330004182', 'dcmNo': '5029404', 'eleId': '15', 'offset': '160009', 'length': '76854', 'dtd': 'dart3.xsd', 'tocNo': '15'}]
inner_handler
got table
[                      0
0                 재무상태표
1  제 46 기 2015.12.31 현재
2  제 45 기 2014.12.31 현재
3  제 44 기 2013.12.31 현재
4              (단위 : 원),     Unnamed: 0           제 46 기           제 45 기           제 44 기
0           자산              NaN              NaN              NaN
1         유동자산      33796609753      51206973035      55142283019
2     현금및현금성자산       7178532779       9288803212      11986144776
3      금융기관예치금        736274436      12000000000      14000000000
4         매출채권      10976332847      12398366175      