In [1]:
import abc
import requests
from tqdm import tqdm

from pathlib import Path
import pandas as pd

import stockInfo
import rceptnoInfo

import re
import time

In [2]:
path = Path.home().joinpath('Desktop', 'dataBackUp(211021)')
commonStockProvider = stockInfo.commonStockProvider()
stockinfo = stockInfo.StockInfo(path, commonStockProvider)

In [5]:
stockinfo.get_stockInfo('000020')
stockInfoDic = stockinfo.get_batch_stockInfo()

100%|██████████| 1026/1026 [01:04<00:00, 15.84it/s]


In [6]:
tickerLst = list(stockInfoDic.keys())
tickerLst[0]

'000020'

In [7]:
corp_code = stockInfoDic[tickerLst[0]]['corp_code']
corp_code

'00119195'

In [8]:
corp_codeLst = []
for k in range(0, len(tickerLst)):
    corp_code = stockInfoDic[tickerLst[k]]['corp_code']
    corp_codeLst.append(corp_code)
    if k == 5:
        break
print(corp_codeLst)

['00119195', '00475286', '00121932', '00500254', '00132211', '00127167']


In [9]:
preprocessor = rceptnoInfo.PreprocessorRceptnoInfo()
rc = rceptnoInfo.RceptnoInfo(preprocessor)

In [11]:
dic = rc.get_batch_rceptnoInfo(corp_codeLst, '20100101', '20211130')

100%|██████████| 6/6 [00:06<00:00,  1.11s/it]


In [12]:
dic['00119195'][0]['rcept_dt']

'20211115'

In [13]:
class Nord2Parser:
    parameterKeyLst = ['text', 'id', 'rcpNo', 'dcmNo', 
                'eleId', 'offset', 'length', 'dtd', 'tocNo']


    def get_parameterValue(self, key):
        return re.findall(r'node2\[\'' + key + '\'\]\s*=\s*(".*?")', self.html)

    def get_parameterDic(self):
        parameterDic = {}
        for key in self.parameterKeyLst :
            parameterDic[key] = self.get_parameterValue(key)
        return parameterDic
    
    def parse_nord(self, parser):
        stripedDf = pd.DataFrame(self.parameterDic).applymap(lambda x:x.strip('"'))
        con = stripedDf.text.str.contains(parser)
        dic = stripedDf.loc[con].to_dict('records')
        return dic

    def __init__(self, reportHtml):
        self.html = reportHtml
        self.parameterDic = self.get_parameterDic()
        


In [14]:
class Nord1Parser:
    parameterKeyLst = ['text', 'id', 'rcpNo', 'dcmNo', 
                'eleId', 'offset', 'length', 'dtd', 'tocNo']


    def get_parameterValue(self, key):
        return re.findall(r'node1\[\'' + key + '\'\]\s*=\s*(".*?")', self.html)

    def get_parameterDic(self):
        parameterDic = {}
        for key in self.parameterKeyLst :
            parameterDic[key] = self.get_parameterValue(key)
        return parameterDic
    
    def parse_nord(self, parser):
        stripedDf = pd.DataFrame(self.parameterDic).applymap(lambda x:x.strip('"'))
        con = stripedDf.text.str.contains(parser)
        dic = stripedDf.loc[con].to_dict('records')
        return dic

    def __init__(self, reportHtml):
        self.html = reportHtml
        self.parameterDic = self.get_parameterDic()

In [15]:
tickerLst = list(dic.keys())
rcept_no = dic[tickerLst[0]][0]['rcept_no']
print(rcept_no)

20211115001703


In [17]:
class ParseNordABC(metaclass=abc.ABCMeta):

    def __init__(self, nord, parser_format) :
        self.nord = nord
        self.parser_format = parser_format
    @abc.abstractclassmethod
    def operation(self):
        pass

In [18]:
class Parser(ParseNordABC):
    def operation(self):
        return self.nord.parse_nord(self.parser_format)

In [19]:
class Handler:

    def __init__(self, parser, successor=None):
        self.parser = parser
        self.successor = successor
    
    def handle_request(self):
        detailReportParameter = self.parser.operation()
        print(detailReportParameter)
        if detailReportParameter :
            return detailReportParameter[0]
        elif self.successor is not None:
            self.successor.handle_request()
        else:
            return None

In [22]:
tickerLst = list(dic.keys())
detailReportParameterDic = {}
ticker = tickerLst[0]
print(ticker)
print(len(dic[ticker]))
for k in tqdm(range(0, len(dic[ticker]))) :
    print(k)
    time.sleep(1)
    if k > 0:
        break
    if not dic[ticker][k]['add_info']:
        print('here')
        rcept_no = dic[ticker][k]['rcept_no']
        print(rcept_no)
        url = f'http://dart.fss.or.kr/dsaf001/main.do?rcpNo={rcept_no}'
        r = requests.get(url)
        reportHtml = r.text
        parser1 = Parser(nord = Nord2Parser(reportHtml), parser_format = r'.*연결재무제표$')
        parser2 = Parser(nord = Nord1Parser(reportHtml), parser_format = r'.*재무제표.*$')
        successor = Handler(parser = parser2)
        detailReportParameter = Handler(parser = parser1, successor = successor).handle_request()
    

  0%|          | 0/51 [00:00<?, ?it/s]

00119195
51
0


  2%|▏         | 1/51 [00:01<00:55,  1.12s/it]

here
20211115001703
[{'text': '2. 연결재무제표', 'id': '19', 'rcpNo': '20211115001703', 'dcmNo': '8277451', 'eleId': '19', 'offset': '251297', 'length': '86078', 'dtd': 'dart3.xsd', 'tocNo': '19'}]
1


  2%|▏         | 1/51 [00:02<01:46,  2.12s/it]


In [23]:
print(detailReportParameter)

{'text': '2. 연결재무제표', 'id': '19', 'rcpNo': '20211115001703', 'dcmNo': '8277451', 'eleId': '19', 'offset': '251297', 'length': '86078', 'dtd': 'dart3.xsd', 'tocNo': '19'}
