In [2]:
import os
import csv
import math
from urllib.request import urlopen, Request

import OpenDartReader
from bs4 import BeautifulSoup as bs
import pandas as pd
import numpy as np
from pykrx import stock
import time

import cjw_maria

def get_html_fnguide(ticker, gb):
    """Download one FnGuide company page for ``ticker`` and return its raw HTML.

    :param ticker: stock code without the leading "A" (it is appended to
        ``gicode=A`` in the URL)
    :param gb: page selector (0: financial statements, 1: financial ratios,
        2: investment indicators)
    :return: the response body as ``bytes``, or ``None`` when ``gb`` is not a
        valid selector or the request fails
    """
    urls = [
        "https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=103&stkGb=701",
        "https://comp.fnguide.com/SVO2/ASP/SVD_FinanceRatio.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=104&stkGb=701",
        "https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701",
    ]

    # Only 0, 1 and 2 are valid. The previous check (gb > 2) let negative
    # values through, which silently selected a page from the end of the list.
    if gb not in range(len(urls)):
        return None

    req = Request(urls[gb], headers={'User-Agent': 'Mozilla/5.0'})
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(req) as response:
            return response.read()
    except OSError:
        # urllib's URLError/HTTPError and socket-level failures all derive
        # from OSError; report them as "no data" like the invalid-gb case.
        return None

def ext_fin_fnguide_data(ticker,gb,item,n,freq="Q"):
    """Extract the most recent ``n`` values of one account from an FnGuide page.

    :param ticker: stock code
    :param gb: page selector (0: financial statements, 1: financial ratios,
        2: investment indicators)
    :param item: exact text of the account to look up in the page
    :param n: how many of the most recent values to return
    :param freq: "Y" or "A" for annual data, "Q" for quarterly data
        (case-insensitive)
    :return: list of the cell texts for ``item``, or ``None`` when the page
        could not be downloaded, the item is not found, ``n`` exceeds the
        number of available periods, or ``freq`` is not recognised
    """
    html_text = get_html_fnguide(ticker, gb)
    if html_text is None:
        return None

    soup = bs(html_text, 'lxml')
    matches = soup.find_all(text=item)
    if len(matches) == 0:
        return None

    # The financial-statement page is read 3 periods deep, the other pages 4.
    nlimit = 3 if gb == 0 else 4
    if n > nlimit:
        return None

    # The first occurrence of the item is used for annual data and the second
    # for quarterly data. Comparing case-insensitively makes the default "Q"
    # (and the documented "Y") actually select a branch.
    period = str(freq).lower()
    if period in ('a', 'y'):
        match_index = 0
    elif period == 'q':
        match_index = 1
    else:
        return None

    # The page may contain the item only once (no second occurrence).
    if match_index >= len(matches):
        return None

    cells = matches[match_index].find_all_next(class_="r", limit=nlimit)
    return [cell.text for cell in cells[(nlimit - n):nlimit]]

def getDataFromFN(code, col='당기순이익', period='2020/09'):
    """Read one income-statement value for ``code`` from the FnGuide finance page.

    The first table of the page is parsed. The row whose label equals ``col``
    is looked up under the 'IFRS(연결)' label column first and under
    'GAAP(개별)' second.

    :param code: stock code
    :param col: row label to look up (default: '당기순이익')
    :param period: name of the period column to read (default: '2020/09')
    :return: the value found in the ``period`` column of the matching row
    :raises ValueError: if the page could not be downloaded
    :raises KeyError: if ``period`` is not a column, or no row labelled
        ``col`` exists under either label column
    """
    html_text = get_html_fnguide(code, gb=0)
    if html_text is None:
        raise ValueError('FnGuide page could not be downloaded for {}'.format(code))

    profit_loss = pd.read_html(html_text)[0]
    for label_col in ('IFRS(연결)', 'GAAP(개별)'):
        if label_col not in profit_loss.columns:
            continue
        rows = profit_loss.loc[profit_loss[label_col] == col, period]
        if not rows.empty:
            return rows.iloc[0]

    # Same exception type the failed column/row lookup raised before, but
    # without a bare except hiding unrelated errors on the way.
    raise KeyError(col)

def writeError(filename, code, error):
    """Append one row for ``code`` to ``<filename>.csv``.

    :param filename: path of the log file without the ".csv" suffix
    :param code: value written in the first column
    :param error: a list (each element becomes its own column) or any other
        single value (written as the second column)
    """
    # A list is spread over several columns; anything else is one cell.
    if type(error) is list:
        row = [code] + error
    else:
        row = [code, error]

    target = '{}.csv'.format(filename)
    with open(target, 'a', newline='') as handle:
        writer = csv.writer(handle, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(row)

def readError(filename):
    """Load a CP949-encoded CSV file into a DataFrame.

    :param filename: full path of the CSV file (no suffix is appended here)
    :return: the parsed ``pandas.DataFrame``
    """
    return pd.read_csv(filename, encoding='CP949')

# The OpenDART API key is read from the environment so the credential is not
# stored in the notebook. Set OPENDART_API_KEY before running this cell; a
# missing variable raises KeyError here rather than failing later in a request.
# NOTE(review): the key that used to be written inline on this line should be
# treated as exposed and rotated.
dart = OpenDartReader(os.environ['OPENDART_API_KEY'])
class makeDartDB():
    """Collect a fixed set of balance-sheet / income-statement accounts per company.

    ``makeData`` builds a DataFrame of accounts for one stock code from the
    module-level ``dart`` client (falling back to FnGuide for missing income
    items), and ``getData`` converts that frame into a row of integers and
    stores it through the module-level ``maria`` object.

    NOTE(review): several names used below (``fs_is``, ``maria``) are not
    defined in the visible active code; see the notes at the points of use.
    """
    def __init__(self):
        """Define the account ids and account names the other methods look for."""
        # Account ids of the balance-sheet items to collect.
        self.bs_list = ['ifrs-full_IssuedCapital',
            'ifrs-full_Liabilities',
            'ifrs-full_Equity',
            'ifrs-full_CurrentAssets',
            'ifrs-full_CurrentLiabilities',
            'ifrs-full_Assets',
            'ifrs-full_IntangibleAssetsOtherThanGoodwill',
            'ifrs-full_DeferredTaxAssets',
            'ifrs-full_DeferredTaxLiabilities',
            'ifrs-full_CashAndCashEquivalents',
            'ifrs-full_ShorttermBorrowings',
            'dart_LongTermBorrowingsGross']

        # Account ids of the income-statement items to collect.
        self.is_list = ['ifrs-full_ProfitLoss',
                    'ifrs-full_Revenue',
                    'dart_OperatingIncomeLoss']

        # Korean labels, listed in the same order as bs_list / is_list.
        # They are not referenced by the methods visible in this class.
        self.bs_list_ = ['자본금', '부채총계', '자본총계', '유동자산', '유동부채',
                    '자산총계', '무형자산', '이연법인세자산', '이연법인세부채',
                    '현금및현금성자산', '단기차입금', '장기차입금']
        self.is_list_ = ['당기순이익(손실)', '수익(매출액)', '영업이익']

        # Intended fallback order:
        # consolidated statements (finstate_all) -> look up by id -> look up by name
        # -> try the other statement source (finstate)

        # Exception case 1
        # No IS data and no account_id -> the item must be searched by account_nm.
        # One list of candidate account names per entry of is_list, in the same order.
        self.is_list2 = [['당기순이익', '당기순이익(손실)'], 
                         [ '매출액', '수익(매출액)', '영업수익', '매출총이익'], 
                         ['영업이익']]

        # Exception case 2
        # Common-stock capital is reported instead of issued capital.

        # Exception case 3
        # Issued capital has no account_id.

        # Exception case 4
        # Intangible assets are labelled differently.

        # Exception case 5
        # Revenue is labelled differently.

        # Exception case 6
        # No net income available; use FnGuide.

        # Exception case 7
        # Operating income has no id.

        # Exception case 8
        # Operating income is not in the IS statement.

        # Exception case 9
        # Revenue is in the CIS statement.

        # Exception case 10
        # Revenue is labelled as operating revenue (영업수익).


    def makeData(self, code):
        """Build the account DataFrame for one stock code.

        Sets ``self.expc`` to 0 on entry and to 1 when neither ``dart.finstate``
        nor ``dart.finstate_all`` returns data (in which case an entry is
        written with ``writeError`` and ``None`` is returned).

        :param code: stock code passed to the ``dart`` client as ``corp``
        :return: DataFrame of collected accounts, or ``None`` when no data exists
        """
        self.expc = 0
        # Applies to the parent company only.
        fs = dart.finstate(corp=code, bsns_year=2020, reprt_code='11014')
        if fs is None:
            fs_all = dart.finstate_all(corp=code, bsns_year=2020, reprt_code='11014')
            if fs_all is None:
                self.expc = 1
                writeError('error', code, 'nonetype')
                return

        # NOTE(review): when fs is None but fs_all is not, execution reaches this
        # line with fs still None, and fs_all is never used.
        # NOTE(review): only account_nm and thstrm_amount are selected here, yet
        # the code below reads df['account_id'].
        df = fs[fs['fs_div']=='CFS'][['account_nm','thstrm_amount']].reset_index(drop=True)

        # Exception case 1
        # NOTE(review): fs_is is not assigned anywhere in this method or the
        # visible module, so this condition raises NameError as written.
        if fs_is.empty:
            fs = dart.finstate_all(corp=code, bsns_year=2020, reprt_code='11014')[['account_nm','thstrm_amount', 'sj_nm']]
            # Use the first candidate name that is present in fs.
            for ii, i in enumerate(self.is_list2):
                for jj, j in enumerate(i): # net income, revenue, operating income
                    tmp = pd.DataFrame()
                    if j in list(fs['account_nm']):
                        tmp['account_nm'] = [j]
                        tmp['account_id'] = [self.is_list[ii]]
                        tmp['thstrm_amount'] = fs[fs['account_nm'] == j]['thstrm_amount'].reset_index(drop=True)[0]
                        # NOTE(review): DataFrame.append was removed in pandas 2.0;
                        # confirm the pandas version this notebook targets.
                        df = df.append(tmp).reset_index(drop=True)
                        bigo2 = fs[fs['account_nm'] == j]['sj_nm'].reset_index(drop=True)[0]
                        writeError('bigo', code, [i[0], 'none', bigo2])
                        break

        # Data exists but its id or name differs from the expected one.
        # Issued capital: relabel the row named '자본금', or rename the
        # common-stock capital id, to the expected id.
        if 'ifrs-full_IssuedCapital' not in list(df['account_id']):
            if 'dart_IssuedCapitalOfCommonStock' not in list(df['account_id']):
                df.loc[(df.account_nm == '자본금'), 'account_id'] = 'ifrs-full_IssuedCapital'
            else:
                df = df.replace('dart_IssuedCapitalOfCommonStock', 'ifrs-full_IssuedCapital')

        # Intangible assets: substitute one of two alternative ids.
        if 'ifrs-full_IntangibleAssetsOtherThanGoodwill' not in list(df['account_id']):
            if 'dart_GoodwillGross' not in list(df['account_id']):
                df = df.replace('dart_OtherIntangibleAssetsGross', 'ifrs-full_IntangibleAssetsOtherThanGoodwill')
            else:
                df = df.replace('dart_GoodwillGross', 'ifrs-full_IntangibleAssetsOtherThanGoodwill')

        # Revenue: gross profit is relabelled as revenue when revenue is absent.
        if 'ifrs-full_Revenue' not in list(df['account_id']):
            if 'ifrs-full_GrossProfit' in list(df['account_id']):
                df = df.replace('ifrs-full_GrossProfit','ifrs-full_Revenue')

        # Data is missing entirely.
        fs = dart.finstate(corp=code, bsns_year=2020, reprt_code='11014')[['account_nm','thstrm_amount', 'fs_nm', 'sj_nm']]
        # Use the first candidate name that is present in fs.
        for ii, i in enumerate(self.is_list2):
            if self.is_list[ii] not in list(df['account_id']):
                for jj, j in enumerate(i): # net income, revenue, operating income
                    tmp = pd.DataFrame()
                    if j in list(fs['account_nm']):
                        tmp['account_nm'] = [j]
                        tmp['account_id'] = [self.is_list[ii]]
                        tmp['thstrm_amount'] = fs[fs['account_nm'] == j]['thstrm_amount'].reset_index(drop=True)[0]
                        df = df.append(tmp).reset_index(drop=True)
                        bigo1 = fs[fs['account_nm'] == j]['fs_nm'].reset_index(drop=True)[0]
                        bigo2 = fs[fs['account_nm'] == j]['sj_nm'].reset_index(drop=True)[0]
                        writeError('bigo', code, [i[0], bigo1, bigo2])
                        break
                # tmp is still empty only when no candidate name matched; j then
                # holds the last candidate name of the list.
                if tmp.empty:
                    tmp = pd.DataFrame()
                    tmp['account_nm'] = [j]
                    tmp['account_id'] = [self.is_list[ii]]
                    # presumably FnGuide reports in units of 100 million KRW, hence
                    # the multiplier -- TODO confirm
                    tmp['thstrm_amount'] = int(getDataFromFN(code, i[0])) * 100000000
                    df = df.append(tmp).reset_index(drop=True)
                    writeError('bigo', code, [i[0], 'none', 'FNguide'])

        return df

    def getData(self, code, df):
        """Turn the account frame into a row of integers and store it via ``maria``.

        Rows are inserted into the 'done' table when every id in
        ``bs_list + is_list`` is present in ``df`` and into 'notyet' otherwise.
        Blank amounts ('' or '-') are stored as 1 and missing accounts as 2.

        NOTE(review): ``maria`` is read as a global; in the visible notebook it
        is only assigned in commented-out code.

        :param code: stock code
        :param df: DataFrame with 'account_id' and 'thstrm_amount' columns
        :return: list starting with 'A' + code followed by the stored values
        """
        # Record the row position of each required account id
        # (a later duplicate id overwrites an earlier one).
        self.idx_list = {}

        for i in range(df.shape[0]):
            if df['account_id'][i] in self.bs_list+self.is_list:
                self.idx_list[df['account_id'][i]] = i
        tmp = []
        for i in self.bs_list+self.is_list:
            try:
                tmp.append(df.iloc[self.idx_list[i]])
            # NOTE(review): bare except; the expected failure here is a missing
            # key in idx_list, but any other error is skipped silently as well.
            except:
                continue

        df = pd.DataFrame(tmp).reset_index(drop=True)

        # Positions (within bs_list + is_list) of required ids that were not found.
        error_list = []
        for i in range(len(self.bs_list+self.is_list)):
            if (self.bs_list+self.is_list)[i] not in list(df['account_id']):
                error_list.append(i)

        # Every required account is present.
        if not error_list:
            tmp = list(df['thstrm_amount'])
            tmp_ = []
            for t in tmp:
                if t == '' or t == '-': ## blank amount -> 1
                    tmp_.append(1)
                else:
                    tmp_.append(np.int64(str(t).replace(',','')))
            maria.insertData('done', tuple([code] + tmp_))
            maria.commitDB()
            return ['A'+code] + tmp_

        # At least one required account is missing.
        else:
            # Accounts that could not be obtained are encoded as 2.
            errors = [df[df['account_id']==i]['thstrm_amount'].iloc[0] if i in list(df['account_id']) else 2 for i in self.bs_list+self.is_list]
            tmp_ = []
            for t in errors:
                if t == '' or t == '-': ## blank amount -> 1
                    tmp_.append(1)
                else:
                    tmp_.append(np.int64(str(t).replace(',','')))

            maria.insertData('notyet', tuple([code] + tmp_))
            maria.commitDB()

            return ['A'+code] + tmp_
    

# maria = cjw_maria.MariaDB()
# done_df = maria.showData('done')
# notyet_df = maria.showData('notyet')

# stock_amount = stock.get_market_cap_by_ticker("20200929")[['상장주식수']]
# eps = stock.get_market_fundamental_by_ticker("20200929")[['EPS']]
# #merge_df = pd.merge(stock_amount, eps, left_index=True, right_index=True)
# merge_df = stock_amount
# code_list = list(merge_df.index)

# makeDD = makeDartDB()

    
# for i, c in enumerate(code_list):
#     print(c)
#     if c in list(done_df['종목코드']) or c in list(notyet_df['종목코드']):
#         continue
#     else:
#         df = makeDD.makeData(c)
#         if makeDD.expc == 0:
#             makeDD.getData(c, df)


In [56]:
# maria = cjw_maria.MariaDB()

# f2017 = maria.showData('data2017')
# f2018 = maria.showData('data2018')
# f2019 = maria.showData('data2019')
# f2020 = maria.showData('data2020')
# with open('res3', 'a', newline='') as csvfile:
#     spamwriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
#     spamwriter.writerow(f2017.columns)
#     data = [f2017, f2018, f2019, f2020]
#     for d in data:
#         tmp = []
#         for c in d.columns:
#             cnt = d.value_counts(c)
#             try:
#                 cnt_0 = cnt[cnt.index==0][0]
#                 tmp.append(cnt_0)
#             except:
#                 try:
#                     cnt_0 = cnt[cnt.index==-9999999][0]
#                     tmp.append(cnt_0)
#                 except:
#                     tmp.append(0)

#         spamwriter.writerow(tmp)


In [51]:


# Tally the rows of f2017 by their '영업이익' value and show how many rows
# have the value 11.
# NOTE(review): f2017 is only assigned in commented-out code in the cell above
# (maria.showData('data2017')), so this cell depends on leftover kernel state.
cnt = f2017.value_counts('영업이익')
cnt_0 = cnt[cnt.index==11].iloc[0]
cnt_0

18

# dart_fss
'01150' 등 특정 기업 정보 반환 에러

In [10]:
import pandas as pd
import cjw_dart

mariaDart = cjw_dart.MariaDart()


# Look up ticker 005930 with the date string '20200530'.
# The ticker literal previously carried a trailing space ('005930 '), which
# does not match the 6-digit code used by the other getJamoo call in this notebook.
# NOTE(review): what getJamoo returns is defined in cjw_dart, not visible here.
fs = mariaDart.getJamoo('005930', '20200530')
fs

import pandas as pd
import cjw_dart

# Create the project-local MariaDart helper and query ticker 005930 with the
# date string '20200101'; the result is displayed as the cell output.
# NOTE(review): the semantics of getJamoo's arguments and return value are
# defined in cjw_dart, not visible here -- confirm before relying on them.
mariaDart = cjw_dart.MariaDart()

tmp = mariaDart.getJamoo('005930', '20200101')
tmp


Quarterly reports:   0%|          | 0/2 [00:00<?, ?report/s]

Label,Data
No.,title
corp_code,00126380
bgn_de,20200101
end_de,
separate,False
report_tp,[quarter]
lang,ko
separator,True
financial statement,"No.title0[D210000] Statement of financial position, current/non-current - Consolidated financial statements (Unit: KRW)1[D310000] Income statement, by function of expense - Consolidated financial statements (Unit: KRW)2[D410000] Statement of comprehensive income - Consolidated financial statements (Unit: KRW)3[D520000] Statement of cash flows, indirect method - Consolidated financial statements (Unit: KRW)"
No.,title
0,"[D210000] Statement of financial position, current/non-current - Consolidated financial statements (Unit: KRW)"

No.,title
0,"[D210000] Statement of financial position, current/non-current - Consolidated financial statements (Unit: KRW)"
1,"[D310000] Income statement, by function of expense - Consolidated financial statements (Unit: KRW)"
2,[D410000] Statement of comprehensive income - Consolidated financial statements (Unit: KRW)
3,"[D520000] Statement of cash flows, indirect method - Consolidated financial statements (Unit: KRW)"


# FN guide 크롤링

In [83]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup as bs

def get_html_fnguide(ticker, gb):
    """Download one FnGuide company page for ``ticker`` and return its raw HTML.

    NOTE(review): this function is defined in more than one cell of this
    notebook; after a top-to-bottom run the last definition is the one in effect.

    :param ticker: stock code without the leading "A" (it is appended to
        ``gicode=A`` in the URL)
    :param gb: page selector (0: financial statements, 1: financial ratios,
        2: investment indicators)
    :return: the response body as ``bytes``, or ``None`` when ``gb`` is not a
        valid selector or the request fails
    """
    urls = [
        "https://comp.fnguide.com/SVO2/ASP/SVD_Finance.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=103&stkGb=701",
        "https://comp.fnguide.com/SVO2/ASP/SVD_FinanceRatio.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=104&stkGb=701",
        "https://comp.fnguide.com/SVO2/ASP/SVD_Invest.asp?pGB=1&gicode=A" + ticker + "&cID=&MenuYn=Y&ReportGB=&NewMenuID=105&stkGb=701",
    ]

    # Only 0, 1 and 2 are valid. The previous check (gb > 2) let negative
    # values through, which silently selected a page from the end of the list.
    if gb not in range(len(urls)):
        return None

    req = Request(urls[gb], headers={'User-Agent': 'Mozilla/5.0'})
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(req) as response:
            return response.read()
    except OSError:
        # urllib's URLError/HTTPError and socket-level failures all derive
        # from OSError; report them as "no data" like the invalid-gb case.
        return None

def ext_fin_fnguide_data(ticker,gb,item,n,freq="Q"):
    """Extract the most recent ``n`` values of one account from an FnGuide page.

    NOTE(review): this function is defined in more than one cell of this
    notebook; after a top-to-bottom run the last definition is the one in effect.

    :param ticker: stock code
    :param gb: page selector (0: financial statements, 1: financial ratios,
        2: investment indicators)
    :param item: exact text of the account to look up in the page
    :param n: how many of the most recent values to return
    :param freq: "Y" or "A" for annual data, "Q" for quarterly data
        (case-insensitive)
    :return: list of the cell texts for ``item``, or ``None`` when the page
        could not be downloaded, the item is not found, ``n`` exceeds the
        number of available periods, or ``freq`` is not recognised
    """
    html_text = get_html_fnguide(ticker, gb)
    if html_text is None:
        return None

    soup = bs(html_text, 'lxml')
    matches = soup.find_all(text=item)
    if len(matches) == 0:
        return None

    # The financial-statement page is read 3 periods deep, the other pages 4.
    nlimit = 3 if gb == 0 else 4
    if n > nlimit:
        return None

    # The first occurrence of the item is used for annual data and the second
    # for quarterly data. Comparing case-insensitively makes the default "Q"
    # (and the documented "Y") actually select a branch.
    period = str(freq).lower()
    if period in ('a', 'y'):
        match_index = 0
    elif period == 'q':
        match_index = 1
    else:
        return None

    # The page may contain the item only once (no second occurrence).
    if match_index >= len(matches):
        return None

    cells = matches[match_index].find_all_next(class_="r", limit=nlimit)
    return [cell.text for cell in cells[(nlimit - n):nlimit]]

def getDataFromFN(code, col='당기순이익', period='2020/09'):
    """Read one income-statement value for ``code`` from the FnGuide finance page.

    The first table of the page is parsed. The row whose label equals ``col``
    is looked up under the 'IFRS(연결)' label column first and under
    'GAAP(개별)' second, so companies whose table uses the second label column
    are handled as well.

    NOTE(review): this function is defined in more than one cell of this
    notebook; after a top-to-bottom run the last definition is the one in effect.

    :param code: stock code
    :param col: row label to look up (default: '당기순이익')
    :param period: name of the period column to read (default: '2020/09')
    :return: the value found in the ``period`` column of the matching row
    :raises ValueError: if the page could not be downloaded
    :raises KeyError: if ``period`` is not a column, or no row labelled
        ``col`` exists under either label column
    """
    html_text = get_html_fnguide(code, gb=0)
    if html_text is None:
        raise ValueError('FnGuide page could not be downloaded for {}'.format(code))

    profit_loss = pd.read_html(html_text)[0]
    for label_col in ('IFRS(연결)', 'GAAP(개별)'):
        if label_col not in profit_loss.columns:
            continue
        rows = profit_loss.loc[profit_loss[label_col] == col, period]
        if not rows.empty:
            return rows.iloc[0]

    raise KeyError(col)
    

883.0

In [298]:
#getDataFromFN('253840', '당기순이익')

# Parse the first HTML table of the FnGuide finance page (gb=0) for ticker
# 141020 and display it, to inspect the row labels and period columns that
# getDataFromFN indexes into.
profit_loss = pd.read_html(get_html_fnguide('141020',gb=0))[0]
profit_loss

Unnamed: 0,IFRS(연결),2017/12,2018/12,2019/12,2020/09,전년동기,전년동기(%)
0,매출액,464.0,252.0,386.0,15.0,372.0,-96.0
1,매출원가,401.0,203.0,345.0,,323.0,
2,매출총이익,63.0,49.0,41.0,15.0,49.0,-69.9
3,판매비와관리비계산에 참여한 계정 펼치기,43.0,123.0,214.0,17.0,150.0,-89.0
4,영업이익,20.0,-75.0,-173.0,-2.0,-101.0,적자지속
5,영업이익(발표기준),20.0,-75.0,-173.0,-2.0,-101.0,적자지속
6,금융수익계산에 참여한 계정 펼치기,2.0,7.0,15.0,1.0,10.0,-91.9
7,금융원가계산에 참여한 계정 펼치기,9.0,35.0,84.0,6.0,54.0,-89.6
8,기타수익계산에 참여한 계정 펼치기,27.0,6.0,24.0,0.0,28.0,-99.4
9,기타비용계산에 참여한 계정 펼치기,23.0,56.0,124.0,25.0,62.0,-59.2


In [175]:
# Display df1.
# NOTE(review): df1 is not assigned in any visible cell of this notebook, so
# this cell relies on leftover kernel state -- confirm where it comes from.
df1

Unnamed: 0,종목코드,ifrs-full_IssuedCapital,ifrs-full_Liabilities,ifrs-full_Equity,ifrs-full_CurrentAssets,ifrs-full_CurrentLiabilities,ifrs-full_Assets,ifrs-full_IntangibleAssetsOtherThanGoodwill,ifrs-full_DeferredTaxAssets,ifrs-full_DeferredTaxLiabilities,ifrs-full_CashAndCashEquivalents,ifrs-full_ShorttermBorrowings,dart_LongTermBorrowingsGross,ifrs-full_ProfitLoss,ifrs-full_Revenue,dart_OperatingIncomeLoss
0,005930,897514000000,99652554000000,276136188000000,203634913000000,73046405000000,375788742000000,18980799000000,4478036000000,18362110000000,26566097000000,15856252000000,2017847000000,9360693000000,66964160000000,12353238000000
1,000660,3657652000000,18768114000000,50499059000000,17036210000000,8851618000000,69267173000000,3502348000000,680670000000,60528000000,2807138000000,3593603000000,8109190000000,0,0,0
2,051910,391406000000,21198289000000,18820580000000,15878151000000,11575488000000,40018869000000,2370550000000,766415000000,49533000000,3539014000000,0,0,570387000000,7507281000000,902084000000
3,035420,16481339500,8040822582917,7411737214992,10206941063717,7272227392553,15452559797909,0,107124177977,12592245685,1441410060661,1306060037460,158939222556,0,0,0
4,207940,165412500000,1638363579495,4498835520437,1569087979843,625855215434,6137199099932,24342694791,0,475420950811,57022223899,286847455434,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76,028670,534569000000,1911648000000,3036598000000,657447000000,603988000000,4948246000000,7303000000,0,0,213161000000,0,1255377000000,0,0,0
77,036460,0,28011088400443,7694545286755,6238076359322,6569333774819,35705633687198,1689590924019,1032908607412,1712547659920,340281270056,0,0,0,0,0
78,282330,17283906000,1775447686421,672166055610,914008091331,1091423581986,2447613742031,39738745078,12016104408,1292172327,21158231845,9994238750,14360642640,0,0,0
79,307950,10500000000,519524754362,542555912695,802333285530,419793976533,1062080667057,50946262849,2599853767,,82483939526,1657956357,,0,0,0


In [138]:
# Fetch per-ticker data from pykrx for the date string "20210222", keeping
# only the '상장주식수' column of the market-cap table and the 'EPS' column of
# the fundamentals table, then print the shape of each frame.
# The recorded output shows different row counts for the two frames, so
# joining them on the ticker index would not be one-to-one.
stock_amount = stock.get_market_cap_by_ticker("20210222")[['상장주식수']]
eps = stock.get_market_fundamental_by_ticker("20210222")[['EPS']]
print(stock_amount.shape)
print(eps.shape)

(2542, 1)
(898, 1)


In [139]:
# Display the '상장주식수' frame built in the previous cell.
stock_amount

Unnamed: 0_level_0,상장주식수
티커,Unnamed: 1_level_1
005930,5969782550
000660,728002365
051910,70592343
035420,164263395
005935,822886700
...,...
001529,89722
344860,1860000
179720,739500
225850,3332333


In [140]:
# Display the 'EPS' frame built two cells above.
eps

Unnamed: 0_level_0,EPS
티커,Unnamed: 1_level_1
095570,982
006840,2168
027410,281
282330,8763
138930,1647
...,...
069260,1812
000540,510
000547,0
000545,0
