# 세종데이터 요약재무 (년)

페이지 이름으로 주재무제표, IFRS연결, IFRS별도를 구분 (table_main0_bus_01.html)
* http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?no=005930 주재무제표(GAAP)
* http://www.sejongdata.com/business_include_fr/table_main1_bus_01.html?no=005930 K-IFRS(연결)
* http://www.sejongdata.com/business_include_fr/table_main2_bus_01.html?no=005930 K-IFRS(별도)


gubun 값으로 연도를 구분(1: 1995~2004, 2: 2005~2014)
* 1995~2004 'http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?gubun=1&no=005930'
* 2005~2014 'http://www.sejongdata.com/business_include_fr/table_main0_bus_01.html?gubun=2&no=005930'

In [23]:
import re
import requests
import numpy as np
import pandas as pd
from pandas_datareader import data, wb

In [24]:
host = 'http://www.sejongdata.com/business_include_fr/'
    
# 주재무제표 (GAAP)
config_gaap = { 
    'url_tmpl': host + 'table_main0_bus_01.html?no=%s&gubun=%s',
    'data_dir': 'data/sj_finstate_year_gaap/'
}

# K-IFRS(연결)
config_ifrs_con = {
    'url_tmpl': host + 'table_main1_bus_01.html?no=%s&gubun=%s',
    'data_dir': 'data/sj_finstate_year_ifrs_con/'
}

# K-IFRS(별도)
config_ifrs_sep = {
    'url_tmpl': host + 'table_main2_bus_01.html?no=%s&gubun=%s',
    'data_dir': 'data/sj_finstate_year_ifrs_sep/'
}

# 아래 3라인중 하나를 선택하여 사용 (comment out)
config = config_gaap # 주재무제표 (GAAP)
config = config_ifrs_con # K-IFRS(연결)
config = config_ifrs_sep # K-IFRS(별도)


In [25]:
import os

def mkdir_not_ex(filename):
    folder=os.path.dirname(filename)
    if not os.path.exists(folder):
        os.makedirs(folder)
        
# Make sure the output directory of the selected config exists. The configured
# 'data_dir' values end with '/', so the dirname taken inside mkdir_not_ex()
# is the directory itself.
data_dir = config['data_dir']
mkdir_not_ex(data_dir)

In [32]:
# URL template of the selected statement type; get_finstat_year() reads this
# module-level name and fills in (stock code, gubun).
url_tmpl = config['url_tmpl']

def get_finstat_year(code):
    df_1 = pd.read_html(url_tmpl % (code, '1'))[1]
    df_2 = pd.read_html(url_tmpl % (code, '2'))[1]

    df_1 = df_1.T
    df_2 = df_2.T
    
    # 컬럼 이름 지정
    cols = ['날짜', '매출액', '영업이익', '순이익', '연결순이익', '자산총계', '부채총계', '자본총계']
    df_1.columns = cols 
    df_2.columns = cols
    
    # 첫번째 row 제거
    df_1 = df_1[1:]
    df_2 = df_2[1:]    
    
    # df_1, df_2를 붙여서 새로운 df 생성 
    df = df_1.append(df_2)

    # df['년도']
    # "2014.12 (IFRS 연결)" to "2014-12"
    df['종류'] = df['날짜'].apply(lambda x: x.split(' (')[1].replace(')', ''))
    df['날짜'] = df['날짜'].apply(lambda x: x.split(' ')[0].replace('.', '-'))
    df['날짜'] = pd.to_datetime(df['날짜'])

    # 타입을 object 에서 float 로 변환
    cols = ['매출액', '영업이익', '순이익', '연결순이익', '자산총계', '부채총계', '자본총계']
    try:
        df[cols] = df[cols].astype(float) 
    except:
        pass

    df = df.set_index('날짜')
    return df



In [33]:
# Smoke test: fetch one stock, round-trip it through a CSV file in the working
# directory, then display the frame that was read back.
code = '005930'
fname = '%s.csv' % code
df = get_finstat_year(code)
df.to_csv(fname)
df = pd.read_csv(fname, index_col='날짜')
os.unlink(fname)

df

Unnamed: 0_level_0,매출액,영업이익,순이익,연결순이익,자산총계,부채총계,자본총계,종류
날짜,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1996-12-01,158745.0,14468.0,1642.0,-,158385.0,108130.0,50255.0,GAAP 개별
1997-12-01,184654.0,28562.0,1260.0,-,230655.0,172356.0,58299.0,GAAP 개별
1998-12-01,200842.0,31000.0,3216.0,-,207761.0,138061.0,69700.0,GAAP 개별
1999-12-01,261178.0,44815.0,31704.0,-,247098.0,113782.0,133316.0,GAAP 개별
2000-12-01,342838.0,74352.0,60145.0,-,268950.0,107022.0,161928.0,GAAP 개별
2001-12-01,323804.0,22953.0,29469.0,-,279194.0,84457.0,194737.0,GAAP 개별
2002-12-01,405116.0,72447.0,70518.0,-,344396.0,101293.0,243103.0,GAAP 개별
2003-12-01,435820.0,71927.0,59590.0,-,392034.0,97889.0,294145.0,GAAP 개별
2004-12-01,576324.0,120169.0,107867.0,-,438165.0,93761.0,344404.0,GAAP 개별
2005-12-01,574577.0,80598.0,76402.0,-,505388.0,108822.0,396566.0,GAAP 개별


## 종목 전체
* https://gist.githubusercontent.com/plusjune/f3edace16e89c1a9a156


참고 사항
* BadStatusLine 에러가 난다면 다시 실행
* 만들어진 파일은 SKIP하므로 여러 번 다시 실행해도 상관없음

In [4]:
import os.path

# Stock master list. The code column is read as text: left to type inference,
# an all-digit column would become integers, dropping leading zeros and
# breaking the `code + '.csv'` concatenation below.
master_df = pd.read_csv('https://goo.gl/i3nW13', dtype={'종목코드': str}, index_col='종목코드')

# Download every stock that has no CSV yet. Existing files are skipped, so the
# loop can simply be re-run after a failure.
for code, row in master_df.iterrows():
    fname = code + '.csv'
    out_path = config['data_dir'] + fname
    if os.path.isfile(out_path):
        print (code, row['종목명'], fname, '파일 존재 SKIP')
    else:
        print (code, row['종목명'], fname)
        df = get_finstat_year(code)
        df.to_csv(out_path)