# 고금계 과제 1 데이터 사용법

- 과제 1 수행에 필요한 데이터를 불러오는 방법을 알아봅니다.


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

## 경로 설정

- 과제 데이터 파일의 경로를 설정합니다. 
- 주피터 노트북이 있는 폴더의 `data/` 안에 데이터를 두는 것을 권장합니다.

In [2]:
# Absolute path of the current working directory (the folder the notebook runs from).
CWD = Path('.').resolve()
# The assignment data files are expected in the `data/` subfolder of that directory.
DATA_DIR = CWD / 'data'

In [3]:
# Path to the assignment CSV inside DATA_DIR (file name indicates v3.3, 2013-01 to 2024-08).
fndata_path = DATA_DIR / '고금계과제1_v3.3_201301-202408.csv'

## 데이터 불러오기

- 데이터 기간: 2013-01 ~ 2024-08
- 기본 전처리가 되어있습니다. 
    - 생존편향 제거됨
    - 데이터 기간 내 존재하지 않은 기업 (2013-01 이전 상장폐지) 제거됨
    - 월말일 기준 관리종목/거래정지 종목 제거됨
    - 모든 금액은 '원'단위 (천원 아님)
    - 모든 %는 1.0 == 100%
    - 금융 업종 제거됨
- 다양한 포맷으로 데이터 호출
    - long-format
        - 날짜-종목코드를 multi-index로, 여러 항목들(수익률, 이익잉여금 등)을 컬럼으로 하여 한 번에 불러올 수 있습니다. 
    - wide-format
        - 한 개의 항목을 index는 날짜, columns는 종목코드로 하여 불러올 수 있습니다.

### 기본 사용법

In [4]:
from fndata import FnData

In [5]:
# Create the data module from the CSV path; basic preprocessing is performed on creation.
fn = FnData(fndata_path)

In [6]:
# List the data items that are available.
fn.get_items()

array(['종가(원)', '수정계수', '수정주가(원)', '수익률 (1개월)(%)', 'FnGuide Sector',
       '거래정지여부', '관리종목여부', '보통주자본금(천원)', '자본잉여금(천원)', '이익잉여금(천원)',
       '자기주식(천원)', '이연법인세부채(천원)', '매출액(천원)', '매출원가(천원)', '이자비용(천원)',
       '영업이익(천원)', '총자산(천원)', '기말발행주식수 (보통)(주)'], dtype=object)

In [7]:
# Get the data universe for the analysis period
# (financial sector, trading-halted and administrative-issue stocks excluded).
univ_list = fn.get_universe()
univ_list

array(['A000020', 'A000040', 'A000050', ..., 'A472850', 'A023460',
       'A487570'], dtype=object)

In [8]:
# Number of symbols in the universe.
len(univ_list)

2624

In [9]:
# Look up a symbol code by company name.
fn.name_to_symbol('삼성전자')

'A005930'

In [10]:
# Look up a company name by symbol code.
fn.symbol_to_name('A005930')

'삼성전자'

### long-format으로 불러오기

In [12]:
# Load the requested items in long format.

my_data = ['수정주가(원)', '수익률 (1개월)(%)']
fn.get_data(my_data) # Passing a list returns the data in long format.

Unnamed: 0_level_0,Item Name,수정주가(원),수익률 (1개월)(%)
date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-31,A000020,,
2013-01-31,A000020,6030.0,-0.0017
2013-01-31,A000040,,
2013-01-31,A000040,8727.0,0.0020
2013-01-31,A000050,,
...,...,...,...
2024-09-19,A460860,8220.0,-0.0508
2024-09-19,A462520,17870.0,0.0618
2024-09-19,A465770,9830.0,-0.1876
2024-09-19,A472850,5330.0,0.0057


In [13]:
# Load every item by calling get_data with no arguments.

fn.get_data()

Unnamed: 0_level_0,Item Name,기말발행주식수 (보통)(주),매출액(천원),매출원가(천원),보통주자본금(천원),수익률 (1개월)(%),수정계수,수정주가(원),영업이익(천원),이연법인세부채(천원),이익잉여금(천원),이자비용(천원),자기주식(천원),자본잉여금(천원),종가(원),총자산(천원),FnGuide Sector,관리종목여부,거래정지여부
date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2013-01-31,A000020,,220240575.0,116534418.0,27931470.0,,,,2062220.0,0.0,170147330.0,384335.0,0.0,26919758.0,,3.161655e+08,,,
2013-01-31,A000020,27931470.0,,,,-0.17,1.0,6030.0,,,,,,,6030.0,,,,
2013-01-31,A000040,,99553272.0,91996790.0,59670690.0,,,,-3486600.0,7181340.0,1298918.0,813333.0,-5044.0,3884892.0,,1.169316e+08,,,
2013-01-31,A000040,119341379.0,,,,0.20,1.0,8727.0,,,,,,,495.0,,,,
2013-01-31,A000050,,347189559.0,251468225.0,12461490.0,,,,36031739.0,90652345.0,612310562.0,13944625.0,0.0,13991400.0,,1.296989e+09,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-19,A460860,49608017.0,,,,-5.08,1.0,8220.0,,,,,,,8220.0,,,,
2024-09-19,A462520,11855168.0,,,,6.18,1.0,17870.0,,,,,,,17870.0,,,,
2024-09-19,A465770,7171032.0,,,,-18.76,1.0,9830.0,,,,,,,9830.0,,,,
2024-09-19,A472850,32343933.0,,,,0.57,1.0,5330.0,,,,,,,5330.0,,,,


In [15]:
# Drop the multi-index so date and Symbol come back as regular columns; show the first rows.
fn.get_data(multiindex=False).head()

Item Name,date,Symbol,Symbol Name,Kind,Frequency,기말발행주식수 (보통)(주),매출액(천원),매출원가(천원),보통주자본금(천원),수익률 (1개월)(%),...,이연법인세부채(천원),이익잉여금(천원),이자비용(천원),자기주식(천원),자본잉여금(천원),종가(원),총자산(천원),FnGuide Sector,관리종목여부,거래정지여부
0,2013-01-31,A000020,동화약품,NFS-IFRS(C),ANNUAL,,220240575.0,116534418.0,27931470.0,,...,0.0,170147330.0,384335.0,0.0,26919758.0,,316165500.0,,,
1,2013-01-31,A000020,동화약품,SSC,DAILY,27931470.0,,,,-0.17,...,,,,,,6030.0,,,,
2,2013-01-31,A000040,KR모터스,NFS-IFRS(C),ANNUAL,,99553272.0,91996790.0,59670690.0,,...,7181340.0,1298918.0,813333.0,-5044.0,3884892.0,,116931600.0,,,
3,2013-01-31,A000040,KR모터스,SSC,DAILY,119341379.0,,,,0.2,...,,,,,,495.0,,,,
4,2013-01-31,A000050,경방,NFS-IFRS(C),ANNUAL,,347189559.0,251468225.0,12461490.0,,...,90652345.0,612310562.0,13944625.0,0.0,13991400.0,,1296989000.0,,,


### wide-format으로 불러오기

In [16]:
# Select a single item by name.
my_data = '수익률 (1개월)(%)'
fn.get_data(my_data) # Passing a string returns the data in wide format.

Symbol,A000020,A000040,A000050,A000070,A000080,A000100,A000120,A000140,A000150,A000180,...,A950110,A950130,A950140,A950160,A950170,A950180,A950190,A950200,A950210,A950220
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-31,-0.0017,0.0020,-0.0346,-0.0451,0.0526,0.0607,0.1940,-0.0122,-0.0039,-0.0559,...,0.0444,,,,,,,,,
2013-02-28,0.0033,-0.0470,0.0537,0.0251,0.0791,-0.0028,-0.0295,-0.0414,0.0078,0.0520,...,0.1417,,,,,,,,,
2013-03-31,0.0133,-0.0064,0.0288,0.0187,-0.0073,0.0700,-0.1452,0.0432,0.0039,0.1621,...,-0.0568,,,,,,,,,
2013-04-30,0.1047,0.0000,0.2594,0.3239,-0.0473,0.0995,0.0783,-0.1302,-0.0502,0.0946,...,0.5006,,,,,,,,,
2013-05-31,0.0267,0.0475,0.1667,-0.0598,-0.0140,-0.1167,-0.0283,-0.0238,0.1545,0.0367,...,0.0262,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31,-0.0505,-0.3226,0.0052,-0.0014,-0.0451,-0.0497,-0.1542,-0.0088,0.3526,-0.0437,...,-0.0625,0.0522,0.7204,0.1785,-0.1071,,-0.0206,-0.0266,-0.0136,-0.0328
2024-06-30,-0.0405,-0.1440,-0.0189,-0.0171,0.0652,0.1508,-0.0960,0.0033,-0.0136,-0.0638,...,-0.0768,-0.0432,0.0409,-0.0176,0.0304,,-0.0575,-0.1691,0.0799,-0.1684
2024-07-31,0.0347,-0.0979,-0.0874,0.0174,-0.0212,0.1669,0.0358,-0.0011,-0.1935,-0.0642,...,-0.0466,0.0286,-0.1271,0.2387,0.0368,,0.0350,0.1616,0.5988,-0.0844
2024-08-31,-0.0254,0.0051,-0.0675,0.0506,0.0024,0.4447,-0.0308,0.0187,-0.0923,0.0132,...,-0.1902,0.0724,-0.4368,-0.1166,-0.2121,,0.0269,-0.0517,-0.0201,0.0528
