# 고금계 과제 1 데이터 사용법

- 과제 1 수행에 필요한 데이터를 불러오는 방법을 알아봅니다.


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from pathlib import Path

## 경로 설정

- 과제 데이터 파일의 경로를 설정합니다. 
- 주피터노트북이 있는 폴더의 `data/` 안에 데이터를 두는 것을 권장합니다. 

In [4]:
# Absolute path of the directory the notebook is running from.
CWD = Path().resolve()
# The assignment data is expected in a `data/` folder next to the notebook.
DATA_DIR = CWD.joinpath('data')

In [5]:
# Location of the assignment CSV inside the data directory.
fndata_path = DATA_DIR.joinpath('고금계과제1_v3.3_201301-202408.csv')

## 데이터 불러오기

- 데이터 기간: 2013-01 ~ 2024-08
- 기본 전처리가 되어있습니다. 
    - 생존편향 제거됨
    - 데이터 기간 내 존재하지 않은 기업 (2013-01 이전 상장폐지) 제거됨
    - 월말일 기준 관리종목/거래정지 종목 제거됨
    - 모든 금액은 '원'단위 (천원 아님)
    - 모든 %는 1.0 == 100%
    - 금융 업종 제거됨
    - 월말일 기준 1개월 수익률이 없는 종목 제거됨
- 다양한 포맷으로 데이터 호출
    - long-format
        - 날짜-종목코드를 multi-index로, 여러 항목들(수익률, 이익잉여금 등)을 컬럼으로 하여 한 번에 불러올 수 있습니다. 
    - wide-format
        - 한 개의 항목을, index는 날짜이고 columns는 종목코드인 형태로 불러올 수 있습니다.

### 기본 사용법

In [6]:
from fndata import FnData

In [7]:
# Create the data module; the basic preprocessing steps are performed as part of construction.
fn = FnData(fndata_path)

In [8]:
# List the data items that are available.
fn.get_items()

array(['종가(원)', '수정계수', '수정주가(원)', '수익률 (1개월)(%)', 'FnGuide Sector',
       '거래정지여부', '관리종목여부', '보통주자본금(천원)', '자본잉여금(천원)', '이익잉여금(천원)',
       '자기주식(천원)', '이연법인세부채(천원)', '매출액(천원)', '매출원가(천원)', '이자비용(천원)',
       '영업이익(천원)', '총자산(천원)', '기말발행주식수 (보통)(주)'], dtype=object)

In [9]:
# Inspect the stock universe for the analysis period
# (financial-sector, trading-halted, and administrative-issue stocks are excluded).
univ_list = fn.get_universe()
univ_list

array(['A000020', 'A000040', 'A000050', ..., 'A001260', 'A019660',
       'A023460'], dtype=object)

In [10]:
# Count the symbols in the universe.
len(univ_list)

2616

In [11]:
# Look up a symbol code from a company name.
fn.name_to_symbol('삼성전자')

'A005930'

In [12]:
# Look up a company name from a symbol code.
fn.symbol_to_name('A005930')

'삼성전자'

### long-format으로 불러오기

In [13]:
# Load the selected items in long format.

my_data = ['수정주가(원)', '수익률 (1개월)(%)']
df = fn.get_data(my_data) # passing a list loads the data in long format.
df

Unnamed: 0_level_0,Item Name,수정주가(원),수익률 (1개월)(%)
date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-31,A000020,6030.0,-0.0017
2013-01-31,A000040,8727.0,0.0020
2013-01-31,A000050,7874.0,-0.0346
2013-01-31,A000070,67800.0,-0.0451
2013-01-31,A000080,32000.0,0.0526
...,...,...,...
2024-09-19,A037950,1198.0,-0.0910
2024-09-19,A038880,220.0,-0.1603
2024-09-19,A001260,6350.0,-0.0537
2024-09-19,A019660,588.0,-0.0184


In [14]:
# Load every available item (no item argument is given).

df = fn.get_data()
df

Unnamed: 0_level_0,Item Name,FnGuide Sector,거래정지여부,관리종목여부,기말발행주식수 (보통)(주),매출액(천원),매출원가(천원),보통주자본금(천원),수익률 (1개월)(%),수정계수,수정주가(원),영업이익(천원),이연법인세부채(천원),이익잉여금(천원),이자비용(천원),자기주식(천원),자본잉여금(천원),종가(원),총자산(천원)
date,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2013-01-31,A000020,의료,정상,정상,27931470.0,2.202406e+08,1.165344e+08,27931470.0,-0.17,1.0,6030.0,2062220.0,0.0,170147330.0,384335.0,0.0,26919758.0,6030.0,3.161655e+08
2013-01-31,A000040,경기소비재,정상,정상,119341379.0,9.955327e+07,9.199679e+07,59670690.0,0.20,1.0,8727.0,-3486600.0,7181340.0,1298918.0,813333.0,-5044.0,3884892.0,495.0,1.169316e+08
2013-01-31,A000050,경기소비재,정상,정상,2492298.0,3.471896e+08,2.514682e+08,12461490.0,-3.46,1.0,7874.0,36031739.0,90652345.0,612310562.0,13944625.0,0.0,13991400.0,86500.0,1.296989e+09
2013-01-31,A000070,산업재,정상,정상,7704997.0,2.329145e+09,2.025952e+09,40852325.0,-4.51,1.0,67800.0,2472122.0,110818108.0,871019983.0,17176000.0,-9982000.0,355855971.0,67800.0,2.595068e+09
2013-01-31,A000080,필수소비재,정상,정상,69477714.0,1.897469e+09,1.074666e+09,363168055.0,5.26,1.0,32000.0,161051461.0,23405084.0,422540903.0,56438096.0,-122693570.0,709299321.0,32000.0,3.553147e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-09-19,A037950,IT,정상,정상,84447519.0,,,,-9.10,1.0,1198.0,,,,,,,1198.0,
2024-09-19,A038880,IT,정상,정상,315334702.0,,,,-16.03,1.0,220.0,,,,,,,220.0,
2024-09-19,A001260,산업재,정상,정상,9832571.0,,,,-5.37,1.0,6350.0,,,,,,,6350.0,
2024-09-19,A019660,필수소비재,정상,정상,34790746.0,,,,-1.84,1.0,588.0,,,,,,,588.0,


### wide-format으로 불러오기

In [15]:
# Select a single item by name.
my_data = '수익률 (1개월)(%)'
fn.get_data(my_data) # passing a string loads the data in wide format.

Symbol,A000020,A000040,A000050,A000070,A000080,A000100,A000120,A000140,A000150,A000180,...,A014940,A053810,A007390,A002410,A037950,A038880,A015020,A001260,A019660,A023460
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2013-01-31,-0.0017,0.0020,-0.0346,-0.0451,0.0526,0.0607,0.1940,-0.0122,-0.0039,-0.0559,...,,,,,,,,,,
2013-02-28,0.0033,-0.0470,0.0537,0.0251,0.0791,-0.0028,-0.0295,-0.0414,0.0078,0.0520,...,,,,,,,,,,
2013-03-31,0.0133,-0.0064,0.0288,0.0187,-0.0073,0.0700,-0.1452,0.0432,0.0039,0.1621,...,,,,,,,,,,
2013-04-30,0.1047,0.0000,0.2594,0.3239,-0.0473,0.0995,0.0783,-0.1302,-0.0502,0.0946,...,,,,,,,,,,
2013-05-31,0.0267,0.0475,0.1667,-0.0598,-0.0140,-0.1167,-0.0283,-0.0238,0.1545,0.0367,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-31,-0.0505,-0.3226,0.0052,-0.0014,-0.0451,-0.0497,-0.1542,-0.0088,0.3526,-0.0437,...,0.0015,,0.3278,-0.0320,-0.0793,-0.1522,,0.0096,-0.0962,-0.0500
2024-06-30,-0.0405,-0.1440,-0.0189,-0.0171,0.0652,0.1508,-0.0960,0.0033,-0.0136,-0.0638,...,0.0508,,-0.1922,-0.0978,-0.0985,-0.3015,,-0.0663,-0.1476,0.4400
2024-07-31,0.0347,-0.0979,-0.0874,0.0174,-0.0212,0.1669,0.0358,-0.0011,-0.1935,-0.0642,...,0.1586,,0.1644,0.1061,-0.0432,-0.0641,,0.2921,-0.0798,0.0226
2024-08-31,-0.0254,0.0051,-0.0675,0.0506,0.0024,0.4447,-0.0308,0.0187,-0.0923,0.0132,...,-0.0437,,-0.1071,-0.0603,0.1096,0.0444,,-0.0989,-0.0145,-0.8341
