In [31]:
import os
import re
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Local working directory for this project's data files: ~/data/ofij
data_dir = os.path.join(os.path.expanduser('~'), 'data', 'ofij')
# exist_ok=True creates the directory only when it is missing and avoids the
# race between a separate os.path.exists() check and the makedirs() call.
os.makedirs(data_dir, exist_ok=True)

# files
# NOTE: the code table is a bare file name, so it is resolved against the
# current working directory; the two feather files live inside data_dir.
kospicode_filename = 'kospi_code.csv'
stinfo_filename = os.path.join(data_dir, 'stock_info.feather')
stprc_filename = os.path.join(data_dir, 'stock_prices.feather')

In [34]:
# Load the three inputs: the code table (CSV) plus the stock info and price tables (feather).
# The short code ('단축코드') is the merge key and is a zero-padded string such as '000020'.
# Pin it to str so read_csv's dtype inference can never turn it into an integer,
# which would drop the leading zeros and make the key incompatible with dfstinfo.
dfstcode = pd.read_csv(kospicode_filename, dtype={'단축코드': str})
dfstinfo = pd.read_feather(stinfo_filename)
dfstprc = pd.read_feather(stprc_filename)

In [43]:
def _is_yes(flags):
    """Return a boolean Series that is True where `flags` equals 'Y'.

    Any other value, including a missing one, maps to False. This is the
    vectorized equivalent of applying `lambda x: x == 'Y'` to every row.
    """
    return flags.eq('Y')


# Left join: keep every row of dfstinfo and attach the code-table columns
# that share the same short stock code.
df = dfstinfo.merge(dfstcode, how='left', on='단축코드')

# Assemble the metadata table. Keys are the output column names; insertion
# order of the dict is the column order of the resulting frame.
dfmeta = pd.DataFrame({
    '한글명': df['한글명'],  # Company Name
    '표준코드': df['표준코드'],  # Standard Product Code
    '단축코드': df['단축코드'],  # Short Stock Code
    '상장일자': pd.to_datetime(df['상장일자'], format='%Y%m%d'),  # Listed Date (parsed from YYYYMMDD)
    '시장구분': _is_yes(df['KOSPI']).map({True: 'KOSPI', False: 'Other'}),  # Market (KOSPI or Others)
    '업종대분류': df['idx_bztp_lcls_cd_name'],  # Industry Size Category
    '업종중분류': df['idx_bztp_mcls_cd_name'],  # Industry Medium Category
    '표준산업분류': df['std_idst_clsf_cd_name'],  # Industry Detailed Category
    '액면가': df['액면가'],  # Face value
    '시가총액(억 원)': df['시가총액'],  # Market Cap in Hundred-Million KRW
    '매출액(억 원)': df['매출액'],  # Sales
    '영업이익(억 원)': df['영업이익'],  # Operating Profit
    '당기순이익(억 원)': df['당기순이익'],  # Net Profit
    'ROE(%)': df['ROE'],  # Return on Equity
    '전일종가(원)': df['기준가'],  # Previous Closing Price
    '신용가능': _is_yes(df['신용가능']),  # Credit investment possibility
    '증거금비율(%)': df['증거금비율'],  # Margin Rate Requirement
    'KRX바이오': _is_yes(df['KRX바이오']),  # Part of KRX Bio
    '관리종목': _is_yes(df['관리종목']),  # Admin-Managed Issue
    '거래정지': _is_yes(df['거래정지']),  # Trading Suspension
    '불성실공시': _is_yes(df['불성실공시']),  # Unfaithful Disclosure
    '이상급등': _is_yes(df['이상급등']),  # Sudden Price Surge
}).reset_index(drop=True)



In [44]:
# Preview the first rows (head() defaults to 5) to sanity-check the assembled columns.
dfmeta.head()

Unnamed: 0,한글명,표준코드,단축코드,상장일자,시장구분,업종대분류,업종중분류,표준산업분류,액면가,시가총액(억 원),...,당기순이익(억 원),ROE(%),전일종가(원),신용가능,증거금비율(%),KRX바이오,관리종목,거래정지,불성실공시,이상급등
0,동화약품,KR7000020008,20,1976-03-24,KOSPI,시가총액규모중,의약품,의약품 제조업,1000,1678,...,21,1.49,6010,True,60,True,False,False,False,False
1,KR모터스,KR7000040006,40,1976-05-25,KOSPI,시가총액규모소,운수장비,그외 기타 운송장비 제조업,500,217,...,-142,-42.76,362,False,100,False,False,False,False,False
2,경방,KR7000050005,50,1956-03-03,KOSPI,시가총액규모중,"섬유,의복",종합 소매업,500,1727,...,236,3.15,6300,True,60,False,False,False,False,False
3,삼양홀딩스,KR7000070003,70,1968-12-27,KOSPI,시가총액규모중,음식료품,기타 금융업,5000,4830,...,895,1.38,56400,True,60,False,False,False,False,False
4,삼양홀딩스우,KR7000071001,75,1992-02-21,Other,시가총액규모중,음식료품,기타 금융업,5000,171,...,0,0.0,56300,False,100,False,False,False,False,False


In [48]:
# Show the first row as a list with one {column: value} dict, so every column
# is visible (the tabular preview truncates wide frames with '...').
dfmeta.head(1).to_dict(orient='records')

[{'한글명': '동화약품',
  '표준코드': 'KR7000020008',
  '단축코드': '000020',
  '상장일자': Timestamp('1976-03-24 00:00:00'),
  '시장구분': 'KOSPI',
  '업종대분류': '시가총액규모중',
  '업종중분류': '의약품',
  '표준산업분류': '의약품 제조업',
  '액면가': 1000,
  '시가총액(억 원)': 1678,
  '매출액(억 원)': 4649,
  '영업이익(억 원)': 134,
  '당기순이익(억 원)': 21,
  'ROE(%)': 1.49,
  '전일종가(원)': 6010,
  '신용가능': True,
  '증거금비율(%)': 60,
  'KRX바이오': True,
  '관리종목': False,
  '거래정지': False,
  '불성실공시': False,
  '이상급등': False}]

In [49]:
# Persist the metadata table as a feather file inside data_dir.
stmeta_filename = os.path.join(data_dir, 'stock_meta.feather')
dfmeta.to_feather(stmeta_filename)