### F Score Implementation - May 2019 

In [1]:
# importing packages 
from datetime import datetime
import locale
from locale import atof
import pickle
import time
import random
import re

import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait 
from selenium.webdriver.support import expected_conditions as EC 

# sqlalchemy packages 
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, Numeric, String, DateTime, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, backref
from sqlalchemy.ext.declarative import declarative_base 

#### Creating Universe for F Score

In [17]:
# Getting DATA of KOSPI 
def get_univ_data(market, page_num):
    def clean_value(value):
        if ',' in value: 
            value = value.replace(',', '')
    
        return value
    
    mkt = market 
    
    if mkt == 0: 
        kospi_id_ls = []
        kospi_name_ls = []
        kospi_number_classes = []
    
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(mkt, page, 'quant', 'listed_stock_cnt', 'market_sum', 'property_total', 'debt_total', 'pbr')
    
            req = requests.get(url)
            html = req.text 
    
            soup = BeautifulSoup(html, 'lxml')
        
            kospi_ids = soup.select('a[href*="/item/main.nhn"]')
            kospi_univ_sets = soup.select('td[class^="number"]')
        
            # Looping stock ids and names 
            for id in kospi_ids: 
                kospi_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
                kospi_name_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[1])
        
            # Looping Number Classes for each stock
            for item in kospi_univ_sets: 
                kospi_number_classes.append(item.string)
                
        # Convert stock ids & names lists to Pandas DF 
        df_kospi = pd.DataFrame({'id' : kospi_id_ls, 'name': kospi_name_ls, 'market_type': 1}, columns = ['id', 'name','market_type', 'quant', 'market_sum', 'property_total', 'debt_total', 'listed_stock_cnt', 'pbr', 'face_value',])

        # Slicing and creating an individual list for each category 
        present_value = [clean_value(v) for v in kospi_number_classes[::10]]
        face_value = [clean_value(v) for v in kospi_number_classes[3::10]]
        quant = [clean_value(v) for v in kospi_number_classes[4::10]]
        listed_stock_cnt = [clean_value(v) for v in kospi_number_classes[5::10]]
        market_sum = [clean_value(v) for v in kospi_number_classes[6::10]] 
        property_total = [clean_value(v) for v in kospi_number_classes[7::10]]
        debt_total = [clean_value(v) for v in kospi_number_classes[8::10]]
        pbr = [clean_value(v) for v in kospi_number_classes[9::10]] 

        # Put each category list to DF
        df_kospi['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kospi['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kospi['property_total'] = pd.to_numeric(property_total, errors='coerce')
        df_kospi['debt_total'] = pd.to_numeric(debt_total, errors='coerce')
        df_kospi['listed_stock_cnt'] = pd.to_numeric(listed_stock_cnt, errors='coerce')
        df_kospi['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kospi['market_sum'] = pd.to_numeric(market_sum, errors='coerce')
        df_kospi['face_value'] = pd.to_numeric(face_value, errors='coerce')   
        
        return df_kospi
    
    elif mkt == 1: 
        kosdaq_id_ls = []
        kosdaq_name_ls = [] 
        kosdaq_number_classes = []
        
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(mkt, page, 'quant', 'listed_stock_cnt', 'market_sum', 'property_total', 'debt_total', 'pbr' )
            
            req = requests.get(url)
            html = req.text
            
            soup = BeautifulSoup(html, 'lxml')
            
            kosdaq_ids = soup.select('a[href*="/item/main.nhn"]')
            kosdaq_univ_sets = soup.select('td[class^="number"]')
    
            # Looping KOSDAQ ids and names 
            for id in kosdaq_ids:
                kosdaq_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
                kosdaq_name_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[1])
    
            # Looping Number classes for each stock 
            for item in kosdaq_univ_sets: 
                kosdaq_number_classes.append(item.string)
       
        # convert KOSDAQ ids & names lists to Pandas DF 
        df_kosdaq = pd.DataFrame({'id' : kosdaq_id_ls, 'name' : kosdaq_name_ls, 'market_type' : 2}, columns = ['id', 'name','market_type', 'quant', 'market_sum', 'property_total', 'debt_total', 'listed_stock_cnt', 'pbr', 'face_value',])
        
        # Slicing and Put into df_kosdaq
        # Slicing and creating an individual list for each category 
        present_value = [clean_value(v) for v in kosdaq_number_classes[::10]]
        face_value = [clean_value(v) for v in kosdaq_number_classes[3::10]]
        quant = [clean_value(v) for v in kosdaq_number_classes[4::10]]
        listed_stock_cnt = [clean_value(v) for v in kosdaq_number_classes[5::10]]
        market_sum = [clean_value(v) for v in kosdaq_number_classes[6::10]]
        property_total = [clean_value(v) for v in kosdaq_number_classes[7::10]]
        debt_total = [clean_value(v) for v in kosdaq_number_classes[8::10]]
        pbr = [clean_value(v) for v in kosdaq_number_classes[9::10]] 

        # Put each category list to DF
        df_kosdaq['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kosdaq['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kosdaq['property_total'] = pd.to_numeric(property_total, errors='coerce')
        df_kosdaq['debt_total'] = pd.to_numeric(debt_total, errors='coerce')
        df_kosdaq['listed_stock_cnt'] = pd.to_numeric(listed_stock_cnt, errors='coerce')
        df_kosdaq['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kosdaq['market_sum'] = pd.to_numeric(market_sum, errors='coerce')
        df_kosdaq['face_value'] = pd.to_numeric(face_value, errors='coerce')    
        
        return df_kosdaq

In [18]:
df_kospi = get_univ_data(0, 31) #2019년 5월 기준 31페이지
df_kosdaq = get_univ_data(1, 27) #2019년 5월 기준 27 페이지

In [19]:
df_univ = pd.concat([df_kospi, df_kosdaq], ignore_index=True)

In [20]:
bm_columns = ['id', 'market_type', 'market_sum', 'property_total', 'debt_total', 'pbr']
df_bm = df_univ[bm_columns]

In [21]:
df_bm.head(5)

Unnamed: 0,id,market_type,market_sum,property_total,debt_total,pbr
0,5930,1,2611780,3393572.0,916041.0,1.24
1,660,1,495770,636583.0,168060.0,0.99
2,5935,1,289245,,,0.99
3,5380,1,272427,1806558.0,1067597.0,0.5
4,68270,1,233559,35406.0,9078.0,9.16


In [22]:
df_bm['book_value'] = df_bm['property_total'] - df_bm['debt_total']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [23]:
df_bm['bm_ratio'] = df_bm['book_value'] / df_bm['market_sum']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [24]:
df_bm['bm_decile'] = pd.qcut(df_bm['bm_ratio'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm['pbr_decile'] = pd.qcut(df_bm['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [25]:
df_bm.head(10)

Unnamed: 0,id,market_type,market_sum,property_total,debt_total,pbr,book_value,bm_ratio,bm_decile,pbr_decile
0,5930,1,2611780,3393572.0,916041.0,1.24,2477531.0,0.948599,6.0,5
1,660,1,495770,636583.0,168060.0,0.99,468523.0,0.945041,6.0,6
2,5935,1,289245,,,0.99,,,,6
3,5380,1,272427,1806558.0,1067597.0,0.5,738961.0,2.71251,10.0,9
4,68270,1,233559,35406.0,9078.0,9.16,26328.0,0.112725,1.0,1
5,51910,1,230131,289441.0,116220.0,1.46,173221.0,0.752706,5.0,4
6,55550,1,219554,4596005.0,4229491.0,0.61,366514.0,1.669357,8.0,8
7,5490,1,201838,782483.0,314887.0,0.43,467596.0,2.31669,10.0,10
8,207940,1,200811,59804.0,18249.0,4.83,41555.0,0.206936,1.0,1
9,12330,1,200621,430711.0,123677.0,0.65,307034.0,1.530418,8.0,8


In [26]:
# Fscore university top 50%
df_fscore_univ = df_bm.query('bm_decile > 5')
df_fscore_univ_ids = df_fscore_univ['id']

In [27]:
len(df_fscore_univ_ids)

1055

In [28]:
df_fscore_univ_20 = df_bm.query('bm_decile > 8')
df_ids_20 = df_fscore_univ_20['id']

In [29]:
len(df_ids_20)

422

### BM Ratio by markets 

In [49]:
df_kospi.keys()

Index(['id', 'name', 'market_type', 'quant', 'market_sum', 'property_total',
       'debt_total', 'listed_stock_cnt', 'pbr', 'face_value', 'present_value'],
      dtype='object')

In [50]:
df_bm_kospi = df_kospi.copy()

In [51]:
df_bm_kospi['book_value'] = df_bm_kospi.property_total - df_bm_kospi.debt_total

In [52]:
df_bm_kospi['bm_ratio'] = df_bm_kospi.book_value / df_bm_kospi.market_sum 

In [53]:
df_kosdaq.keys()

Index(['id', 'name', 'market_type', 'quant', 'market_sum', 'property_total',
       'debt_total', 'listed_stock_cnt', 'pbr', 'face_value', 'present_value'],
      dtype='object')

In [54]:
df_bm_kosdaq = df_kosdaq.copy()

In [55]:
df_bm_kosdaq['book_value'] = df_bm_kosdaq.property_total - df_bm_kosdaq.debt_total

In [56]:
df_bm_kosdaq['bm_ratio'] = df_bm_kosdaq.book_value / df_bm_kosdaq.market_sum

In [57]:
df_bm_kospi['bm_decile'] = pd.qcut(df_bm_kospi['bm_ratio'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm_kospi['pbr_decile'] = pd.qcut(df_bm_kospi['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

df_bm_kosdaq['bm_decile'] = pd.qcut(df_bm_kosdaq['bm_ratio'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm_kosdaq['pbr_decile'] = pd.qcut(df_bm_kosdaq['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1]) #필터링 시 방행반대

In [58]:
df_bm_kospi_t20 = df_bm_kospi.query('(bm_decile >= 8) & (pbr_decile >= 8)')
df_bm_kosdaq_t20 = df_bm_kosdaq.query('(bm_decile >= 8) & (pbr_decile >= 8)')

In [59]:
df_bm_t20 = pd.concat([df_bm_kospi_t20, df_bm_kosdaq_t20])

In [60]:
df_bm_kospi_t50 = df_bm_kospi.query('(bm_decile >= 5) & (pbr_decile >= 5)')
df_bm_kosdaq_t50 = df_bm_kosdaq.query('(bm_decile >= 5) & (pbr_decile >= 5)')

In [61]:
df_bm_t50 = pd.concat([df_bm_kospi_t50, df_bm_kosdaq_t50])

In [41]:
df_fscore_univ_ids = df_bm_t50['id']

In [42]:
len(df_fscore_univ_ids)

1220

In [43]:
df_bm = pd.concat([df_bm_kospi, df_bm_kosdaq])

In [45]:
df_bm_t50

Unnamed: 0,id,name,market_type,quant,market_sum,property_total,debt_total,listed_stock_cnt,pbr,face_value,present_value,book_value,bm_ratio,bm_decile,pbr_decile
3,005380,현대차,1,368701,271359,1806558.0,1067597.0,213668,0.50,5000,127000,738961.0,2.723186,9,8
6,055550,신한지주,1,961292,218369,4596005.0,4229491.0,474200,0.61,5000,46050,366514.0,1.678416,7,7
7,017670,SK텔레콤,1,159880,204287,423691.0,200199.0,80746,0.81,500,253000,223492.0,1.094010,5,5
8,005490,POSCO,1,148928,203581,782483.0,314887.0,87187,0.43,5000,233500,467596.0,2.296855,9,9
10,012330,현대모비스,1,148996,201097,430711.0,123677.0,95307,0.65,5000,211000,307034.0,1.526796,6,6
12,105560,KB금융,1,891231,193168,4795883.0,4438753.0,418112,0.51,5000,46200,357130.0,1.848805,8,8
14,028260,삼성물산,1,285693,181533,424067.0,198577.0,189690,0.79,100,95700,225490.0,1.242143,5,5
15,034730,SK,1,127678,167809,1194570.0,685691.0,70360,0.81,200,238500,508879.0,3.032489,10,5
16,000270,기아차,1,1061629,166807,517866.0,245431.0,405363,0.61,5000,41150,272435.0,1.633235,7,7
17,032830,삼성생명,1,144690,161600,2894277.0,2589222.0,200000,0.50,500,80800,305055.0,1.887717,8,8


#### Crawl BalanceSheet, IncomeStatement, CashFlow

In [46]:
# Headless Chrome
options = webdriver.ChromeOptions()
#options.add_argument('headless')
#options.add_argument('window-size=1920x1080')
options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit 537.36 (KHTML, like Gecko) Chrome')

In [55]:
# Start Chrome with Selenium
driver = webdriver.Chrome('/Users/daesikkim/Downloads/chromedriver', chrome_options=options) # chrome_options=options
driver.implicitly_wait(3)

In [48]:
# A function to crawl the industry classification, Market & ICS 
def crawl_ics(code): 
    #pArea > div.wrapper-table > div > table > tbody > tr:nth-child(1) > td > dl > dt:nth-child(3)
    # getting the page source and making the soup! 
    html_ics = driver.page_source 
    soup_ics = BeautifulSoup(html_ics, 'lxml')
    
    # getting valus for ICS 
    m_ics = soup_ics.select('#pArea > div.wrapper-table > div > table > tbody > tr:nth-of-type(1) > td > dl > dt:nth-of-type(3)')
    w_ics = soup_ics.select('#pArea > div.wrapper-table > div > table > tbody > tr:nth-of-type(1) > td > dl > dt:nth-of-type(4)')
    
    m_ics = m_ics[0].string.split(' : ')[-1]
    w_ics = w_ics[0].string.split(' : ')[-1]
    
    ics = {'m_ics': [m_ics]*5, 'w_ics': [w_ics]*5}
    
    return ics

#### 재무용어 및 영어정리
***
__재무재표__
- total asset = 자산총계
- long term debt = long term liabilities = 비유동부채
- long term borrowing = 장기차입금
- current asset = 유동자산
- current liabilities = 유동부채
- shareholder equity = 자본총계
- stock issued = 발행주식수
- intangible asset = 무형자산
- book value 1 = 순자산1
- book value 2 = 순자산2
- total liabilities = 부채총계
***

In [49]:
def crawl_balsheet(code):
    
    # click Bal Sheet Tab 
    balsheet_tab = driver.find_element_by_css_selector("#rpt_tab2")
    balsheet_tab.click()
    time.sleep(0.8)
    
    # getting the page source and making the soup! 
    html_balsheet = driver.page_source
    soup_balsheet = BeautifulSoup(html_balsheet, 'lxml')
    
    r = re.compile(r'<div id="([0-9a-zA-Z]{12})">')
    div_id = "#" + r.findall(html_balsheet)[0]

    # getting values from "Balance Sheet" 
    total_asset = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(1) > td.num')
    lt_debt = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(175) > td.num')
    lt_borrowing = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(181) > td.num')
    current_asset = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(2) > td.num')
    current_liabilities = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(128) > td.num')
    shareholder_equity = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(216) > td.num')
    stock_issued = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(249) > td.num')
    intangible_asset = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(73) > td.num')
    book_value1 = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(125) > td.num')
    book_value2 = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(126) > td.num')
    total_liabilities = soup_balsheet.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(127) > td.num')
    
    # convert string --> float 
    balsheet_vals = list(map(lambda l: [float(i.string.replace(',','')) if i.string != u'\xa0' else float(i.string.replace(u'\xa0','0')) for i in l[0:5]], [total_asset, lt_debt, lt_borrowing, current_asset, current_liabilities, shareholder_equity, stock_issued, intangible_asset, book_value1, book_value2, total_liabilities]))
    
    balsheet_dict = {'total_asset': balsheet_vals[0],
                     'lt_debt': balsheet_vals[1],
                     'lt_borrowing': balsheet_vals[2],
                     'current_asset': balsheet_vals[3],
                     'current_liabilities': balsheet_vals[4],
                     'shareholder_equity': balsheet_vals[5],
                     'stock_issued' : balsheet_vals[6],
                     'intangible_asset' : balsheet_vals[7], 
                     'book_value1' : balsheet_vals[8], 
                     'book_value2' : balsheet_vals[9], 
                     'total_liabilities' : balsheet_vals[10]}
    
    return balsheet_dict 

***
__포괄손익계산서__
- operating income = 영업이익
- extra income = 기타영업외손익
- total sales = 매출액
- gross profit = 매출총이익
- cogs = 매출원가
***

In [50]:
# A function to crawl an income statement 
def crawl_incomestate(code):
    
    # click Income Statement Tab 
    incomestate_tab = driver.find_element_by_css_selector("#rpt_tab1")
    incomestate_tab.click()
    time.sleep(0.8)
    
    # getting the page source and making the soup
    html_incomestate = driver.page_source
    soup_incomestate = BeautifulSoup(html_incomestate, 'lxml') 
    
    r = re.compile(r'<div id="([0-9a-zA-Z]{12})">')
    div_id = "#" + r.findall(html_incomestate)[0]
    
    # getting values from "Income Statement" 
    operating_income = soup_incomestate.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(58) > td.num')
    extra_income = soup_incomestate.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(150) > td.num')
    total_sales = soup_incomestate.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(1) > td.num')
    gross_profit = soup_incomestate.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(26) > td.num')
    cogs = soup_incomestate.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(15) > td.num')
    
    # convert string --> float 
    incomestate_vals = list(map(lambda l: [float(i.string.replace(',','')) if i.string != u'\xa0' else float(i.string.replace(u'\xa0','0')) for i in l[0:5]], [operating_income, extra_income, total_sales, gross_profit, cogs]))
    
    incomestate_dict = {'operating_income': incomestate_vals[0],
                        'extra_income': incomestate_vals[1],
                        'total_sales': incomestate_vals[2],
                        'gross_profit': incomestate_vals[3],
                        'cogs': incomestate_vals[4]}
    
    return incomestate_dict

***
__현금흐름표__
- cashflow operation = 영업활동으로인한 현금흐름
***

In [51]:
# A function to crawl a cash flow 
def crawl_cashflow(code):
    
    # click Cash Flow tab 
    cashflow_tab = driver.find_element_by_css_selector("#rpt_tab3")
    cashflow_tab.click
    time.sleep(0.8)
    
    # getting the page source and making the soup 
    html_cashflow = driver.page_source 
    soup_cashflow = BeautifulSoup(html_cashflow, 'lxml')
    
    r = re.compile(r'<div id="([0-9a-zA-Z]{12})">')
    div_id = "#" + r.findall(html_cashflow)[0]
    
    # getting values from "Income Statement" 
    cf_operation = soup_cashflow.select(f'{div_id}' + ' > table:nth-of-type(2) > tbody > tr:nth-of-type(1) > td.num')
    
    # convert string --> float 
    cashflow_vals = list(map(lambda l: [float(i.string.replace(',','')) if i.string != u'\xa0' else float(i.string.replace(u'\xa0', '0')) for i in l[0:5]], [cf_operation]))
    
    cashflow_dict = {'cf_operation': cashflow_vals[0]}
    
    return cashflow_dict

In [52]:
# A function to transform a list to DF 
def merge_n_convert(code, year, fs_dict): 
    dict_individual = {'code': [code]*5,
                       'year': year}
    
    dict_individual.update(fs_dict)
    
    df_individual = pd.DataFrame(dict_individual, columns = ('code', 'm_ics', 'w_ics', 'year', \
                                    'total_asset', 'lt_debt', 'lt_borrowing', 'current_asset', 'current_liabilities', 'shareholder_equity', 'stock_issued', 'intangible_asset', \
                                    'operating_income', 'extra_income', 'total_sales', 'gross_profit', 'cogs', \
                                    'cf_operation', 'book_value1', 'book_value2'))
    
    return df_individual                              

In [53]:
# Creating f score DF 
year = ['2014', '2015', '2016', '2017', '2018']
fs_dict = {}
df_fbase = pd.DataFrame(columns = ('code', 'm_ics', 'w_ics', 'year', \
                                    'total_asset', 'lt_debt', 'lt_borrowing', 'current_asset', 'current_liabilities', 'shareholder_equity', 'stock_issued', 'intangible_asset', \
                                    'operating_income', 'extra_income', 'total_sales', 'gross_profit', 'cogs', \
                                    'cf_operation', 'book_value1', 'book_value2'))
df_fcalc = pd.DataFrame(columns = ('code', 'year', 'cal_roa', 'cal_cfo', 'delta_roa', 'accrual', 'delta_lever', 'delta_liquid', 'eq_offer', 'delta_margin', 'delta_turn'))
df_fscore = pd.DataFrame(columns = ('code', 'year','f_roa', 'f_cfo', 'f_droa', 'f_accrual', 'f_dlever', 'f_dliquid', 'f_equityoffer', 'f_dmargin', 'f_dturnover', 'f_total'))

#### Crawling codes below

In [None]:
# Do crawl!
count = 0
init_time = time.time()
for i in df_fscore_univ_ids:
    start_time = time.time()
     # load the page! 
    driver.get("http://finance.naver.com/item/coinfo.nhn?code={}".format(i))
    time.sleep(random.randrange(2, 8, 1))
    
    # move to the relavent frame
    frame = WebDriverWait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.CSS_SELECTOR, "#coinfo_cp")))
    #driver.switch_to.frame(frame)

    # select to the financial statement tab
    finstate_tab = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#header-menu > div.wrapper-menu > dl > dt:nth-of-type(3) > a")))
    finstate_tab.click()
    
    time.sleep(0.8)
    print(f"Company {i} processing")
    
    code = i
    for d in [crawl_ics(i), crawl_balsheet(i), crawl_incomestate(i), crawl_cashflow(i)]:
        fs_dict.update(d)                
    
    try:
        df_company = merge_n_convert(i, year, fs_dict)

        df_fbase = pd.concat([df_fbase, df_company], ignore_index=True)
        count += 1
    except: 
        count += 1
        continue

    #print(df_fbase)
    progress = count/len(df_fscore_univ_ids)*100
    print(f"{count} / {len(df_fscore_univ_ids)} >>> {progress}% Processed!!!")

end_time = time.time()
elapsed = end_time - init_time
print(f'Total time spent >>> {time.strftime("%H:%M:%S", time.gmtime(elapsed))}')

Company 005380 processing
1 / 1220 >>> 0.08196721311475409% Processed!!!
Company 055550 processing
Company 017670 processing
3 / 1220 >>> 0.24590163934426232% Processed!!!
Company 005490 processing
4 / 1220 >>> 0.32786885245901637% Processed!!!
Company 012330 processing
5 / 1220 >>> 0.4098360655737705% Processed!!!
Company 105560 processing
Company 028260 processing
7 / 1220 >>> 0.5737704918032787% Processed!!!
Company 034730 processing
8 / 1220 >>> 0.6557377049180327% Processed!!!
Company 000270 processing
9 / 1220 >>> 0.7377049180327869% Processed!!!
Company 032830 processing
Company 015760 processing
11 / 1220 >>> 0.9016393442622952% Processed!!!
Company 096770 processing
12 / 1220 >>> 0.9836065573770493% Processed!!!
Company 003550 processing
13 / 1220 >>> 1.0655737704918031% Processed!!!
Company 066570 processing
14 / 1220 >>> 1.1475409836065573% Processed!!!
Company 086790 processing
Company 011170 processing
16 / 1220 >>> 1.3114754098360655% Processed!!!
Company 024110 processin

In [None]:
df_fbase

#### Save & Load

In [36]:
with open('fscore_may2019.txt', 'wb') as f:
    pickle.dump(df_fbase, f)

In [2]:
with open('fscore_may2019.txt', 'rb') as f:
    df_fbase = pickle.load(f)

In [3]:
df_fbase.head(10)

Unnamed: 0,code,m_ics,w_ics,year,total_asset,lt_debt,lt_borrowing,current_asset,current_liabilities,shareholder_equity,stock_issued,intangible_asset,operating_income,extra_income,total_sales,gross_profit,cogs,cf_operation,book_value1,book_value2
0,5380,운수장비,자동차,2014,1472251.2,494248.8,74304.3,425271.0,351796.7,626205.7,285479.0,38216.6,75499.9,-2682.3,892563.2,191300.4,701262.8,892563.2,626205.7,587989.1
1,5380,운수장비,자동차,2015,1653679.5,572730.3,85526.2,437519.3,412135.2,668814.0,285479.0,42980.9,63579.1,528.7,919587.4,182574.4,737013.0,919587.4,668814.0,625833.1
2,5380,운수장비,자동차,2016,1788359.3,628815.6,133899.8,475840.1,436097.9,723445.8,285479.0,45861.7,51935.0,-490.8,936490.2,176893.0,759597.2,936490.2,723445.8,677584.1
3,5380,운수장비,자동차,2017,1781994.5,602813.9,124881.4,484397.3,431607.1,747573.5,285479.0,48093.4,45746.7,-2137.3,963760.8,175779.1,787981.7,963760.8,747573.5,699480.2
4,5380,운수장비,자동차,2018,1806557.5,573213.3,99852.5,471435.1,494384.1,738960.1,276939.0,49213.8,24221.7,-5197.6,968126.1,151421.3,816704.8,968126.1,738960.1,689746.3
5,5380,운수장비,자동차,2014,1472251.2,494248.8,74304.3,425271.0,351796.7,626205.7,285479.0,38216.6,75499.9,-2682.3,892563.2,191300.4,701262.8,892563.2,626205.7,587989.1
6,5380,운수장비,자동차,2015,1653679.5,572730.3,85526.2,437519.3,412135.2,668814.0,285479.0,42980.9,63579.1,528.7,919587.4,182574.4,737013.0,919587.4,668814.0,625833.1
7,5380,운수장비,자동차,2016,1788359.3,628815.6,133899.8,475840.1,436097.9,723445.8,285479.0,45861.7,51935.0,-490.8,936490.2,176893.0,759597.2,936490.2,723445.8,677584.1
8,5380,운수장비,자동차,2017,1781994.5,602813.9,124881.4,484397.3,431607.1,747573.5,285479.0,48093.4,45746.7,-2137.3,963760.8,175779.1,787981.7,963760.8,747573.5,699480.2
9,5380,운수장비,자동차,2018,1806557.5,573213.3,99852.5,471435.1,494384.1,738960.1,276939.0,49213.8,24221.7,-5197.6,968126.1,151421.3,816704.8,968126.1,738960.1,689746.3


#### F Score Calculation

In [4]:
df_fbase.keys()

Index(['code', 'm_ics', 'w_ics', 'year', 'total_asset', 'lt_debt',
       'lt_borrowing', 'current_asset', 'current_liabilities',
       'shareholder_equity', 'stock_issued', 'intangible_asset',
       'operating_income', 'extra_income', 'total_sales', 'gross_profit',
       'cogs', 'cf_operation', 'book_value1', 'book_value2'],
      dtype='object')

In [5]:
df_fs_01 = df_fbase[['code', 'year', 'total_asset', 'operating_income', 
                    'cf_operation', 'lt_debt', 'lt_borrowing', 
                    'current_asset', 'current_liabilities', 'stock_issued', 
                    'gross_profit', 'total_sales']]

In [6]:
df_fs_01.head(5)

Unnamed: 0,code,year,total_asset,operating_income,cf_operation,lt_debt,lt_borrowing,current_asset,current_liabilities,stock_issued,gross_profit,total_sales
0,5380,2014,1472251.2,75499.9,892563.2,494248.8,74304.3,425271.0,351796.7,285479.0,191300.4,892563.2
1,5380,2015,1653679.5,63579.1,919587.4,572730.3,85526.2,437519.3,412135.2,285479.0,182574.4,919587.4
2,5380,2016,1788359.3,51935.0,936490.2,628815.6,133899.8,475840.1,436097.9,285479.0,176893.0,936490.2
3,5380,2017,1781994.5,45746.7,963760.8,602813.9,124881.4,484397.3,431607.1,285479.0,175779.1,963760.8
4,5380,2018,1806557.5,24221.7,968126.1,573213.3,99852.5,471435.1,494384.1,276939.0,151421.3,968126.1


In [7]:
# Rearranging columns - lagged total asset ("total assets fromt he beginning of the year")
df_fs_01['l_total_asset'] = df_fs_01.loc[:, 'total_asset'].shift(1)
df_fs_01['l_total_asset'] = df_fs_01.loc[:, 'l_total_asset'].where(df_fs_01.loc[:, 'year'] != '2014') # converting to NaN for 2014


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [17]:
df_fs_01[df_fs_01['code'] == '189980']

Unnamed: 0,code,year,total_asset,operating_income,cf_operation,lt_debt,lt_borrowing,current_asset,current_liabilities,stock_issued,gross_profit,total_sales,l_total_asset,avg_total_asset
4270,189980,2014,410.8,79.6,350.4,70.1,59.7,149.3,96.0,5456.0,151.2,350.4,,
4271,189980,2015,750.9,86.0,396.4,30.1,30.0,280.6,80.6,7341.0,173.8,396.4,410.8,
4272,189980,2016,669.1,39.2,413.8,27.7,26.0,189.7,86.8,7341.0,156.0,413.8,750.9,710.0
4273,189980,2017,702.3,28.8,449.0,23.1,22.0,282.6,105.1,7341.0,154.8,449.0,669.1,685.7
4274,189980,2018,818.5,42.5,476.5,2.9,0.0,361.1,218.3,7341.0,165.6,476.5,702.3,760.4


## Average total assets = "rolling"
df_fs_01['avg_total_asset'] = df_fs_01.loc[:, 'total_asset'].where(df_fs_01.loc[:, 'year'] != '2014').rolling(2).mean()

In [10]:
# Creating df_fs_02(calculation) and df_fs_03(fscore) dataframe
df_fs_02 = pd.DataFrame(columns = ('code', 'year', 'cal_roa', 
                                   'cal_cfo', 'delta_roa', 'accrual',
                                   'delta_lever', 'delta_liquid', 'eq_offer', 
                                   'delta_margin', 'delta_turn', 'gpa'))
df_fs_03 = pd.DataFrame(columns = ('code', 'year','f_roa', 
                                    'f_cfo', 'f_droa', 'f_accrual', 
                                    'f_dlever', 'f_dliquid', 'f_equityoffer', 
                                    'f_dmargin', 'f_dturnover', 'f_total', 'gpa_decile'))

In [11]:
# Calcuation
df_fs_02['code'] = df_fs_01['code']
df_fs_02['year'] = df_fs_01['year']

df_fs_02['cal_roa'] = df_fs_01['operating_income'] / df_fs_01['l_total_asset']
df_fs_02['cal_cfo'] = df_fs_01['cf_operation'] / df_fs_01['l_total_asset']
df_fs_02['delta_roa'] = df_fs_02['cal_roa'].diff()
df_fs_02['accrual'] = df_fs_02['cal_roa'] - df_fs_02['cal_cfo']

df_fs_02['lever'] = df_fs_01['lt_debt'] / df_fs_01['avg_total_asset']
df_fs_02['delta_lever'] = df_fs_02['lever'].diff()
df_fs_02['delta_liquid'] = df_fs_01['current_asset'].where(df_fs_01['year'] != '2014') / df_fs_01['current_liabilities'].where(df_fs_01['year'] != '2014')
df_fs_02['eq_offer'] = df_fs_01['stock_issued'].where(df_fs_01['year'] != '2014').diff()

df_fs_02['gmo'] = df_fs_01['gross_profit'].where(df_fs_01['year'] != '2014') / df_fs_01['total_sales'].where(df_fs_01['year'] != '2014')
df_fs_02['delta_margin'] = df_fs_02['gmo'].diff()
df_fs_02['atr'] = df_fs_01['total_sales'] / df_fs_01['l_total_asset']
df_fs_02['delta_turn'] = df_fs_02['atr'].diff()

df_fs_02['gpa'] = df_fs_01['gross_profit'] / df_fs_01['total_asset']

In [13]:
df_fs_02[df_fs_02['code'] == '189980']

Unnamed: 0,code,year,cal_roa,cal_cfo,delta_roa,accrual,delta_lever,delta_liquid,eq_offer,delta_margin,delta_turn,gpa,lever,gmo,atr
4270,189980,2014,,,,,,,,,,0.368062,,,
4271,189980,2015,0.209348,0.964946,,-0.755599,,3.48139,,,,0.231456,,0.438446,0.964946
4272,189980,2016,0.052204,0.551072,-0.157144,-0.498868,,2.185484,0.0,-0.061452,-0.413874,0.233149,0.039014,0.376994,0.551072
4273,189980,2017,0.043043,0.671051,-0.009161,-0.628008,-0.005326,2.688868,0.0,-0.032228,0.119979,0.220419,0.033688,0.344766,0.671051
4274,189980,2018,0.060515,0.678485,0.017473,-0.61797,-0.029874,1.654146,0.0,0.002768,0.007434,0.202321,0.003814,0.347534,0.678485


In [14]:
# converting df_fs_02 to df_fs_03
df_fs_03['code'] = df_fs_01['code']
df_fs_03['year'] = df_fs_01['year']

# ROA and its f-score in DataFrame
df_fs_03.f_roa = np.where(df_fs_02.cal_roa > 0, 1, 0)

# CFO and its f-score in DataFrame
df_fs_03.f_cfo = np.where(df_fs_02.cal_cfo > 0, 1, 0)

# delta ROA and its f-score in DataFrame 
df_fs_03.f_droa = np.where(df_fs_02.delta_roa > 0, 1, 0)

# Accrual and its f-score
df_fs_03.f_accrual = np.where(df_fs_02.accrual < 0, 1, 0)

# Delta_leverage and its f-score
df_fs_03.f_dlever = np.where(df_fs_02.delta_lever < 0, 1, 0)

# delta_liquidity and its f-score
df_fs_03.f_dliquid = np.where(df_fs_02.delta_liquid > 0, 1, 0)

# equity offer and its f-score
df_fs_03.f_equityoffer = np.where(df_fs_02.eq_offer > 0, 0, 1)

# Delta_margin and its f-score 
df_fs_03.f_dmargin = np.where(df_fs_02.delta_margin > 0, 1, 0)

# Delta_turnover and its f-score
df_fs_03.f_dturnover = np.where(df_fs_02.delta_turn > 0, 1, 0)

# total f_score 
df_fs_03.f_total = df_fs_03.f_roa + \
                   df_fs_03.f_cfo + \
                   df_fs_03.f_droa + \
                   df_fs_03.f_accrual + \
                   df_fs_03.f_dlever + \
                   df_fs_03.f_dliquid + \
                   df_fs_03.f_equityoffer + \
                   df_fs_03.f_dmargin + \
                   df_fs_03.f_dturnover 

# gpa decile
df_fs_03.gpa_decile = pd.qcut(df_fs_02['gpa'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])


In [18]:
df_fs_03[df_fs_03['code'] == '013120']

Unnamed: 0,code,year,f_roa,f_cfo,f_droa,f_accrual,f_dlever,f_dliquid,f_equityoffer,f_dmargin,f_dturnover,f_total,gpa_decile
2110,13120,2014,0,0,0,0,0,0,1,0,0,1,8
2111,13120,2015,1,1,0,1,0,1,1,0,0,5,8
2112,13120,2016,1,1,0,1,0,1,1,1,0,6,8
2113,13120,2017,1,1,0,1,0,1,1,1,0,6,8
2114,13120,2018,1,1,1,1,1,1,1,1,1,9,7


In [182]:
above_fs7 = df_fs_03.query('f_total >= 7')

In [183]:
len(above_fs7)

1369

In [184]:
above_fs8 = df_fs_03.query('f_total >= 8')

In [185]:
len(above_fs8)

642

In [186]:
above_fs9 = df_fs_03.query('f_total == 9')

In [187]:
len(above_fs9)

126

#### Filtering to choose 20 companies

In [16]:
df = df_fs_2019.copy()

In [31]:
df_univ.keys()

Index(['id', 'name', 'market_type', 'quant', 'market_sum', 'property_total',
       'debt_total', 'listed_stock_cnt', 'pbr', 'face_value', 'present_value'],
      dtype='object')

In [32]:
listed_companies = df_univ[['id', 'name']]

In [35]:
# created a dictionary of listed companies
listed_dictionary = {}
for idx in range(len(listed_companies)):
    listed_dictionary[listed_companies['id'].iloc[idx]] = listed_companies['name'].iloc[idx]

In [134]:
# import Jiho's fscore list
df_jiho = pd.read_csv('original_fscore.tsv', delimiter='\t', header=0)

In [135]:
df_jiho.head(10)

Unnamed: 0,회사명,18_fscore,17_fscore
0,샘표,9,9
1,신대양제지,9,9
2,한국수출포장공업,9,9
3,한섬,9,9
4,제이엠티,9,9
5,모아텍,9,9
6,S&K폴리텍,9,9
7,파트론,9,9
8,신세계인터내셔날,9,9
9,롯데정밀화학,9,9


In [137]:
jiho_fs8 = df_jiho[df_jiho['18_fscore'] > 7]

##### NCAV
net current asset value = current asset - total liabilities

In [47]:
df_bm.keys()

Index(['id', 'market_type', 'market_sum', 'property_total', 'debt_total',
       'pbr', 'book_value', 'bm_ratio', 'bm_decile', 'pbr_decile'],
      dtype='object')

In [189]:
df_01 = df_univ[['id', 'name', 'market_type', 'market_sum', 'debt_total']]
df_01 = df_01.rename(index=str, columns={'id': 'code'})

In [190]:
df_02 = df_bm[['id', 'bm_decile', 'pbr_decile']]
df_02 = df_02.rename(index=str, columns={'id': 'code'})

In [191]:
df_03 = above_fs8[above_fs8['year'] == '2018']

In [192]:
df_04 = above_fs7[(above_fs7['year'] == '2018')] 

In [193]:
df_05 = above_fs9[(above_fs9['year'] == '2018')]

In [194]:
df = pd.merge(df_01, df_02, on='code')

In [195]:
df_a = df.copy()

In [196]:
df_a.keys()

Index(['code', 'name', 'market_type', 'market_sum', 'debt_total', 'bm_decile',
       'pbr_decile'],
      dtype='object')

In [197]:
df_04.keys()

Index(['code', 'year', 'f_roa', 'f_cfo', 'f_droa', 'f_accrual', 'f_dlever',
       'f_dliquid', 'f_equityoffer', 'f_dmargin', 'f_dturnover', 'f_total',
       'gpa_decile'],
      dtype='object')

In [198]:
df_fs_01.keys()

Index(['code', 'year', 'total_asset', 'operating_income', 'cf_operation',
       'lt_debt', 'lt_borrowing', 'current_asset', 'current_liabilities',
       'stock_issued', 'gross_profit', 'total_sales', 'l_total_asset',
       'avg_total_asset'],
      dtype='object')

In [199]:
df_b = df_fs_01[['code', 'year', 'current_asset']]
df_b = df_b[df_b.year == '2018']

In [200]:
df_c = pd.merge(df_a, df_b, how='inner', on='code')

In [201]:
df_c['ncav'] = df_c['current_asset'] - df_c['debt_total']

In [202]:
df_c = df_c.reset_index()

In [203]:
df_c['mktsum*1.5'] = df_c['market_sum'] * 1.5

In [204]:
df_c.head()

Unnamed: 0,index,code,name,market_type,market_sum,debt_total,bm_decile,pbr_decile,year,current_asset,ncav,mktsum*1.5
0,0,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5
1,1,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5
2,2,5490,POSCO,1,201838,314887.0,10,10,2018,336510.0,21623.0,302757.0
3,3,12330,현대모비스,1,200621,123677.0,8,8,2018,197204.8,73527.8,300931.5
4,4,17670,SK텔레콤,1,199038,200199.0,7,7,2018,79588.4,-120610.6,298557.0


In [205]:
df_c['ncav1'] = np.where(df_c.ncav > df_c['mktsum*1.5'], 1, 0)

In [206]:
df_c['ncav2'] = np.where(df_c.ncav > df_c.market_sum, 1, 0)

In [207]:
df_c = df_c.reset_index()
df_c.head()

Unnamed: 0,level_0,index,code,name,market_type,market_sum,debt_total,bm_decile,pbr_decile,year,current_asset,ncav,mktsum*1.5,ncav1,ncav2
0,0,0,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5,0,0
1,1,1,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5,0,0
2,2,2,5490,POSCO,1,201838,314887.0,10,10,2018,336510.0,21623.0,302757.0,0,0
3,3,3,12330,현대모비스,1,200621,123677.0,8,8,2018,197204.8,73527.8,300931.5,0,0
4,4,4,17670,SK텔레콤,1,199038,200199.0,7,7,2018,79588.4,-120610.6,298557.0,0,0


In [208]:
df_c = df_c.drop(['level_0', 'index'], axis=1)
df_c.head()

Unnamed: 0,code,name,market_type,market_sum,debt_total,bm_decile,pbr_decile,year,current_asset,ncav,mktsum*1.5,ncav1,ncav2
0,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5,0,0
1,5380,현대차,1,272427,1067597.0,10,9,2018,471435.1,-596161.9,408640.5,0,0
2,5490,POSCO,1,201838,314887.0,10,10,2018,336510.0,21623.0,302757.0,0,0
3,12330,현대모비스,1,200621,123677.0,8,8,2018,197204.8,73527.8,300931.5,0,0
4,17670,SK텔레콤,1,199038,200199.0,7,7,2018,79588.4,-120610.6,298557.0,0,0


In [209]:
df_ncav2 = df_c[(df_c['ncav1'] == 1) | (df_c['ncav2'] == 1)]

In [210]:
df_ncav = df_ncav[df_ncav['ncav1'] == 1]

In [211]:
# NCAV company lists
ncav_list = df_ncav['name']

In [212]:
ncav_list

145       KISCO홀딩스
172     S&amp;T홀딩스
205           동일산업
298           삼정펄프
308           세원정공
317           만호제강
329          CS홀딩스
360            SJM
365         SJM홀딩스
381         WISCOM
422           동원개발
522           리드코프
533        아이디스홀딩스
583        네오위즈홀딩스
589            예림당
626            로스웰
679           헝셩그룹
780           이라이콤
802        차이나그레이트
816            블루콤
828       에스앤씨엔진그룹
880       피에스케이홀딩스
952         씨케이에이치
1089     이스트아시아홀딩스
1105        한국정밀기계
1112           감마누
Name: name, dtype: object

In [213]:
ncav2_list = df_ncav2['name']

In [214]:
ncav2_list

94            신도리코
101           남양유업
132           한국철강
146       KISCO홀딩스
152         KPX홀딩스
153           일성신약
157           삼영전자
173     S&amp;T홀딩스
178           삼양통상
185           화성산업
206           동일산업
227           미창석유
237            지투알
244          세아특수강
267          신영와코루
273        새론오토모티브
286           KTcs
290       S&amp;TC
296           한국제지
299           삼정펄프
300            휴스틸
303           삼성공조
309           세원정공
311           대창단조
317           유성기업
318           만호제강
327          한세엠케이
330          CS홀딩스
332           부국철강
361            SJM
           ...    
698           피에스텍
712          성도이엔지
719          동양이엔피
773            코메론
781           이라이콤
782           삼현철강
789           금강철강
790           한일진공
803        차이나그레이트
814            리노스
817            블루콤
829       에스앤씨엔진그룹
874          픽셀플러스
881       피에스케이홀딩스
905           동신건설
927           현대공업
932          알티캐스트
936          유비벨록스
953         씨케이에이치
954         지와이커머스
976          파인디지털
980         

In [219]:
ncav_n_fscore = pd.merge(df_ncav2, df_03, how='inner', on='code')

In [220]:
ncav_n_fscore

Unnamed: 0,code,name,market_type,market_sum,debt_total,bm_decile,pbr_decile,year_x,current_asset,ncav,...,f_cfo,f_droa,f_accrual,f_dlever,f_dliquid,f_equityoffer,f_dmargin,f_dturnover,f_total,gpa_decile
0,5680,삼영전자,1,2300,275.0,9,9,2018,3259.7,2984.7,...,1,1,1,1,1,1,1,1,9,2
1,69640,한세엠케이,1,726,571.0,9,10,2018,1550.8,979.8,...,1,0,1,1,1,1,1,1,8,10
2,13120,동원개발,2,4168,1744.0,8,9,2018,8545.1,6801.1,...,1,1,1,1,1,1,1,1,9,7
3,54800,아이디스홀딩스,2,1428,1888.0,10,9,2018,5291.6,3403.6,...,1,1,1,1,1,1,1,0,8,7
4,42420,네오위즈홀딩스,2,1187,743.0,10,10,2018,2972.2,2229.2,...,1,1,1,0,1,1,1,1,8,7
5,41520,이라이콤,2,687,1028.0,10,10,2018,2475.9,1447.9,...,1,1,1,0,1,1,1,1,8,4
6,123840,한일진공,2,672,294.0,9,9,2018,1069.6,775.6,...,1,1,1,1,1,0,1,1,8,2
7,31980,피에스케이홀딩스,2,543,482.0,10,8,2018,2282.2,1800.2,...,1,0,1,1,1,1,1,1,8,10
8,32080,아즈텍WB,2,405,153.0,9,10,2018,703.4,550.4,...,1,1,1,1,1,1,1,1,9,3
9,67010,이씨에스,2,317,150.0,8,8,2018,523.6,373.6,...,1,1,1,1,1,1,0,1,8,8


In [221]:
df_d = pd.merge(df_c, df_05, how='inner', on='code')

In [222]:
df_d.to_excel('2019_fs8.xlsx')

##### book-market ratio top 20% & F score '8'
- condition 1 >>> bm ratio top 20% (8 and up)
- condition 2 >>> f score 8 and up

In [223]:
len(df_bm_t20)

577

In [224]:
df = df_bm_t20.rename(index=str, columns={'id': 'code'})

In [225]:
df_fs_bm20 = pd.merge(df, above_fs8, how='inner', on='code')

In [226]:
bm20_n_fscore = df_fs_bm20['name']

In [227]:
bm20_n_fscore

0          POSCO
1           롯데쇼핑
2            이마트
3         한국가스공사
4         한국가스공사
5            KCC
6             LS
7             영풍
8         SK네트웍스
9         SK네트웍스
10        오리온홀딩스
11        롯데하이마트
12           HDC
13          대한유화
14           BGF
15            풍산
16            풍산
17         세아베스틸
18         삼양홀딩스
19           삼양사
20            한진
21            한진
22          고려제강
23            동양
24          서울가스
25            E1
26         농심홀딩스
27           삼천리
28         세아홀딩스
29          조선내화
         ...    
270         성호전자
271         성호전자
272           카스
273           카스
274           삼일
275           삼일
276           삼일
277        성우테크론
278         신화콘텍
279       크로바하이텍
280        한양디지텍
281       육일씨엔에쓰
282         이씨에스
283        동양에스텍
284        제이엠아이
285         한창산업
286         대동기어
287       티피씨글로벌
288          듀오백
289       한국정보공학
290        타이거일렉
291        한컴지엠디
292       케이엔더블유
293         대동금속
294    이스트아시아홀딩스
295         PN풍년
296        케이피티유
297         영신

#### F Score & Market Sum & GP/A
- fscore 8 and up
- Market sum bottom 50 and below
- gp/a top 20%

In [228]:
# market_sum bottom 20% / KOSPI
df_bm_kospi['market_sum_decile'] = pd.qcut(df_bm_kospi['market_sum'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_market_sum_kospi_bottom = df_bm_kospi.query('market_sum_decile <= 5')

# market_sum bottom 20% / KOSDAQ
df_bm_kosdaq['market_sum_decile'] = pd.qcut(df_bm_kosdaq['market_sum'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_market_sum_kosdaq_bottom = df_bm_kosdaq.query('market_sum_decile <= 5')

In [229]:
df_market_sum_kospi_top = df_bm_kospi.query('market_sum_decile > 5')
df_market_sum_kosdaq_top = df_bm_kosdaq.query('market_sum_decile > 5')

In [230]:
df_mkt_sum_bt = pd.concat([df_market_sum_kosdaq_bottom, df_market_sum_kospi_bottom])

In [231]:
df_mkt_sum_top = pd.concat([df_market_sum_kosdaq_top, df_market_sum_kospi_top])

In [232]:
df_mkt_sum_bt = df_mkt_sum_bt.rename(index=str, columns={'id':"code"})

In [233]:
df_mkt_sum_top = df_mkt_sum_top.rename(index=str, columns={'id': 'code'})

In [235]:
df0 = pd.merge(df_mkt_sum_bt, above_fs9, how='inner', on='code')

In [236]:
len(df0)

52

In [237]:
df1 = df0[df0.gpa_decile > 7]
len(df1)

21

In [238]:
gpa_n_fscore = df1.name

In [239]:
gpa_n_fscore

0        엘엠에스
1         액트로
4         매커스
5       와이비엠넷
9        인포뱅크
10      에이치시티
13     흥국에프엔비
19        TPC
20        엑시콘
21      쎄미시스코
22       피델릭스
23    유니온커뮤니티
26      피씨디렉트
27         베셀
29       옵티시스
31     육일씨엔에쓰
32        디엠티
35        듀오백
37    셀바스헬스케어
45     TBH글로벌
46         진도
Name: name, dtype: object

In [240]:
df_00 = pd.merge(df_mkt_sum_top, above_fs9, how='inner', on='code')
df_11 = df_00[df_00.gpa_decile > 7]

In [241]:
len(df_11)

24

In [242]:
df_11

Unnamed: 0,code,name,market_type,quant,market_sum,property_total,debt_total,listed_stock_cnt,pbr,face_value,...,f_cfo,f_droa,f_accrual,f_dlever,f_dliquid,f_equityoffer,f_dmargin,f_dturnover,f_total,gpa_decile
1,108230,톱텍,2,37135,3451,3928.0,489.0,36796,0.98,500,...,1,1,1,1,1,1,1,1,9,10
5,228850,레이언스,2,1951,2472,2011.0,188.0,16591,1.29,500,...,1,1,1,1,1,1,1,1,9,8
6,45100,한양이엔지,2,26406,2232,4777.0,1723.0,18000,0.71,500,...,1,1,1,1,1,1,1,1,9,9
8,73560,우리손에프앤지,2,975103,1831,3559.0,1719.0,69238,1.02,500,...,1,1,1,1,1,1,1,1,9,8
9,52710,아모텍,2,42629,1759,3958.0,1984.0,9743,0.89,500,...,1,1,1,1,1,1,1,1,9,8
10,58610,에스피지,2,67881,1710,2752.0,1499.0,20877,1.31,500,...,1,1,1,1,1,1,1,1,9,8
11,11370,서한,2,77330,1640,4724.0,1740.0,100895,0.55,500,...,1,1,1,1,1,1,1,1,9,9
12,15710,코콤,2,4583892,1588,1476.0,345.0,17530,1.33,500,...,1,1,1,1,1,1,1,1,9,9
14,1540,안국약품,2,14698,1441,2129.0,657.0,13042,0.86,500,...,1,1,1,1,1,1,1,1,9,10
15,35610,솔본,2,3039,1329,1973.0,197.0,27346,0.9,500,...,1,1,1,1,1,1,1,1,9,9
