### F-Score V.2 

In [3]:
import datetime
import re
import locale
from locale import atof

import requests
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd

import BigBull

##### Getting the stock list from DB using 'BigBull' package

In [5]:
# database access setting 
conn_string = "postgresql+psycopg2://daesikkim@localhost:5432/db_goldspoon"
db = BigBull.StockDB(conn_string, BigBull.__meta)

In [7]:
stock_list = [i for i in db.load_all_stock_code()]

In [8]:
stock_list

[('삼성전자', '005930'),
 ('SK하이닉스', '000660'),
 ('셀트리온', '068270'),
 ('삼성전자우', '005935'),
 ('현대차', '005380'),
 ('POSCO', '005490'),
 ('삼성바이오로직스', '207940'),
 ('LG화학', '051910'),
 ('KB금융', '105560'),
 ('NAVER', '035420'),
 ('삼성물산', '028260'),
 ('삼성생명', '032830'),
 ('현대모비스', '012330'),
 ('SK', '034730'),
 ('신한지주', '055550'),
 ('한국전력', '015760'),
 ('SK이노베이션', '096770'),
 ('삼성에스디에스', '018260'),
 ('SK텔레콤', '017670'),
 ('LG전자', '066570'),
 ('LG생활건강', '051900'),
 ('아모레퍼시픽', '090430'),
 ('LG', '003550'),
 ('롯데케미칼', '011170'),
 ('하나금융지주', '086790'),
 ('삼성SDI', '006400'),
 ('KT&G', '033780'),
 ('S-Oil', '010950'),
 ('삼성화재', '000810'),
 ('기아차', '000270'),
 ('넷마블게임즈', '251270'),
 ('아모레G', '002790'),
 ('우리은행', '000030'),
 ('카카오', '035720'),
 ('LG디스플레이', '034220'),
 ('기업은행', '024110'),
 ('엔씨소프트', '036570'),
 ('고려아연', '010130'),
 ('현대중공업', '009540'),
 ('이마트', '139480'),
 ('삼성전기', '009150'),
 ('KT', '030200'),
 ('현대로보틱스', '267250'),
 ('한국타이어', '161390'),
 ('현대제철', '004020'),
 ('롯데쇼핑', '023530'),
 ('코웨이',

In [9]:
df_stock_ls = pd.DataFrame(stock_list, columns=['name', 'code'])

In [10]:
df_stock_ls

Unnamed: 0,name,code
0,삼성전자,005930
1,SK하이닉스,000660
2,셀트리온,068270
3,삼성전자우,005935
4,현대차,005380
5,POSCO,005490
6,삼성바이오로직스,207940
7,LG화학,051910
8,KB금융,105560
9,NAVER,035420


##### Universe  data 가져오기 

In [11]:
# Getting DATA of KOSPI 
def get_univ_data(market, page_num):
    def clean_value(value):
        if ',' in value: 
            value = value.replace(',', '')
    
        return value
    
    mkt = market 
    
    if mkt == 0: 
        kospi_id_ls = []
        kospi_name_ls = []
        kospi_number_classes = []
    
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(mkt, page, 'quant', 'listed_stock_cnt', 'market_sum', 'property_total', 'debt_total', 'pbr')
    
            req = requests.get(url)
            html = req.text 
    
            soup = BeautifulSoup(html, 'lxml')
        
            kospi_ids = soup.select('a[href*="/item/main.nhn"]')
            kospi_univ_sets = soup.select('td[class^="number"]')
        
            # Looping stock ids and names 
            for id in kospi_ids: 
                kospi_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
                kospi_name_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[1])
        
            # Looping Number Classes for each stock
            for item in kospi_univ_sets: 
                kospi_number_classes.append(item.string)
                
        # Convert stock ids & names lists to Pandas DF 
        df_kospi = pd.DataFrame({'id' : kospi_id_ls, 'name': kospi_name_ls, 'market_type': 1}, columns = ['id', 'name','market_type', 'quant', 'market_sum', 'property_total', 'debt_total', 'listed_stock_cnt', 'pbr', 'face_value',])

        # Slicing and creating an individual list for each category 
        present_value = [clean_value(v) for v in kospi_number_classes[::10]]
        face_value = [clean_value(v) for v in kospi_number_classes[3::10]]
        quant = [clean_value(v) for v in kospi_number_classes[4::10]]
        listed_stock_cnt = [clean_value(v) for v in kospi_number_classes[5::10]]
        market_sum = [clean_value(v) for v in kospi_number_classes[6::10]] 
        property_total = [clean_value(v) for v in kospi_number_classes[7::10]]
        debt_total = [clean_value(v) for v in kospi_number_classes[8::10]]
        pbr = [clean_value(v) for v in kospi_number_classes[9::10]] 

        # Put each category list to DF
        df_kospi['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kospi['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kospi['property_total'] = pd.to_numeric(property_total, errors='coerce')
        df_kospi['debt_total'] = pd.to_numeric(debt_total, errors='coerce')
        df_kospi['listed_stock_cnt'] = pd.to_numeric(listed_stock_cnt, errors='coerce')
        df_kospi['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kospi['market_sum'] = pd.to_numeric(market_sum, errors='coerce')
        df_kospi['face_value'] = pd.to_numeric(face_value, errors='coerce')   
        
        return df_kospi
    
    elif mkt == 1: 
        kosdaq_id_ls = []
        kosdaq_name_ls = [] 
        kosdaq_number_classes = []
        
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(mkt, page, 'quant', 'listed_stock_cnt', 'market_sum', 'property_total', 'debt_total', 'pbr' )
            
            req = requests.get(url)
            html = req.text
            
            soup = BeautifulSoup(html, 'lxml')
            
            kosdaq_ids = soup.select('a[href*="/item/main.nhn"]')
            kosdaq_univ_sets = soup.select('td[class^="number"]')
    
            # Looping KOSDAQ ids and names 
            for id in kosdaq_ids:
                kosdaq_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
                kosdaq_name_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[1])
    
            # Looping Number classes for each stock 
            for item in kosdaq_univ_sets: 
                kosdaq_number_classes.append(item.string)
       
        # convert KOSDAQ ids & names lists to Pandas DF 
        df_kosdaq = pd.DataFrame({'id' : kosdaq_id_ls, 'name' : kosdaq_name_ls, 'market_type' : 2}, columns = ['id', 'name','market_type', 'quant', 'market_sum', 'property_total', 'debt_total', 'listed_stock_cnt', 'pbr', 'face_value',])
        
        # Slicing and Put into df_kosdaq
        # Slicing and creating an individual list for each category 
        present_value = [clean_value(v) for v in kosdaq_number_classes[::10]]
        face_value = [clean_value(v) for v in kosdaq_number_classes[3::10]]
        quant = [clean_value(v) for v in kosdaq_number_classes[4::10]]
        listed_stock_cnt = [clean_value(v) for v in kosdaq_number_classes[5::10]]
        market_sum = [clean_value(v) for v in kosdaq_number_classes[6::10]]
        property_total = [clean_value(v) for v in kosdaq_number_classes[7::10]]
        debt_total = [clean_value(v) for v in kosdaq_number_classes[8::10]]
        pbr = [clean_value(v) for v in kosdaq_number_classes[9::10]] 

        # Put each category list to DF
        df_kosdaq['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kosdaq['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kosdaq['property_total'] = pd.to_numeric(property_total, errors='coerce')
        df_kosdaq['debt_total'] = pd.to_numeric(debt_total, errors='coerce')
        df_kosdaq['listed_stock_cnt'] = pd.to_numeric(listed_stock_cnt, errors='coerce')
        df_kosdaq['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kosdaq['market_sum'] = pd.to_numeric(market_sum, errors='coerce')
        df_kosdaq['face_value'] = pd.to_numeric(face_value, errors='coerce')    
        
        return df_kosdaq

In [12]:
df_kospi = get_univ_data(0, 29)

In [13]:
df_kosdaq = get_univ_data(1, 26)

In [14]:
frames = [df_kospi, df_kospi]
df_univ = pd.concat(frames, ignore_index=True)

In [15]:
bm_columns = ['id', 'market_type', 'market_sum', 'property_total', 'debt_total', 'pbr']

In [16]:
df_bm = df_univ[bm_columns]

In [17]:
df_bm['book_value'] = df_bm['property_total'] - df_bm['debt_total']
df_bm['bm_ratio'] = df_bm['book_value'] / df_bm['market_sum']
df_bm['bm_decile'] = pd.qcut(df_bm['bm_ratio'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm['pbr_decile'] = pd.qcut(df_bm['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: ht

In [18]:
df_bm

Unnamed: 0,id,market_type,market_sum,property_total,debt_total,pbr,book_value,bm_ratio,bm_decile,pbr_decile
0,005930,1,3257165,3017521.0,872607.0,1.67,2144914.0,0.658522,3.0,3.0
1,000660,1,654474,454185.0,115975.0,1.88,338210.0,0.516766,2.0,2.0
2,068270,1,391306,30219.0,8230.0,19.39,21989.0,0.056194,1.0,1.0
3,005935,1,376271,,,1.37,,,,3.0
4,005380,1,333719,1781995.0,1034421.0,0.59,747574.0,2.240130,9.0,8.0
5,005490,1,299051,790250.0,315610.0,0.63,474640.0,1.587154,7.0,7.0
6,207940,1,297742,75330.0,34506.0,7.29,40824.0,0.137112,1.0,1.0
7,051910,1,285899,250412.0,87026.0,1.92,163386.0,0.571482,3.0,2.0
8,105560,1,262992,3756737.0,3444123.0,0.81,312614.0,1.188683,6.0,6.0
9,035420,1,258757,80193.0,27141.0,4.84,53052.0,0.205026,1.0,1.0


In [19]:
df_fscore_univ = df_bm.query('bm_decile > 5')

In [21]:
df_fscore_univ

Unnamed: 0,id,market_type,market_sum,property_total,debt_total,pbr,book_value,bm_ratio,bm_decile,pbr_decile
4,005380,1,333719,1781995.0,1034421.0,0.59,747574.0,2.240130,9,8
5,005490,1,299051,790250.0,315610.0,0.63,474640.0,1.587154,7,7
8,105560,1,262992,3756737.0,3444123.0,0.81,312614.0,1.188683,6,6
11,032830,1,235000,2827138.0,2515922.0,0.72,311216.0,1.324323,7,7
12,012330,1,219997,417368.0,123779.0,0.73,293589.0,1.334514,7,7
13,034730,1,217765,1097769.0,640205.0,1.28,457564.0,2.101182,9,4
14,055550,1,213627,3956803.0,3639353.0,0.69,317450.0,1.486001,7,7
15,015760,1,204145,1778370.0,1047865.0,0.28,730505.0,3.578363,10,10
24,086790,1,142377,3481775.0,3247875.0,0.63,233900.0,1.642822,8,7
29,000270,1,127284,522944.0,254333.0,0.47,268611.0,2.110328,9,9
