### Market Trade Info Crawler

In [1]:
import BigBull

import datetime, re, locale
from locale import atof

import requests
from bs4 import BeautifulSoup 

import numpy as np
import pandas as pd

In [2]:
# DB connection setting
conn_string = 'postgresql+psycopg2://daesikkim@localhost:5432/db_goldspoon'
db = BigBull.StockDB(conn_string, BigBull.__meta)

In [3]:
def clean_comma(value):
    if ',' in value:
        value = value.replace(',', '')
    return value

In [4]:
def crawl_market_trade_info(market):
    t = datetime.datetime.today()
    crawl_date = t.strftime("%Y-%m-%d")
    
    if market == 0:
        page_num = 29
        kospi_id_ls = []
        kospi_number_classes = []
        
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(market, page, 'quant', 'buy_total', 'sell_total', 'market_sum', 'per', 'pbr')
            
            req = requests.get(url)
            html = req.text
            
            soup = BeautifulSoup(html, 'lxml')
            
            kospi_ids = soup.select('a[href*="/item/main.nhn"]')
            kospi_trade_info = soup.select('td[class^="number"]')
            
             # Looping stock ids and names 
            for id in kospi_ids: 
                kospi_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
        
            # Looping Number Classes for each stock
            for item in kospi_trade_info: 
                kospi_number_classes.append(item.string)
                
        # Convert stock ids & names lists to Pandas DF 
        df_kospi = pd.DataFrame({'code' : kospi_id_ls, 'market_type': 0}, columns = ['code', 'market_type', 'date', 'present_value', 'quant', 'buy_total', 'sell_total', 'market_sum', 'per', 'pbr'])

        # Slicing and creating an individual list for each category 
        present_value = [clean_comma(v) for v in kospi_number_classes[::10]]
        quant = [clean_comma(v) for v in kospi_number_classes[4::10]]
        buy_total = [clean_comma(v) for v in kospi_number_classes[5::10]]
        sell_total = [clean_comma(v) for v in kospi_number_classes[6::10]] 
        market_sum = [clean_comma(v) for v in kospi_number_classes[7::10]]
        per = [clean_comma(v) for v in kospi_number_classes[8::10]]
        pbr = [clean_comma(v) for v in kospi_number_classes[9::10]] 

        # Put each category list to DF
        df_kospi['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kospi['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kospi['market_sum'] = pd.to_numeric(market_sum, errors='coerce')
        df_kospi['buy_total'] = pd.to_numeric(buy_total, errors='coerce')
        df_kospi['sell_total'] = pd.to_numeric(sell_total, errors='coerce')
        df_kospi['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kospi['per'] = pd.to_numeric(per, errors='coerce')  
        
        df_kospi['date'] = crawl_date
        
        return df_kospi
    
    elif market == 1:
        page_num = 26
        kosdaq_id_ls = []
        kosdaq_number_classes = []
        
        for page in range(1, page_num+1):
            url = """http://finance.naver.com/sise/field_submit.nhn?menu=market_sum&returnUrl=http%3A%2F%2Ffinance.naver.com%2Fsise%2Fsise_market_sum.nhn%3Fsosok%3D{0}%26page%3D{1}&fieldIds={2}&fieldIds={3}&fieldIds={4}&fieldIds={5}&fieldIds={6}&fieldIds={7}""".format(market, page, 'quant', 'buy_total', 'sell_total', 'market_sum', 'per', 'pbr')
            
            req = requests.get(url)
            html = req.text
            
            soup = BeautifulSoup(html, 'lxml')
            
            kosdaq_ids = soup.select('a[href*="/item/main.nhn"]')
            kosdaq_univ_sets = soup.select('td[class^="number"]')
    
            # Looping KOSDAQ ids and names 
            for id in kosdaq_ids:
                kosdaq_id_ls.append(str(id).strip('"<>/a').split('=')[-1].split('">')[0])
    
            # Looping Number classes for each stock 
            for item in kosdaq_univ_sets: 
                kosdaq_number_classes.append(item.string)
       
        # convert KOSDAQ ids & names lists to Pandas DF 
        df_kosdaq = pd.DataFrame({'code' : kosdaq_id_ls, 'market_type' : 1}, columns = ['code', 'market_type', 'date', 'present_value', 'quant', 'buy_total', 'sell_total', 'market_sum', 'per', 'pbr'])
        
        # Slicing and Put into df_kosdaq
        # Slicing and creating an individual list for each category 
        present_value = [clean_comma(v) for v in kosdaq_number_classes[::10]]
        quant = [clean_comma(v) for v in kosdaq_number_classes[4::10]]
        buy_total = [clean_comma(v) for v in kosdaq_number_classes[5::10]]
        sell_total = [clean_comma(v) for v in kosdaq_number_classes[6::10]]
        market_sum = [clean_comma(v) for v in kosdaq_number_classes[7::10]]
        per = [clean_comma(v) for v in kosdaq_number_classes[8::10]]
        pbr = [clean_comma(v) for v in kosdaq_number_classes[9::10]] 

        # Put each category list to DF
        df_kosdaq['present_value'] = pd.to_numeric(present_value, errors='coerce')
        df_kosdaq['quant'] = pd.to_numeric(quant, errors='coerce')
        df_kosdaq['buy_total'] = pd.to_numeric(buy_total, errors='coerce')
        df_kosdaq['sell_total'] = pd.to_numeric(sell_total, errors='coerce')
        df_kosdaq['per'] = pd.to_numeric(per, errors='coerce')
        df_kosdaq['pbr'] = pd.to_numeric(pbr, errors='coerce')
        df_kosdaq['market_sum'] = pd.to_numeric(market_sum, errors='coerce')  
        
        df_kosdaq['date'] = crawl_date
        
        return df_kosdaq

### F_Score Universe - Book-2-Market-Ratio

In [5]:
df_kospi_trade = crawl_market_trade_info(0)
df_kosdaq_trade = crawl_market_trade_info(1)

In [10]:
df_kospi_trade.to_csv('df_kospi_trade', index=False)

In [6]:
df = pd.read_sql_table('company_info', conn_string)
df_fin = df.copy()

In [7]:
df_fin

Unnamed: 0,id,code,year,total_sales,domestic_sales,export_sales,cogs,gross_profit,operating_income,extraordinary_items_income,...,total_liabilities,current_liabilities,short_term_borrowings,non_current_liabilities,long_term_borrowings,capital_expenditure,total_equity,equity_owers,stock_issued,cashflow_operations
0,1,005930,2013,2286926.67,,,1376963.09,909963.58,367850.13,8155.03,...,640590.08,513154.09,64385.17,127435.99,9027.15,231575.87,1500160.10,1444426.16,170133.0,467074.40
1,2,005930,2014,2062059.87,207289.17,1854770.70,1282788.00,779271.87,250250.71,15416.20,...,623347.70,520139.13,80292.99,103208.57,239.89,220429.43,1680881.88,1621817.25,170133.0,369753.89
2,3,005930,2015,2006534.82,208278.22,1798256.60,1234821.18,771713.64,264134.42,-20374.87,...,631197.16,505029.09,111554.25,126168.07,1935.98,258802.22,1790598.05,1728767.67,170133.0,400617.61
3,4,005930,2016,2018667.45,,,1202777.15,815890.30,292406.72,7744.47,...,692112.91,547040.95,127467.89,145071.96,11791.11,241429.73,1929630.33,1864243.28,161193.0,473856.44
4,5,005930,2017,2395753.76,,,1292906.61,1102847.15,536450.38,15910.09,...,872606.62,671751.14,157676.19,200855.48,17569.08,427922.34,2144914.28,2072134.16,161193.0,621620.41
5,6,000660,2013,141651.02,,,88645.87,53005.15,33797.85,-1373.57,...,77304.39,30782.39,1379.79,46522.00,17301.83,32057.97,130668.59,130672.43,710201.0,63720.56
6,7,000660,2014,171255.66,,,94617.25,76638.41,51094.66,362.60,...,88469.75,57653.04,7341.65,30816.71,12627.72,48007.22,180363.03,180364.53,728002.0,58666.91
7,8,000660,2015,187979.98,,,105153.53,82826.45,53361.00,-1084.60,...,82902.03,48406.98,1479.48,34495.05,15120.03,67746.25,213877.03,213868.63,728002.0,93195.20
8,9,000660,2016,171979.75,,,107871.39,64108.36,32767.46,-516.08,...,81924.96,41608.49,,40316.47,20957.37,59563.54,240235.30,240169.55,728002.0,55489.22
9,10,000660,2017,301094.34,,,127018.43,174075.91,137213.26,-409.78,...,115975.45,81161.33,1926.86,34814.12,20803.33,91283.03,338209.19,338152.80,728002.0,146906.14


In [8]:
df_fin_bv = df_fin[['code', 'year', 'book_value_1', 'book_value_2']].query("year == '2017'")

In [9]:
df_kospi = df_kospi_trade.copy()
df_kosdaq = df_kosdaq_trade.copy()

In [10]:
df_mktmv_kospi = df_kospi[['code', 'quant', 'market_sum', 'pbr']]
df_mktmv_kosdaq = df_kosdaq[['code', 'quant', 'market_sum', 'pbr']]

In [11]:
df_bm_kospi = pd.merge(df_fin_bv, df_mktmv_kospi, how='inner', on='code')
df_bm_kosdaq = pd.merge(df_fin_bv, df_mktmv_kosdaq, how='inner', on='code')

In [12]:
# Kospi Market
df_bm_kospi['bm_ratio_1'] = df_bm_kospi['book_value_1'] / df_bm_kospi['market_sum']
df_bm_kospi['bm_ratio_2'] = df_bm_kospi['book_value_2'] / df_bm_kospi['market_sum']

df_bm_kospi['bm_decile_1'] = pd.qcut(df_bm_kospi['bm_ratio_1'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm_kospi['bm_decile_2'] = pd.qcut(df_bm_kospi['bm_ratio_2'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

df_bm_kospi['pbr_decile'] = pd.qcut(df_bm_kospi['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

In [13]:
# Kosdaq market
df_bm_kosdaq['bm_ratio_1'] = df_bm_kosdaq['book_value_1'] / df_bm_kosdaq['market_sum']
df_bm_kosdaq['bm_ratio_2'] = df_bm_kosdaq['book_value_2'] / df_bm_kosdaq['market_sum']

df_bm_kosdaq['bm_decile_1'] = pd.qcut(df_bm_kosdaq['bm_ratio_1'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_bm_kosdaq['bm_decile_2'] = pd.qcut(df_bm_kosdaq['bm_ratio_2'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

df_bm_kosdaq['pbr_decile'] = pd.qcut(df_bm_kosdaq['pbr'], 10, labels=[10, 9, 8, 7, 6, 5, 4, 3, 2, 1])

In [14]:
df_bm_kospi

Unnamed: 0,code,year,book_value_1,book_value_2,quant,market_sum,pbr,bm_ratio_1,bm_ratio_2,bm_decile_1,bm_decile_2,pbr_decile
0,005930,2017,2144914.28,1997309.45,314013,3239191,1.66,0.662176,0.616607,3,4,3
1,000660,2017,338209.19,315736.29,6791628,597690,1.71,0.565861,0.528261,3,3,3
2,207940,2017,39764.51,39574.52,332430,313622,7.89,0.126791,0.126185,1,1,1
3,068270,2017,25715.94,16197.22,1643845,310894,12.76,0.082716,0.052099,1,1,1
4,005380,2017,747573.54,699480.18,624001,357949,0.63,2.088492,1.954134,9,9,8
5,005490,2017,474640.09,415117.40,175341,308641,0.65,1.537839,1.344985,8,7,8
6,051910,2017,163385.77,145154.22,144622,264368,1.77,0.618024,0.549061,3,3,3
7,028260,2017,251512.50,238397.78,321630,257978,0.99,0.974938,0.924101,5,5,5
8,035420,2017,53051.86,49656.75,96915,238979,4.47,0.221994,0.207787,1,1,1
9,012330,2017,293589.58,284019.49,253646,238492,0.79,1.231025,1.190897,6,7,7


In [15]:
df_bm_kosdaq

Unnamed: 0,code,year,book_value_1,book_value_2,quant,market_sum,pbr,bm_ratio_1,bm_ratio_2,bm_decile_1,bm_decile_2,pbr_decile
0,074600,2017,1809.86,1752.09,318516,3851,2.25,0.469971,0.454970,5,5,4
1,180400,2017,347.49,346.15,80314,3529,11.02,0.098467,0.098087,1,1,1
2,091990,2017,17074.73,17054.02,1114221,112633,6.72,0.151596,0.151412,1,2,1
3,215600,2017,2019.47,1323.32,3116925,57424,28.06,0.035168,0.023045,1,1,1
4,086900,2017,2047.10,1762.69,38242,37197,17.37,0.055034,0.047388,1,1,1
5,084990,2017,1644.55,1119.98,225097,33333,20.27,0.049337,0.033600,1,1,1
6,130960,2017,22487.01,14653.07,73662,35982,1.72,0.624952,0.407233,6,5,6
7,028300,2017,1136.79,-129.46,1557611,39690,51.02,0.028642,-0.003262,1,1,1
8,263750,2017,785.48,775.26,118098,32374,10.54,0.024263,0.023947,1,1,1
9,263750,2017,2751.97,2709.76,118098,32374,10.54,0.085006,0.083702,1,1,1


### F Score Calculation 

In [16]:
df_fbase = df_fin[['code', 'year', 'total_assets', 'operating_income', 
                    'cashflow_operations', 'non_current_liabilities', 'long_term_borrowings', 
                    'current_assets', 'current_liabilities', 'stock_issued', 
                    'gross_profit', 'total_sales']]            

In [17]:
df_fbase

Unnamed: 0,code,year,total_assets,operating_income,cashflow_operations,non_current_liabilities,long_term_borrowings,current_assets,current_liabilities,stock_issued,gross_profit,total_sales
0,005930,2013,2140750.18,367850.13,467074.40,127435.99,9027.15,1107602.71,513154.09,170133.0,909963.58,2286926.67
1,005930,2014,2304229.58,250250.71,369753.89,103208.57,239.89,1151460.26,520139.13,170133.0,779271.87,2062059.87
2,005930,2015,2421795.21,264134.42,400617.61,126168.07,1935.98,1248147.25,505029.09,170133.0,771713.64,2006534.82
3,005930,2016,2621743.24,292406.72,473856.44,145071.96,11791.11,1414297.04,547040.95,161193.0,815890.30,2018667.45
4,005930,2017,3017520.90,536450.38,621620.41,200855.48,17569.08,1469824.64,671751.14,161193.0,1102847.15,2395753.76
5,000660,2013,207972.98,33797.85,63720.56,46522.00,17301.83,66531.24,30782.39,710201.0,53005.15,141651.02
6,000660,2014,268832.78,51094.66,58666.91,30816.71,12627.72,103635.14,57653.04,728002.0,76638.41,171255.66
7,000660,2015,296779.06,53361.00,93195.20,34495.05,15120.03,97600.30,48406.98,728002.0,82826.45,187979.98
8,000660,2016,322160.26,32767.46,55489.22,40316.47,20957.37,98389.82,41608.49,728002.0,64108.36,171979.75
9,000660,2017,454184.64,137213.26,146906.14,34814.12,20803.33,173104.44,81161.33,728002.0,174075.91,301094.34


In [18]:
# Rearranging columns - lagged total asset ("total assets from the beginning of the year")
df_fbase['l_total_assets'] = df_fbase.loc[:, 'total_assets'].shift(1)
df_fbase['l_total_assets'] = df_fbase.loc[:, 'l_total_assets'].where(df_fbase.loc[:, 'year'] != '2013') # converting the value to 'NaN' for 2013 years

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()


In [19]:
# Average total assets = "rolling" 
df_fbase['avg_total_assets'] = df_fbase.loc[:,'total_assets'].where(df_fbase.loc[:, 'year'] != '2013').rolling(2).mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [20]:
# Creating df_fcalc & df_fscore dataframe
df_fcalc = pd.DataFrame(columns = ('code', 'year', 'cal_roa', 
                                   'cal_cfo', 'delta_roa', 'accrual',
                                   'delta_lever', 'delta_liquid', 'eq_offer', 
                                   'delta_margin', 'delta_turn', 'gpa'))
df_fscore = pd.DataFrame(columns = ('code', 'year','f_roa', 
                                    'f_cfo', 'f_droa', 'f_accrual', 
                                    'f_dlever', 'f_dliquid', 'f_equityoffer', 
                                    'f_dmargin', 'f_dturnover', 'f_total', 'gpa_decile'))

In [21]:
# df_fcalc dataframe calculations 
df_fcalc['code'] = df_fbase['code']
df_fcalc['year'] = df_fbase['year']

df_fcalc['cal_roa'] = df_fbase['operating_income'] / df_fbase['l_total_assets']
df_fcalc['cal_cfo'] = df_fbase['cashflow_operations'] / df_fbase['l_total_assets']
df_fcalc['delta_roa'] = df_fcalc['cal_roa'].diff()
df_fcalc['accurual'] = df_fcalc['cal_roa'] - df_fcalc['cal_cfo']

df_fcalc['lever'] = df_fbase['non_current_liabilities'] / df_fbase['avg_total_assets']
df_fcalc['delta_lever'] = df_fcalc['lever'].diff()
df_fcalc['delta_liquid'] = df_fbase['current_assets'].where(df_fbase['year'] != '2013') / df_fbase['current_liabilities'].where(df_fbase['year'] != '2013')
df_fcalc['eq_offer'] = df_fbase['stock_issued'].where(df_fbase['year'] != '2013').diff()

df_fcalc['gmo'] = df_fbase['gross_profit'].where(df_fbase['year'] != '2013') / df_fbase['total_sales'].where(df_fbase['year'] != '2012')
df_fcalc['delta_margin'] = df_fcalc['gmo'].diff()
df_fcalc['atr'] = df_fbase['total_sales'] / df_fbase['l_total_assets']
df_fcalc['delta_turn'] = df_fcalc['atr'].diff()

df_fcalc['gpa'] = df_fbase['gross_profit'] / df_fbase['total_assets']

In [22]:
df_fcalc

Unnamed: 0,code,year,cal_roa,cal_cfo,delta_roa,accrual,delta_lever,delta_liquid,eq_offer,delta_margin,delta_turn,gpa,accurual,lever,gmo,atr
0,005930,2013,,,,,,,,,,0.425068,,,,
1,005930,2014,0.116899,0.172722,,,,2.213754,,,,0.338192,-0.055823,,0.377909,0.963242
2,005930,2015,0.114630,0.173862,-0.002268,,,2.471436,0.0,0.006691,-0.092437,0.318654,-0.059232,0.053393,0.384600,0.870805
3,005930,2016,0.120740,0.195663,0.006109,,0.004135,2.585359,-8940.0,0.019573,-0.037263,0.311201,-0.074924,0.057528,0.404173,0.833542
4,005930,2017,0.204616,0.237102,0.083876,,0.013707,2.188049,0.0,0.056161,0.080260,0.365481,-0.032486,0.071235,0.460334,0.913802
5,000660,2013,,,,,,,,,,0.254866,,,,
6,000660,2014,0.245679,0.282089,,,,1.797566,,,,0.285078,-0.036410,,0.447509,0.823451
7,000660,2015,0.198491,0.346666,-0.047188,,,2.016244,0.0,-0.006896,-0.124206,0.279085,-0.148175,0.121974,0.440613,0.699245
8,000660,2016,0.110410,0.186971,-0.088081,,0.008302,2.364657,0.0,-0.067846,-0.119758,0.198995,-0.076561,0.130276,0.372767,0.579487
9,000660,2017,0.425916,0.456003,0.315506,,-0.040589,2.132844,0.0,0.205377,0.355123,0.383271,-0.030087,0.089687,0.578144,0.934610


In [23]:
# Converting df_fcalc to df_fscore
df_fscore['code'] = df_fbase['code']
df_fscore['year'] = df_fbase['year'] 

# ROA and its f-score in DataFrame
df_fscore.f_roa = np.where(df_fcalc.cal_roa > 0, 1, 0)

# CFO and its f-score in DataFrame
df_fscore.f_cfo = np.where(df_fcalc.cal_cfo > 0, 1, 0)

# delta ROA and its f-score in DataFrame 
df_fscore.f_droa = np.where(df_fcalc.delta_roa > 0, 1, 0)

# accrual and its f-score
df_fscore.f_accrual = np.where(df_fcalc.accrual < 0, 1, 0)

# delta_leverage and its f-score
df_fscore.f_dlever = np.where(df_fcalc.delta_lever < 0, 1, 0)

# delta_liquidity and its f-score
df_fscore.f_dliquid = np.where(df_fcalc.delta_liquid > 0, 1, 0) 

# equity offer and its f-score
df_fscore.f_equityoffer = np.where(df_fcalc.eq_offer > 0, 0, 1)

# delta_margin and its f-score
df_fscore.f_dmargin = np.where(df_fcalc.delta_margin > 0, 1, 0)

# delta_turnover and its f-score
df_fscore.f_dturnover = np.where(df_fcalc.delta_turn > 0, 1, 0)

# total f-score
df_fscore.f_total = df_fscore.f_roa + df_fscore.f_cfo + df_fscore.f_droa + df_fscore.f_accrual + df_fscore.f_dlever + df_fscore.f_dliquid + df_fscore.f_equityoffer + df_fscore.f_dmargin + df_fscore.f_dturnover

# gpa decile 
df_fscore.gpa_decile = pd.qcut(df_fcalc['gpa'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

In [24]:
df_fscore.gpa_decile = pd.qcut(df_fcalc['gpa'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

In [25]:
df_fscore

Unnamed: 0,code,year,f_roa,f_cfo,f_droa,f_accrual,f_dlever,f_dliquid,f_equityoffer,f_dmargin,f_dturnover,f_total,gpa_decile
0,005930,2013,0,0,0,0,0,0,1,0,0,1,10.0
1,005930,2014,1,1,0,0,0,1,1,0,0,4,9.0
2,005930,2015,1,1,0,0,0,1,1,1,0,5,9.0
3,005930,2016,1,1,1,0,0,1,1,1,0,6,9.0
4,005930,2017,1,1,1,0,0,1,1,1,1,7,9.0
5,000660,2013,0,0,0,0,0,0,1,0,0,1,8.0
6,000660,2014,1,1,0,0,0,1,1,0,0,4,8.0
7,000660,2015,1,1,0,0,0,1,1,0,0,4,8.0
8,000660,2016,1,1,0,0,0,1,1,0,0,4,7.0
9,000660,2017,1,1,1,0,1,1,1,1,1,8,9.0


### F-Score '7' and above & market_sum bottom 20% & high ranking 'gpa' & bm top 20%

In [26]:
# f_score '7' and above list 
df_fscore_high = df_fscore.query('f_total >= 7')

In [27]:
# bm_value top 20% / KOSPI
df_bm_kospi_top = df_bm_kospi.query('(bm_decile_1 >= 8) & (bm_decile_2 >= 8) & (pbr_decile >= 8)')

# bm_value top 20% / KOSDAQ
df_bm_kosdaq_top = df_bm_kosdaq.query('(bm_decile_1 >= 8) & (bm_decile_2 >= 8) & (pbr_decile >= 8)')

In [28]:
# Combine markets 
df_bm_top = pd.concat([df_bm_kospi_top, df_bm_kosdaq_top])

In [29]:
# market_sum bottom 20% / KOSPI
df_bm_kospi['market_sum_decile'] = pd.qcut(df_bm_kospi['market_sum'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_market_sum_kospi_bottom = df_bm_kospi.query('market_sum_decile <= 2')

# market_sum bottom 20% / KOSDAQ
df_bm_kosdaq['market_sum_decile'] = pd.qcut(df_bm_kosdaq['market_sum'], 10, labels=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
df_market_sum_kosdaq_bottom = df_bm_kosdaq.query('market_sum_decile <= 2')

In [30]:
# Combining markets 
df_market_sum_bottom = pd.concat([df_market_sum_kospi_bottom, df_market_sum_kosdaq_bottom])

In [31]:
# high ranking GP/A 
df_gpa_top = df_fscore.query('gpa_decile >= 8')

In [32]:
df_fscore_original = pd.merge(df_fscore_high, df_bm_top, how='inner', on='code')

In [33]:
df_fscore_mktsum = pd.merge(df_fscore_original, df_market_sum_bottom, how='inner', on='code')

In [34]:
df_fscore_gpa = pd.merge(df_fscore_original, df_gpa_top, how='inner', on='code')

In [35]:
df_fscore_all = pd.merge(df_fscore_gpa, df_market_sum_bottom, how='inner', on='code')

In [36]:
# find company names with codes 
stock_ls = [i for i in db.load_all_stock_code()]
names, codes = zip(*stock_ls)

In [37]:
dict_stocks = {'code': codes, 'name': names}

In [38]:
df_stocks = pd.DataFrame(data=dict_stocks)

In [39]:
fscore_all = df_fscore_all['code'].tolist()
fscore_original = df_fscore_original['code'].tolist()
fscore_mktsum = df_fscore_mktsum['code'].tolist()
fscore_gpa = df_fscore_gpa['code'].tolist()

In [40]:
df_stocks[df_stocks.code.isin(fscore_all)]
df_stocks[df_stocks.code.isin(fscore_all)].to_csv('fscore_all')

In [41]:
df_stocks[df_stocks.code.isin(fscore_mktsum)].to_csv('fscore_marketsum')

In [42]:
df_stocks[df_stocks.code.isin(fscore_original)].to_csv('fscore_original')

In [43]:
df_stocks[df_stocks.code.isin(fscore_gpa)].to_csv('fscore_gpa')