##### From the file "crawler4piotroski_fscore_selenium"

=> flipping stock codes

In [1]:
# importing packages 
import os
from datetime import datetime

import numpy as np
import pandas as pd

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

# sqlalchemy packages 
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, Numeric, String, DateTime, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, backref
from sqlalchemy.ext.declarative import declarative_base 

In [2]:
# Headless Chrome
options = webdriver.ChromeOptions()
options.add_argument('--headless')
# Chrome expects the window size as "width,height"; the "1920x1080" form is
# not a valid value for this switch.
options.add_argument('--window-size=1920,1080')

In [3]:
# Start Chrome with Selenium
# The chromedriver location can be overridden with the CHROMEDRIVER_PATH
# environment variable; the original local path is kept as the fallback so the
# notebook still runs unchanged on the author's machine.
chromedriver_path = os.environ.get('CHROMEDRIVER_PATH',
                                   '/Users/daesikkim/Downloads/chromedriver')
# NOTE(review): `chrome_options=` is kept for the Selenium 3 API this notebook
# uses elsewhere; newer Selenium releases expect `options=` — confirm on upgrade.
driver = webdriver.Chrome(chromedriver_path, chrome_options=options)
# Implicit wait (3 seconds) applied to element lookups on this driver
driver.implicitly_wait(3)

In [4]:
# Get the stock data from DB(postgresql)
# The connection URL can be overridden with the PIOTROSKI_DB_URL environment
# variable so that user names / passwords do not have to live in the notebook;
# the original local URL remains the default.
db_string = os.environ.get("PIOTROSKI_DB_URL",
                           "postgresql://daesik:@localhost/db_piotroski")

# echo=True makes the engine log every SQL statement it emits
engine = create_engine(db_string, echo=True)

In [5]:
# Session factory bound to the engine created above, and the session
# instance used for the queries in this notebook
Session = sessionmaker(bind=engine)
session = Session()

In [6]:
# Declarative base class that the mapped classes in this notebook inherit from
Base = declarative_base()

In [7]:
# Mapping 
class Stock(Base):
    """ORM mapping for the ``stocks`` table.

    ``stock_code`` is the column referenced by the foreign keys of the
    ``base_params`` and ``bookmarket_params`` tables.
    """
    __tablename__ = 'stocks'

    # NOTE(review): both stock_id and stock_code are flagged primary_key, which
    # makes the primary key composite — confirm this is intended.
    stock_id = Column(Integer, primary_key=True)
    stock_code = Column(String, unique=True, nullable=False, primary_key=True)
    company = Column(String, index=True, unique=True, nullable=False)
    market_type = Column(Integer)
    industry = Column(String)
    created_on = Column(DateTime, default=datetime.utcnow)
    # onupdate refreshes the timestamp on every UPDATE issued through
    # SQLAlchemy; with only `default` the value was never changed after insert.
    updated_on = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Related rows in the child tables (paired with their `stock` relationships)
    base_param = relationship('BaseParam', back_populates='stock')
    bookmarket_param = relationship('BookMarketParam', back_populates='stock')

    def __repr__(self):
        """Return a readable one-line summary of the row for debugging."""
        return ("<Stock ==> id : {0}, code : {1}, company : {2}, "
                "market_type : {3}, created : {4}, updated : {5}>").format(
            self.stock_id,
            self.stock_code,
            self.company,
            self.market_type,
            self.created_on,
            self.updated_on,
        )
    

In [8]:
class BaseParam(Base): 
    """ORM mapping for the ``base_params`` table.

    Each row points at a stock through the ``stock_code`` foreign key.
    """
    __tablename__ = 'base_params'
    
    baseparam_id = Column(Integer, primary_key=True)
    # Foreign key to stocks.stock_code
    stock_code = Column(String, ForeignKey('stocks.stock_code'))
    date = Column(DateTime)
    # presumably open/close/high/low prices for `date` — TODO confirm units
    price_open = Column(Integer)
    price_close = Column(Integer) 
    price_high = Column(Integer)
    price_low = Column(Integer)
    # presumably traded volume and market capitalisation — TODO confirm
    quant = Column(Integer)
    market_sum = Column(Integer)
    
    # Parent Stock row (paired with Stock.base_param)
    stock = relationship("Stock", back_populates="base_param")
    

In [9]:
class BookMarketParam(Base):
    """ORM mapping for the ``bookmarket_params`` table.

    Each row points at a stock through the ``stock_code`` foreign key.
    """
    __tablename__ = 'bookmarket_params'

    bookmarketparam_id = Column(Integer, primary_key=True)
    # Foreign key to stocks.stock_code
    stock_code = Column(String, ForeignKey('stocks.stock_code'))
    listed_stock_cnt = Column(Integer)
    # NOTE(review): totals stored as Integer — confirm the unit keeps the
    # values within the database integer range.
    property_total = Column(Integer)
    debt_total = Column(Integer)
    # presumably the price-to-book ratio, which is fractional; Integer would
    # drop the decimals, so store it as Numeric instead.
    pbr = Column(Numeric)

    # Parent Stock row (paired with Stock.bookmarket_param)
    stock = relationship('Stock', back_populates='bookmarket_param')

In [11]:
# Issue CREATE TABLE for the tables mapped on Base, using the engine above
Base.metadata.create_all(engine)

2017-10-08 22:51:17,271 INFO sqlalchemy.engine.base.Engine select version()
2017-10-08 22:51:17,272 INFO sqlalchemy.engine.base.Engine {}
2017-10-08 22:51:17,277 INFO sqlalchemy.engine.base.Engine select current_schema()
2017-10-08 22:51:17,279 INFO sqlalchemy.engine.base.Engine {}
2017-10-08 22:51:17,285 INFO sqlalchemy.engine.base.Engine SELECT CAST('test plain returns' AS VARCHAR(60)) AS anon_1
2017-10-08 22:51:17,290 INFO sqlalchemy.engine.base.Engine {}
2017-10-08 22:51:17,296 INFO sqlalchemy.engine.base.Engine SELECT CAST('test unicode returns' AS VARCHAR(60)) AS anon_1
2017-10-08 22:51:17,297 INFO sqlalchemy.engine.base.Engine {}
2017-10-08 22:51:17,304 INFO sqlalchemy.engine.base.Engine show standard_conforming_strings
2017-10-08 22:51:17,309 INFO sqlalchemy.engine.base.Engine {}
2017-10-08 22:51:17,313 INFO sqlalchemy.engine.base.Engine select relname from pg_class c join pg_namespace n on n.oid=c.relnamespace where pg_catalog.pg_table_is_visible(c.oid) and relname=%(name)s
20

In [12]:
# Fetch every (stock_code, company) pair from the stocks table
stock_code_query = session.query(Stock.stock_code, Stock.company)
code_n_name = stock_code_query.all()

2017-10-08 22:51:19,801 INFO sqlalchemy.engine.base.Engine BEGIN (implicit)
2017-10-08 22:51:19,805 INFO sqlalchemy.engine.base.Engine SELECT stocks.stock_code AS stocks_stock_code, stocks.company AS stocks_company 
FROM stocks
2017-10-08 22:51:19,808 INFO sqlalchemy.engine.base.Engine {}


In [13]:
# Display the (stock_code, company) rows returned by the query
code_n_name

[('005930', '삼성전자'),
 ('000660', 'SK하이닉스'),
 ('005935', '삼성전자우'),
 ('005380', '현대차'),
 ('005490', 'POSCO'),
 ('051910', 'LG화학'),
 ('035420', 'NAVER'),
 ('028260', '삼성물산'),
 ('015760', '한국전력'),
 ('055550', '신한지주'),
 ('105560', 'KB금융'),
 ('032830', '삼성생명'),
 ('207940', '삼성바이오로직스'),
 ('012330', '현대모비스'),
 ('017670', 'SK텔레콤'),
 ('034730', 'SK'),
 ('096770', 'SK이노베이션'),
 ('006400', '삼성SDI'),
 ('090430', '아모레퍼시픽'),
 ('033780', 'KT&amp;G'),
 ('066570', 'LG전자'),
 ('086790', '하나금융지주'),
 ('003550', 'LG'),
 ('051900', 'LG생활건강'),
 ('010950', 'S-Oil'),
 ('251270', '넷마블게임즈'),
 ('018260', '삼성에스디에스'),
 ('011170', '롯데케미칼'),
 ('000810', '삼성화재'),
 ('000270', '기아차'),
 ('000030', '우리은행'),
 ('034220', 'LG디스플레이'),
 ('002790', '아모레G'),
 ('036570', '엔씨소프트'),
 ('035720', '카카오'),
 ('010130', '고려아연'),
 ('009540', '현대중공업'),
 ('024110', '기업은행'),
 ('009150', '삼성전기'),
 ('030200', 'KT'),
 ('035250', '강원랜드'),
 ('161390', '한국타이어'),
 ('021240', '코웨이'),
 ('004020', '현대제철'),
 ('267250', '현대로보틱스'),
 ('006800', '미래에셋대우'),
 (

In [None]:
# Creating functions to crawl each type of financial statement

In [21]:
# Loop over every stock code and load its income-statement page.
# TODO: turn this into a generator (use 'yield') once the parsing is written;
# for now `soup_income` is overwritten on each iteration.
for item in [code[0] for code in code_n_name]:
    # open the company-info page; format the URL *before* passing it to get()
    # (calling .format on the return value of driver.get() raised an error)
    driver.get('http://finance.naver.com/item/coinfo.nhn?code={0}'.format(item))

    # move to the relevant frame
    frame = driver.find_element(By.CSS_SELECTOR, "#coinfo_cp")
    driver.switch_to.frame(frame)

    # click the relevant tab and button - income statement
    finstate_button = driver.find_element(
        By.CSS_SELECTOR,
        "#header-menu > div.wrapper-menu > dl > dt:nth-of-type(3) > a")
    finstate_button.click()

    # About income statement: parse the page source of the current frame
    html_income = driver.page_source
    soup_income = BeautifulSoup(html_income, 'lxml')
    
    
    
    

005930
000660
005935
005380
005490
051910
035420
028260
015760
055550
105560
032830
207940
012330
017670
034730
096770
006400
090430
033780
066570
086790
003550
051900
010950
251270
018260
011170
000810
000270
000030
034220
002790
036570
035720
010130
009540
024110
009150
030200
035250
161390
021240
004020
267250
006800
023530
018880
009830
078930
088350
139480
008930
032640
069500
086280
004800
001040
005830
128940
042660
097950
047810
000720
010140
029780
002380
001450
036460
027410
005940
011070
271560
005387
241560
009240
071050
000120
079440
000880
012750
138930
028670
016360
003490
047040
008560
000210
088980
007070
000150
004990
012630
000060
000100
010060
006260
026960
047050
007310
102110
006280
011780
005385
204320
138040
008770
000670
028050
030000
004370
069960
006360
051915
000240
122630
010620
003520
012450
120110
051600
005300
034020
004170
139130
020150
039490
005250
000080
042670
001740
011210
010120
011200
079550
090435
006650
011790
071840
006120
005440
064350
057050