# Extract Stock Information using Selenium

In [None]:
import os
import sys
import time

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.common.by import By

# Seconds to wait between two polls of the rendered page.
POLL_INTERVAL_SECONDS = 0.1

# Add the current directory to PATH (presumably so that a chromedriver binary
# placed next to the notebook is found -- confirm for your setup).
os.environ["PATH"] += os.pathsep + './'

driver = webdriver.Chrome()  # using Chrome to fetch data
driver.get("http://m.3x.com.tw/app_future.php")  # data source

# Last row that was printed; used to suppress repeated output of the same tick.
prev_data = None
prev_time = None
curr_time = None
curr_price = None
up_down = None
precentage = None
single_volumn = None
total_volumn = None

try:
    while True:
        # detail_ul_0: the latest data
        for data in driver.find_elements(By.ID, 'detail_ul_0'):
            try:
                items = data.find_elements(By.TAG_NAME, "li")
                curr_data = tuple(item.text for item in items)
                (curr_time, curr_price, up_down, precentage,
                 single_volumn, total_volumn) = curr_data
            except (ValueError, StaleElementReferenceException):
                # Either the row did not hold exactly six <li> cells, or the
                # element was replaced while it was being read. Skip this
                # round; the next poll retries.
                continue

            if curr_data != prev_data:
                print("時間: {}, 現價: {}, 漲跌: {}, %: {}, 單量: {}, 總量: {}".format(
                    curr_time,
                    curr_price,
                    up_down,
                    precentage,
                    single_volumn,
                    total_volumn
                ))
                # Remember what was printed so an unchanged row is not
                # printed again on the next poll.
                prev_data = curr_data

        # Pause briefly before reading the page again.
        time.sleep(POLL_INTERVAL_SECONDS)
except KeyboardInterrupt:
    # Interrupting the kernel is the intended way to stop the endless loop.
    pass
finally:
    # Always end the browser session, even when the loop is interrupted or
    # fails; quit() also shuts down the driver process.
    driver.quit()

時間: 09:56:35, 現價: 29403, 漲跌: 205, %: 0.70, 單量: 9, 總量: 32580
時間: 09:56:38, 現價: 29400, 漲跌: 202, %: 0.69, 單量: 70, 總量: 32699
時間: 09:56:39, 現價: 29398, 漲跌: 200, %: 0.68, 單量: 42, 總量: 32741
時間: 09:56:41, 現價: 29399, 漲跌: 201, %: 0.69, 單量: 1, 總量: 32764
時間: 09:56:42, 現價: 29396, 漲跌: 198, %: 0.68, 單量: 1, 總量: 32805
時間: 09:56:43, 現價: 29388, 漲跌: 190, %: 0.65, 單量: 3, 總量: 32913
時間: 09:56:45, 現價: 29392, 漲跌: 194, %: 0.66, 單量: 33, 總量: 33037
時間: 09:56:46, 現價: 29391, 漲跌: 193, %: 0.66, 單量: 40, 總量: 33077


# Listen for data from the WebSocket

In [3]:
import websocket
import _thread
import time
import json

In [6]:
# State carried between HSIX ticks; None until the first HSIX tick arrives.
prev_price = None
prev_volumn = None


def on_message(ws, message):
    """Print every incoming frame and a summary line for each HSIX tick.

    A frame is parsed as JSON; when it is an object with a "d" field, that
    field is split on '|'. Example record taken from the feed:
    ``HSIX|00:59:58|29344|29339|29341|11570|``
    (idx, time, highest price, ?, current price, total volume).

    Args:
        ws: The WebSocketApp that received the frame (unused).
        message: Raw text frame.

    Side effects:
        Prints to stdout and updates the module-level ``prev_price`` and
        ``prev_volumn`` after every well-formed HSIX record.
    """
    global prev_price, prev_volumn
    print(message)
    message_json = json.loads(message)
    if not isinstance(message_json, dict) or "d" not in message_json:
        return

    data = message_json["d"].split('|')
    if data[0] != 'HSIX':
        return

    try:
        idx = data[0]
        tick_time = data[1]  # not named `time`, to avoid shadowing the module
        current_price = float(data[4])
        total_volumn = float(data[5])
    except (IndexError, ValueError) as exc:
        # Too few fields or a non-numeric value: report and keep listening.
        print("Skipping malformed HSIX record {!r}: {}".format(message_json["d"], exc))
        return

    # The per-tick volume is the increase of the cumulative volume since the
    # previous tick; it is unknown for the very first tick.
    if prev_volumn is None:
        single_volumn = None
    else:
        single_volumn = total_volumn - prev_volumn

    print("="*30)
    print("HSIX detected.")
    print('產品代號: {}, 時間: {}, 現價: {}, 單量: {}, 總量: {}'.format(
        idx,
        tick_time,
        current_price,
        single_volumn,
        total_volumn,
    ))

    # update previous
    prev_price = current_price
    prev_volumn = total_volumn
            
def on_error(ws, error):
    """Report an error raised by the WebSocket connection.

    Args:
        ws: The WebSocketApp the error belongs to (unused).
        error: The error object; it is printed to stdout as-is.
    """
    print(error)

def on_close(ws, close_status_code=None, close_msg=None):
    """Announce that the WebSocket connection has been closed.

    Args:
        ws: The WebSocketApp that was closed (unused).
        close_status_code: Close code, when the library passes one. Optional
            so that both the one-argument and the three-argument callback
            forms of websocket-client are accepted.
        close_msg: Close reason, when the library passes one.
    """
    print("### closed ###")

def on_open(ws):
    """Start a background thread that closes the socket after a fixed delay.

    The thread sleeps in one-second steps (30000 of them, plus one more
    second), then closes ``ws`` and prints a termination notice.

    Args:
        ws: The WebSocketApp whose connection has just been opened.
    """
    one_second_steps = 30000

    def close_later(*_ignored):
        # Keep the connection open while the steps elapse.
        remaining = one_second_steps
        while remaining > 0:
            time.sleep(1)
            remaining -= 1
        time.sleep(1)
        ws.close()
        print("thread terminating...")

    _thread.start_new_thread(close_later, ())

In [7]:
# Print the handshake headers and frame traffic while the client runs.
websocket.enableTrace(True)

# Register all four callbacks when the client is created.
ws = websocket.WebSocketApp(
    "ws://m.3x.com.tw:5490",
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)

# Blocks until the connection is closed.
ws.run_forever()

--- request header ---
GET / HTTP/1.1
Upgrade: websocket
Connection: Upgrade
Host: m.3x.com.tw:5490
Origin: http://m.3x.com.tw:5490
Sec-WebSocket-Key: O57h8mVK0re8HW+HbcxD2w==
Sec-WebSocket-Version: 13


-----------------------
--- response header ---
HTTP/1.1 101 Switching Protocols
Connection: Upgrade
Sec-WebSocket-Accept: lm1xG1yih9juO/vntYg9s9sg920=
Upgrade: websocket
-----------------------


{"d":"HSI|29466040|29260310|29509760|29343480@","t":"GIN"}
{"d":"NKI|2250585|2226176|2256325|2242669@","t":"GIN"}
{"d":"KOR|253528|252767|253710|252596@","t":"GIN"}
{"d":"YDX|940790|940560|940890|939990@","t":"GIN"}
{"d":"SHI|3400380|3392400|3400490|3377860@","t":"GIN"}
{"d":"HSC|11666920|11538280|11676070|11567030@","t":"GIN"}
{"d":"SHSZ300|4173030|4143830|4173050|4129980@","t":"GIN"}
{"d":"HSI|29465120|29260310|29509760|29343480@","t":"GIN"}
{"d":"NKI|2250519|2226176|2256325|2242669@","t":"GIN"}
{"d":"KOR|253570|252767|253710|252596@","t":"GIN"}
{"d":"YDX|940790|940560|940890|939990@","t":"GIN"}
{"d":"SHI|3400550|3392400|3400550|3377860@","t":"GIN"}
{"d":"HSC|11668870|11538280|11676070|11567030@","t":"GIN"}
{"d":"SHSZ300|4173240|4143830|4173310|4129980@","t":"GIN"}
{"d":"HSI|29457040|29260310|29509760|29343480@","t":"GIN"}
{"d":"NKI|2250547|2226176|2256325|2242669@","t":"GIN"}
{"d":"KOR|253512|252767|253710|252596@","t":"GIN"}
{"d":"YDX|940850|940560|940890|939990@","t":"GIN"}
{"d":"

send: b'\x88\x82\xdf]\x96\x99\xdc\xb5'



### closed ###


# Testing

In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch the futures page once and print the parsed HTML.

    The response object is stored in the module-level ``html`` so it can be
    inspected from another cell afterwards.

    Note: an earlier polling loop (read ``ul#detail_ul_0``, convert its text
    to float, print it with a timestamp every 3 s) sat behind an
    unconditional ``break`` and could never run; it has been removed.
    """
    # `import urllib` alone does not guarantee that the `urllib.request`
    # submodule is loaded, so import it explicitly.
    import urllib.request

    global html

    url = "http://m.3x.com.tw/app_future.php"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'Connection': 'keep-alive',
            'Host': 'm.3x.com.tw',
            # NOTE(review): hard-coded session cookie -- confirm it is still
            # needed and consider loading it from the environment instead.
            'Cookie': 'PHPSESSID=af0umr6rekp5l2m3tv3jvlme60; _gat=1; _ga=GA1.3.1833497169.1511192942; _gid=GA1.3.1126247127.1511192942',
            'Upgrade-Insecure-Requests': '1',
        },
    )

    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(req, timeout=10) as html:
        ok = html.read().decode('utf-8')

    # Name the parser explicitly so the result does not depend on which
    # parsers happen to be installed.
    bsobj = BeautifulSoup(ok, "lxml")
    print(bsobj)

# Fetch the page and print the parsed HTML.
soup()

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch the major-indices page once and print the parsed HTML.

    The response object is stored in the module-level ``html`` so it can be
    inspected from another cell afterwards.

    Note: an earlier polling loop (read ``td.pid-169-last``, convert its text
    to float, print it with a timestamp every 3 s) sat behind an
    unconditional ``break`` and could never run; it has been removed.
    """
    # `import urllib` alone does not guarantee that the `urllib.request`
    # submodule is loaded, so import it explicitly.
    import urllib.request

    global html

    url = "http://www.investing.com/indices/major-indices"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            'Connection': 'keep-alive',
        },
    )

    # A timeout keeps the cell from hanging forever on an unresponsive
    # server; the context manager closes the connection after the read.
    with urllib.request.urlopen(req, timeout=10) as html:
        ok = html.read().decode('utf-8')

    bsobj = BeautifulSoup(ok, "lxml")
    print(bsobj)

# Fetch the page and print the parsed HTML.
soup()