# Extract Stock Information using Selenium

In [1]:
from selenium import webdriver
import time
import sys, os
from collections import namedtuple
from collections import deque


# Append the current directory to PATH so executables placed next to this
# notebook can be located (presumably the chromedriver binary -- confirm).
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], './'])

In [None]:
# Poll the futures page for roughly 60 seconds and print every trade row that
# has not been seen before.
#
# Local imports keep this cell self-contained.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()  # using Chrome to fetch data

# Field names (including their historical spelling) are kept as-is because
# they are the attribute names other code would use to read a Trade.
Trade = namedtuple('Trade', ['time', 'price', 'change', 'precentage_change', 'single_volumn', 'total_volumn'])
trades = deque(maxlen=50)  # keep track of the latest 50 trades on the webpage

# define the data source to extract:
# detail_ul_0 to detail_ul_10: the latest data
id_list = ['detail_ul_{}'.format(i) for i in range(11)]
sleep_interval = 0.5

try:
    driver.get("http://m.3x.com.tw/app_future.php")  # data source

    for _ in range(int(60 / sleep_interval)):  # run for 60s
        # Walk the rows from detail_ul_10 down to detail_ul_0.
        for id_tag in reversed(id_list):
            # find_elements(By..., ...) works on both Selenium 3 and 4; the
            # find_elements_by_* helpers were removed in Selenium 4.
            for data in driver.find_elements(By.ID, id_tag):
                try:
                    items = data.find_elements(By.TAG_NAME, "li")
                    trade = Trade(*[item.text for item in items])
                except (TypeError, WebDriverException):
                    # TypeError: the row does not have exactly six <li> cells.
                    # WebDriverException: the element went stale / vanished
                    # while the page was refreshing. Skip the row either way.
                    continue

                # if the trade is not in the trade lists, append it and print it out
                if trade not in trades:
                    trades.append(trade)
                    print("時間: {}, 現價: {}, 漲跌: {}, %: {}, 單量: {}, 總量: {}".format(
                        trade.time,
                        trade.price,
                        trade.change,
                        trade.precentage_change,
                        trade.single_volumn,
                        trade.total_volumn
                    ))

        time.sleep(sleep_interval)  # wait before polling the page again
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and running it in `finally` releases them even if polling fails or the
    # user interrupts the cell.
    driver.quit()

時間: 11:53:27, 現價: 29592, 漲跌: 394, %: 1.35, 單量: 4, 總量: 79479
時間: 11:53:28, 現價: 29592, 漲跌: 394, %: 1.35, 單量: 1, 總量: 79480
時間: 11:53:29, 現價: 29592, 漲跌: 394, %: 1.35, 單量: 1, 總量: 79481
時間: 11:53:29, 現價: 29593, 漲跌: 395, %: 1.35, 單量: 2, 總量: 79483
時間: 11:53:32, 現價: 29592, 漲跌: 394, %: 1.35, 單量: 1, 總量: 79487
時間: 11:53:33, 現價: 29594, 漲跌: 396, %: 1.36, 單量: 3, 總量: 79491
時間: 11:53:35, 現價: 29593, 漲跌: 395, %: 1.35, 單量: 1, 總量: 79492
時間: 11:53:37, 現價: 29595, 漲跌: 397, %: 1.36, 單量: 1, 總量: 79518
時間: 11:53:38, 現價: 29595, 漲跌: 397, %: 1.36, 單量: 1, 總量: 79519
時間: 11:53:33, 現價: 29593, 漲跌: 395, %: 1.35, 單量: 1, 總量: 79488
時間: 11:53:36, 現價: 29593, 漲跌: 395, %: 1.35, 單量: 1, 總量: 79493
時間: 11:53:36, 現價: 29596, 漲跌: 398, %: 1.36, 單量: 21, 總量: 79514
時間: 11:53:37, 現價: 29595, 漲跌: 397, %: 1.36, 單量: 3, 總量: 79517
時間: 11:53:40, 現價: 29594, 漲跌: 396, %: 1.36, 單量: 1, 總量: 79527
時間: 11:53:38, 現價: 29593, 漲跌: 395, %: 1.35, 單量: 2, 總量: 79521
時間: 11:53:39, 現價: 29594, 漲跌: 396, %: 1.36, 單量: 5, 總量: 79526
時間: 11:53:41, 現價: 29594, 漲跌: 396, %: 1.

# Listen for information from the websocket

In [None]:
import websocket
import _thread
import time
import json

In [None]:
# State carried between HSIX ticks by on_message; None until the first HSIX
# tick has been processed.
prev_price = None
prev_volumn = None


def on_message(ws, message):
    """Handle one websocket message and print HSIX ticks.

    The raw message is always printed. It is then parsed as JSON; when it has
    a "d" entry, that entry is split on '|'. Per the example payload
    ``HSIX|00:59:58|29344|29339|29341|11570|`` the fields are read as
    (product, time, highest price, ?, current price, total volume).

    For HSIX messages the per-tick volume is the difference between this
    message's total volume and the previous one; it is reported as ``None``
    on the first HSIX tick because there is nothing to diff against.

    Args:
        ws: the websocket app invoking the callback (unused).
        message: the raw text payload.
    """
    global prev_price, prev_volumn
    print(message)
    message_json = json.loads(message)
    if "d" not in message_json:
        return

    data = message_json["d"].split('|')
    product_name = data[0]
    if product_name != 'HSIX':
        return

    # Named tick_time so the `time` module is not shadowed.
    tick_time = data[1]
    current_price = float(data[4])
    total_volumn = float(data[5])
    if prev_volumn is None:
        single_volumn = None
    else:
        single_volumn = total_volumn - prev_volumn

    print("=" * 30)
    print("HSIX detected.")
    print('產品代號: {}, 時間: {}, 現價: {}, 單量: {}, 總量: {}'.format(
        product_name,
        tick_time,
        current_price,
        single_volumn,
        total_volumn,
    ))

    # update previous
    prev_price = current_price
    prev_volumn = total_volumn
            
def on_error(ws, error):
    """Print the error passed to the websocket error callback.

    Args:
        ws: the websocket app invoking the callback (unused).
        error: the error object or message to print.
    """
    print(error)

def on_close(ws, close_status_code=None, close_msg=None):
    """Print a marker when the websocket connection closes.

    The two optional parameters let the callback be invoked either with the
    websocket alone or with a close status code and message as well
    (websocket-client 1.x passes all three); both are ignored.

    Args:
        ws: the websocket app invoking the callback (unused).
        close_status_code: close status code, if supplied (unused).
        close_msg: close reason, if supplied (unused).
    """
    print("### closed ###")

def on_open(ws):
    """Start a background thread that closes the websocket after a delay.

    The thread sleeps 0.05s 30000 times, then one more second, then calls
    ``ws.close()`` and prints a termination message. This function itself
    returns immediately after starting the thread.

    Args:
        ws: the websocket app whose connection was just opened.
    """
    def _close_after_delay(*_unused):
        remaining = 30000
        while remaining > 0:
            time.sleep(0.05)
            remaining -= 1
        time.sleep(1)
        ws.close()
        print("thread terminating...")

    _thread.start_new_thread(_close_after_delay, ())

In [None]:
# Enable websocket-client's trace output, then connect to the quote feed with
# the callbacks defined above and block until the connection ends.
websocket.enableTrace(True)
ws = websocket.WebSocketApp(
    "ws://m.3x.com.tw:5490",
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)
ws.run_forever()

# Testing

In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch the m.3x.com.tw futures page once and print the parsed HTML.

    The page is requested with browser-like headers and a 10 second timeout,
    decoded as UTF-8, parsed with BeautifulSoup and printed.

    Side effects:
        Binds the module-level name ``html`` to the HTTP response object
        (already closed by the time this function returns).
    """
    # `import urllib` alone does not guarantee that the `urllib.request`
    # submodule is loaded, so import it explicitly here.
    import urllib.request

    global html

    url = "http://m.3x.com.tw/app_future.php"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'Connection': 'keep-alive',
            'Host': 'm.3x.com.tw',
            # NOTE(review): hard-coded session cookie, presumably captured
            # from a browser session -- confirm it is still needed/valid.
            'Cookie': 'PHPSESSID=af0umr6rekp5l2m3tv3jvlme60; _gat=1; _ga=GA1.3.1833497169.1511192942; _gid=GA1.3.1126247127.1511192942',
            'Upgrade-Insecure-Requests': 1,
        },
    )

    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(req, timeout=10) as html:
        ok = html.read().decode('utf-8')

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    bsobj = BeautifulSoup(ok, "html.parser")
    print(bsobj)
    # NOTE: the polling/price-extraction statements that used to follow an
    # unconditional `break` were unreachable and have been removed.

# Run the fetch immediately; this performs a network request to m.3x.com.tw.
soup()

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch the investing.com major-indices page once and print the parsed HTML.

    The page is requested with browser-like headers, decoded as UTF-8, parsed
    with BeautifulSoup's "lxml" parser and printed.

    Side effects:
        Binds the module-level name ``html`` to the HTTP response object
        (already closed by the time this function returns).
    """
    # `import urllib` alone does not guarantee that the `urllib.request`
    # submodule is loaded, so import it explicitly here.
    import urllib.request

    global html

    url = "http://www.investing.com/indices/major-indices"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            'Connection': 'keep-alive',
        },
    )

    # A timeout keeps the cell from hanging forever on an unresponsive
    # server; the context manager closes the connection after the read.
    with urllib.request.urlopen(req, timeout=10) as html:
        ok = html.read().decode('utf-8')

    bsobj = BeautifulSoup(ok, "lxml")
    print(bsobj)
    # NOTE: the polling/price-extraction statements that used to follow an
    # unconditional `break` were unreachable and have been removed.

# Run the fetch immediately; this performs a network request to investing.com.
soup()