# Extract Stock Information using Selenium

In [1]:
from selenium import webdriver
import time
import sys, os
from collections import namedtuple
from collections import deque
import time
import datetime
import numpy as np
from threading import Thread

# used for plotting the candlestick chart
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go
from IPython.html.widgets import interact


# Show which plotly release is loaded, then enable plotly's notebook output mode.
print(__version__)
init_notebook_mode(connected=True)


# Append the current directory to the PATH environment variable.
# Required for selenium chrome (presumably so a chromedriver binary placed
# next to the notebook can be found -- confirm).
os.environ["PATH"] += os.pathsep + './'

2.0.11



The `IPython.html` package has been deprecated since IPython 4.0. You should import from `notebook` instead. `IPython.html.widgets` has moved to `ipywidgets`.



In [2]:
# State of the random walk used when no live data is available.
curr_trade_price = 30000       # price of the most recently simulated trade
change_choices = range(-5, 6)  # possible per-trade price moves (-5 .. +5)


def simulate_trade():
    """Generate one to three fake trades as a random walk on the global price.

    Each trade moves ``curr_trade_price`` by a random step drawn from
    ``change_choices``, is printed, and is followed by a 0.5 s pause.

    Returns:
        list: The generated ``Trade`` records, oldest first.
    """
    global curr_trade_price
    simulated = []
    batch_size = np.random.choice([1, 2, 3])
    for _ in range(batch_size):
        now = datetime.datetime.now()
        # un-padded 'H:M:S', same text as '{}:{}:{}'.format(...)
        stamp = ':'.join(str(part) for part in (now.hour, now.minute, now.second))
        step = np.random.choice(change_choices)
        curr_trade_price = curr_trade_price + step
        record = Trade(stamp, curr_trade_price, step, 1, 1, 100)
        simulated.append(record)
        time.sleep(0.5)
        print(record)
    return simulated

In [30]:
# One row of the trade table. Stock.get_live_trade_data builds it positionally
# from the texts of a row's <li> cells, so the field order must match the
# cell order on the page. The misspelled field names ('precentage_change',
# 'single_volumn', 'total_volumn') are used throughout this file.
Trade = namedtuple('Trade', ['time', 'price', 'change', 'precentage_change', 'single_volumn', 'total_volumn'])

def to_datetime_obj(trade_time):
    """Turn an ``'H:M:S'`` clock string into a datetime on today's local date.

    Args:
        trade_time: Time of day formatted as ``%H:%M:%S``.

    Returns:
        datetime.datetime: Naive datetime with today's year/month/day and the
        parsed hour/minute/second.

    Raises:
        ValueError: If ``trade_time`` does not match ``%H:%M:%S``.
    """
    clock = datetime.datetime.strptime(trade_time, '%H:%M:%S').time()
    return datetime.datetime.combine(datetime.date.today(), clock)
    
class Stock(object):
    """Scrape trades from m.3x.com.tw and fold them into candlestick data.

    The candle series are kept as parallel lists (``open_data``,
    ``high_data``, ``low_data``, ``close_data``, ``datetime_data``); the last
    element of each list is the candle currently being built.
    """

    def __init__(self, time_interval=1):
        """Open Chrome on the data page and reset the candle state.

        Args:
            time_interval: Width of one candle, in minutes.
        """
        # initialize the browser
        print("Openning browser...")
        print("Connecting to http://m.3x.com.tw/app_future.php...")
        self.driver = webdriver.Chrome() # using Chrome to fetch data
        self.driver.get("http://m.3x.com.tw/app_future.php") # data source
        # element ids of the trade rows: detail_ul_0 .. detail_ul_10
        self.id_list = ['detail_ul_{}'.format(i) for i in range(11)]
        time.sleep(1)  # presumably gives the page time to render -- confirm

        # define data variable for plotting candles
        self.time_interval = datetime.timedelta(minutes=time_interval)
        self.open_data = [0]
        self.high_data = [0]
        self.low_data = [0]
        self.close_data = [0]
        self.datetime_data = [0]
        self.curr_open = self.curr_high = self.curr_low = self.curr_close = 0
        self.curr_datetime = datetime.datetime.now()
        self.next_datetime = self.curr_datetime + self.time_interval
        self.datetime_data[-1] = self.curr_datetime
        # False until the first real price has replaced the 0 placeholders
        self._has_price = False

        # trades already seen (used to detect duplicates between polls)
        self.trades = deque(maxlen=30)

    def _seed_candle(self, price):
        """Collapse the candle being built to the single value ``price``."""
        self.curr_open = self.curr_high = self.curr_low = self.curr_close = price
        self.open_data[-1] = price
        self.high_data[-1] = price
        self.low_data[-1] = price
        self.close_data[-1] = price
        self.datetime_data[-1] = self.curr_datetime
        self._has_price = True

    def _advance_window(self, trade_time):
        """Move the candle time window forward until it covers ``trade_time``."""
        self.curr_datetime = self.next_datetime
        self.next_datetime = self.curr_datetime + self.time_interval
        # Skip over empty intervals in one go; the guard keeps a zero or
        # negative interval from looping forever.
        if self.time_interval > datetime.timedelta(0):
            while trade_time > self.next_datetime:
                self.curr_datetime = self.next_datetime
                self.next_datetime = self.curr_datetime + self.time_interval

    def _open_next_candle(self):
        """Append a new candle that opens at the previous candle's close."""
        opening = self.close_data[-1]
        # Start high/low at the open so the open always lies within [low, high].
        self.curr_open = self.curr_high = self.curr_low = self.curr_close = opening
        for series in (self.open_data, self.high_data,
                       self.low_data, self.close_data):
            series.append(opening)
        self.datetime_data.append(self.curr_datetime)

    def init_candle_value(self):
        """Seed the first candle from scraped trades, or simulated ones.

        Uses the price of the last trade returned by
        ``get_live_trade_data``; if that list is empty, falls back to the
        last trade produced by ``simulate_trade``.
        """
        print("initialize the candle plot value")
        trade_list = self.get_live_trade_data()
        print('*'*30)
        print(trade_list, '\n', '*'*30)
        if not trade_list:
            print("No trade found in the web. using the simulation data...")
            trade_list = simulate_trade()
        self._seed_candle(float(trade_list[-1].price))
        print("="*30)

    def get_live_trade_data(self):
        """Read the trade rows from the page and return the unseen ones.

        Scanning stops at the first row that is already in ``self.trades``.
        Every new trade is remembered, printed and returned.

        NOTE(review): the recorded output suggests the page lists the newest
        trade first, so the returned list would be newest-first -- confirm.

        Returns:
            list: New ``Trade`` records in page order.
        """
        # Locator-based lookups work on Selenium 3 and 4; the
        # find_elements_by_* helpers were removed in Selenium 4.
        from selenium.webdriver.common.by import By

        curr_trade_list = []
        for id_tag in self.id_list:
            targets = self.driver.find_elements(By.ID, id_tag)
            for data in targets:
                items = data.find_elements(By.TAG_NAME, "li")
                try:
                    # extract the trade record
                    trade = Trade(*[item.text for item in items])

                    # an already-seen trade ends the scan
                    if trade in self.trades:
                        return curr_trade_list

                    self.trades.append(trade)
                    curr_trade_list.append(trade)
                    print("時間: {}, 現價: {}, 漲跌: {}, %: {}, 單量: {}, 總量: {}".format(
                        trade.time,
                        trade.price,
                        trade.change,
                        trade.precentage_change,
                        trade.single_volumn,
                        trade.total_volumn
                    ))
                except Exception:
                    # Best effort: skip rows that cannot be read (e.g. wrong
                    # number of cells). Unlike a bare ``except`` this lets
                    # KeyboardInterrupt stop a polling loop.
                    continue
        return curr_trade_list

    def update_candle_data(self, trade_list):
        """Fold trades into the candle series.

        Args:
            trade_list: Iterable of records with ``.time`` (``'H:M:S'``
                string) and ``.price`` (anything ``float()`` accepts).
        """
        for trade in trade_list:
            trade_time = to_datetime_obj(trade.time) # str: 'hh:mm:ss'
            trade_price = float(trade.price) # str: 'xxxxx.xx'

            rolled_over = trade_time > self.next_datetime
            if rolled_over:
                self._advance_window(trade_time)

            if not self._has_price:
                # First price ever seen: replace the 0 placeholders instead
                # of leaving open/low stuck at 0.
                self._seed_candle(trade_price)
                continue

            if rolled_over:
                self._open_next_candle()

            # update current candle data
            self.curr_high = max(self.curr_high, trade_price)
            self.curr_low = min(self.curr_low, trade_price)
            self.curr_close = trade_price
            self.high_data[-1] = self.curr_high
            self.low_data[-1] = self.curr_low
            self.close_data[-1] = self.curr_close
            self.datetime_data[-1] = self.curr_datetime

    def get_candle_plot(self):
        """Draw the collected candles as a plotly candlestick chart."""
        trace = go.Candlestick(x=self.datetime_data,
                               open=self.open_data,
                               high=self.high_data,
                               low=self.low_data,
                               close=self.close_data)
        iplot([trace])
        

In [31]:
# Open the browser, then seed the first candle from the scraped trades.
x = Stock()
x.init_candle_value()

Openning browser...
Connecting to http://m.3x.com.tw/app_future.php...
initialize the candle plot value
時間: 11:12:11, 現價: 29734, 漲跌: -176, %: -0.59, 單量: 2, 總量: 95311
時間: 11:12:10, 現價: 29734, 漲跌: -176, %: -0.59, 單量: 8, 總量: 95309
時間: 11:12:10, 現價: 29736, 漲跌: -174, %: -0.58, 單量: 6, 總量: 95301
時間: 11:12:09, 現價: 29736, 漲跌: -174, %: -0.58, 單量: 2, 總量: 95295
時間: 11:12:08, 現價: 29737, 漲跌: -173, %: -0.58, 單量: 2, 總量: 95293
時間: 11:12:06, 現價: 29737, 漲跌: -173, %: -0.58, 單量: 2, 總量: 95291
時間: 11:12:06, 現價: 29737, 漲跌: -173, %: -0.58, 單量: 3, 總量: 95289
時間: 11:12:04, 現價: 29736, 漲跌: -174, %: -0.58, 單量: 1, 總量: 95286
時間: 11:12:03, 現價: 29736, 漲跌: -174, %: -0.58, 單量: 6, 總量: 95285
******************************
[Trade(time='11:12:11', price='29734', change='-176', precentage_change='-0.59', single_volumn='2', total_volumn='95311'), Trade(time='11:12:10', price='29734', change='-176', precentage_change='-0.59', single_volumn='8', total_volumn='95309'), Trade(time='11:12:10', price='29736', change='-174', precentag

In [33]:
# Live run: 1000 rounds of "draw chart, scrape new trades, update candles",
# pausing one second between rounds.
x = Stock()
x.init_candle_value()
for _ in range(1000):
    # NOTE(review): interact() is called on every round; the recorded output
    # below shows this cell ending in "CannotSendRequest" -- confirm the cause.
    interact(x.get_candle_plot)
    trade_list = x.get_live_trade_data()
    x.update_candle_data(trade_list)
    time.sleep(1)

CannotSendRequest: Request-sent

In [13]:
# One manual round on the existing `x`: scrape, update candles, draw.
trade_list = x.get_live_trade_data()
x.update_candle_data(trade_list)
x.get_candle_plot()

時間: 10:35:07, 現價: 29752, 漲跌: -158, %: -0.53, 單量: 21, 總量: 80610
時間: 10:35:07, 現價: 29749, 漲跌: -161, %: -0.54, 單量: 1, 總量: 80589
時間: 10:35:06, 現價: 29749, 漲跌: -161, %: -0.54, 單量: 11, 總量: 80588


In [5]:
# Simulated run: same loop as the live one, but fed by simulate_trade().
# There is no explicit sleep here; simulate_trade() sleeps 0.5 s per trade.
x = Stock()
for _ in range(1000):
    interact(x.get_candle_plot)
    trade_list = simulate_trade()
    x.update_candle_data(trade_list)

Trade(time='1:14:46', price=29995, change=-4, precentage_change=1, single_volumn=1, total_volumn=100)


KeyboardInterrupt: 

In [4]:
def plot_candle(stock, delay=1):
    """Redraw the candle chart of ``stock`` forever.

    Args:
        stock: Object exposing ``get_candle_plot``.
        delay: Seconds to sleep after each redraw.

    This function never returns.
    """
    while True:
        render = stock.get_candle_plot  # looked up afresh on every pass
        interact(render)
        time.sleep(delay)

def update_data(stock, simulation=False):
    """Feed trades into ``stock`` forever.

    Args:
        stock: Object exposing ``get_live_trade_data`` and
            ``update_candle_data``.
        simulation: When true, trades come from ``simulate_trade()``
            instead of the live page.

    This function never returns on its own; it ends only when one of the
    calls raises.
    """
    while True:
        # Only the selected source is evaluated on each pass.
        fetch = simulate_trade if simulation else stock.get_live_trade_data
        stock.update_candle_data(fetch())

In [None]:
if __name__ == '__main__':
    # Run the chart redraw and the (simulated) data feed on separate threads.
    stock = Stock()
    # Keep the Thread objects: Thread.start() returns None, so chaining
    # .start() onto the constructor would leave t1 as None and make the
    # later t1.start() raise AttributeError.
    t1 = Thread(target=plot_candle, kwargs={'stock': stock})
    t2 = Thread(target=update_data, args=[stock, True])
    t2.start()
    t1.start()

In [None]:
# Import the module rather than `from datetime import datetime`: in a notebook
# that form rebinds the shared global name `datetime` to the class, after which
# the `datetime.datetime.now()` calls in other cells fail.
import datetime

# Static five-candle example to check that plotly renders a candlestick chart.
open_data = [33.0, 33.3, 33.5, 33.0, 34.1]
high_data = [33.1, 33.3, 33.6, 33.2, 34.8]
low_data = [32.7, 32.7, 32.8, 32.6, 32.8]
close_data = [33.0, 32.9, 33.3, 33.1, 33.1]
dates = [datetime.datetime(year=2013, month=10, day=10),
         datetime.datetime(year=2013, month=11, day=10),
         datetime.datetime(year=2013, month=12, day=10),
         datetime.datetime(year=2014, month=1, day=10),
         datetime.datetime(year=2014, month=2, day=10)]

trace = go.Candlestick(x=dates,
                       open=open_data,
                       high=high_data,
                       low=low_data,
                       close=close_data)
data = [trace]
iplot(data)

In [None]:
# Locator-based lookups work on Selenium 3 and 4; the find_elements_by_*
# helpers were removed in Selenium 4.
from selenium.webdriver.common.by import By

print("Opening Browser...")
driver = webdriver.Chrome() # using Chrome to fetch data
try:
    print("Connecting to http://m.3x.com.tw/app_future.php")
    driver.get("http://m.3x.com.tw/app_future.php") # data source

    Trade = namedtuple('Trade', ['time', 'price', 'change', 'precentage_change', 'single_volumn', 'total_volumn'])
    trades = deque(maxlen=30) # remembers the 30 most recently seen trades

    # define the data source to extract:
    # detail_ul_0 to detail_ul_10: the latest data
    id_list = ['detail_ul_{}'.format(i) for i in range(11)]
    break_flat = False  # set when an already-seen trade ends a scan early
    sleep_interval = 0.1  # defined but not used by this cell
    print("Start fetching data...")
    print("="*30)
    for _ in range(1000):
        # extract trade record
        break_flat = False
        for id_tag in id_list:
            targets = driver.find_elements(By.ID, id_tag)
            for data in targets:
                items = data.find_elements(By.TAG_NAME, "li")
                try:
                    # extract the trade record
                    trade = Trade(*[item.text for item in items])

                    # if the trade is not in the trade lists, append it and print it out
                    if trade not in trades:
                        trades.append(trade)
                        print("時間: {}, 現價: {}, 漲跌: {}, %: {}, 單量: {}, 總量: {}".format(
                            trade.time,
                            trade.price,
                            trade.change,
                            trade.precentage_change,
                            trade.single_volumn,
                            trade.total_volumn
                        ))
                    else:
                        break_flat = True
                        break
                except Exception:
                    # Best effort: skip unreadable rows, but let
                    # KeyboardInterrupt through (a bare except would not).
                    continue
            if break_flat:
                break
finally:
    # Always shut the browser down, also when the loop is interrupted.
    # quit() ends the driver session as well as closing its windows.
    driver.quit()

In [None]:
# Locator-based lookups work on Selenium 3 and 4; the find_elements_by_*
# helpers were removed in Selenium 4.
from selenium.webdriver.common.by import By

print("Opening Browser...")
driver = webdriver.Chrome() # using Chrome to fetch data
try:
    print("Connecting to http://m.3x.com.tw/app_future.php")
    driver.get("http://m.3x.com.tw/app_future.php") # data source

    Trade = namedtuple('Trade', ['time', 'price', 'change', 'precentage_change', 'single_volumn', 'total_volumn'])
    trades = deque(maxlen=30) # remembers the 30 most recently seen trades

    # define the data source to extract:
    # every <ul> whose id contains "detail_ul", fetched with one query
    sleep_interval = 0.1
    print("Start fetching data...")
    print("="*30)
    # 100 / 0.1 = 1000 polls, i.e. at least 100 s of sleeping in total
    for _ in range(int(100/sleep_interval)):
        # extract trade record
        start_time = time.time()
        targets = driver.find_elements(By.CSS_SELECTOR, 'ul[id*=detail_ul]')
        print("Time used to find targets:", time.time() - start_time)
        for data in targets:
            items = data.find_elements(By.TAG_NAME, "li")
            try:
                # extract the trade record (each cell's text is read once)
                trade = Trade(*[item.text for item in items])

                # if the trade is not in the trade lists, append it and print it out
                if trade not in trades:
                    trades.append(trade)
                    print("時間: {}, 現價: {}, 漲跌: {}, %: {}, 單量: {}, 總量: {}".format(
                        trade.time,
                        trade.price,
                        trade.change,
                        trade.precentage_change,
                        trade.single_volumn,
                        trade.total_volumn
                    ))
            except Exception:
                # Best effort: skip unreadable rows, but let
                # KeyboardInterrupt through (a bare except would not).
                continue

        time.sleep(sleep_interval) # sleep for 0.1s
finally:
    # Always shut the browser down, also when the loop is interrupted.
    # quit() ends the driver session as well as closing its windows.
    driver.quit()

# Listening for information from the websocket

In [None]:
import websocket
import _thread
import time
import json

In [None]:
# Last HSIX price and cumulative volume handled by on_message.
# Both stay None until the first HSIX message arrives.
prev_price = None
prev_volumn = None


def on_message(ws, message):
    """Print every websocket message and summarise HSIX ticks.

    The message is parsed as JSON. When it has a ``"d"`` entry whose
    ``|``-separated payload starts with ``HSIX``, the tick is printed and
    the globals ``prev_price`` / ``prev_volumn`` are updated.

    Args:
        ws: The websocket app delivering the message (unused).
        message: Raw JSON text.

    Raises:
        ValueError: If ``message`` is not valid JSON or a numeric field
            cannot be parsed.
    """
    global prev_price, prev_volumn
    # example: HSIX|00:59:58|29344|29339|29341|11570| (idx, time, highest price, ?, current_price, volume)
    print(message)
    message_json = json.loads(message)
    if "d" not in message_json:
        return

    data = message_json["d"].split('|')
    # Ignore other products and payloads too short to hold the volume field.
    if data[0] != 'HSIX' or len(data) < 6:
        return

    idx = data[0]
    tick_time = data[1]  # not named `time`, which would shadow the module
    current_price = float(data[4])
    total_volumn = float(data[5])
    # The per-tick volume is the growth of the cumulative volume; it is
    # unknown (None) for the first tick.
    if prev_volumn is None:
        single_volumn = None
    else:
        single_volumn = total_volumn - prev_volumn
    print("="*30)
    print("HSIX detected.")
    print('產品代號: {}, 時間: {}, 現價: {}, 單量: {}, 總量: {}'.format(
        idx,
        tick_time,
        current_price,
        single_volumn,
        total_volumn,
    ))

    # update previous
    prev_price = current_price
    prev_volumn = total_volumn
            
def on_error(ws, error):
    """Websocket error callback: print the error and carry on.

    Args:
        ws: The websocket app reporting the error (unused).
        error: The error object or message to print.
    """
    print(error)

def on_close(ws, close_status_code=None, close_msg=None):
    """Websocket close callback: announce that the connection closed.

    Args:
        ws: The websocket app that was closed (unused).
        close_status_code: Close status code, when the library passes one
            (unused).
        close_msg: Close reason, when the library passes one (unused).

    websocket-client 1.0 and later call this callback with three arguments.
    The two optional parameters accept that call while still allowing the
    one-argument form.
    """
    print("### closed ###")

def on_open(ws):
    """Websocket open callback: start a background thread that later closes ``ws``.

    The thread sleeps 0.05 s 30000 times, then one more second, then calls
    ``ws.close()`` and prints a farewell line. This callback itself returns
    immediately.

    Args:
        ws: The websocket app that was opened.
    """
    def wait_then_close(*args):
        remaining = 30000
        while remaining > 0:
            time.sleep(0.05)
            remaining -= 1
        time.sleep(1)
        ws.close()
        print("thread terminating...")

    _thread.start_new_thread(wait_then_close, ())

In [None]:
# Turn on websocket-client's trace output, wire the callbacks defined above
# to the feed at ws://m.3x.com.tw:5490, and block in the receive loop.
websocket.enableTrace(True)
ws = websocket.WebSocketApp("ws://m.3x.com.tw:5490",
                            on_message = on_message,
                            on_error = on_error,
                            on_close = on_close)
# on_open is attached after construction rather than passed to the constructor
ws.on_open = on_open
ws.run_forever()

# Testing

In [None]:
from bs4 import BeautifulSoup
from selenium import webdriver

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch the m.3x.com.tw futures page once and print its parsed HTML.

    The HTTP response object is stored in the global ``html`` (already read
    and closed when this function returns).

    TODO: the price extraction that used to follow the ``break`` was
    unreachable and has been removed; reading the price from
    ``<ul id="detail_ul_0">`` still has to be implemented.
    """
    # `import urllib` alone does not load the `urllib.request` submodule.
    import urllib.request

    global html
    url = "http://m.3x.com.tw/app_future.php"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
            'Connection': 'keep-alive',
            'Host': 'm.3x.com.tw',
            # NOTE(review): hard-coded session cookie checked into source;
            # it is a credential and has presumably expired -- confirm it is
            # still needed.
            'Cookie': 'PHPSESSID=af0umr6rekp5l2m3tv3jvlme60; _gat=1; _ga=GA1.3.1833497169.1511192942; _gid=GA1.3.1126247127.1511192942',
            'Upgrade-Insecure-Requests': '1',
        },
    )
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req, timeout=10) as response:
        html = response
        ok = response.read().decode('utf-8')

    # Naming the parser avoids bs4's "no parser was explicitly specified"
    # warning; lxml is the parser the investing.com scraper in this file uses.
    bsobj = BeautifulSoup(ok, "lxml")
    print(bsobj)


soup()

In [None]:
import os
import urllib
import datetime
from bs4 import BeautifulSoup
import time


def soup():
    """Fetch investing.com's major-indices page once and print its parsed HTML.

    The HTTP response object is stored in the global ``html`` (already read
    and closed when this function returns).

    TODO: the price extraction that used to follow the ``break`` was
    unreachable and has been removed; reading the value from
    ``<td class="pid-169-last">`` still has to be implemented.
    """
    # `import urllib` alone does not load the `urllib.request` submodule.
    import urllib.request

    global html
    url = "http://www.investing.com/indices/major-indices"
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
            'Connection': 'keep-alive',
        },
    )
    # A timeout keeps an unresponsive server from hanging the notebook; the
    # context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(req, timeout=10) as response:
        html = response
        ok = response.read().decode('utf-8')

    bsobj = BeautifulSoup(ok, "lxml")
    print(bsobj)


soup()

# Trying plotly

In [None]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Show the loaded plotly version, then enable plotly's notebook output mode.
print(__version__)
init_notebook_mode(connected=True)

In [None]:
import plotly.graph_objs as go
from IPython.html.widgets import interact

def view_image(noise):
    """Plot two three-point line traces whose y-values depend on ``noise``.

    Args:
        noise: Number added to / subtracted from the first trace and used
            as the last point of the second trace.
    """
    xs = [1, 2, 3]
    y_series = (
        [3 + noise, 1 - noise, 6],
        [1, 2, noise],
    )
    iplot([go.Scatter(x=xs, y=ys) for ys in y_series])
# Call interact() on view_image ten times, with noise = 0..9, one second apart.
for i in range(10):
    interact(view_image, noise=i)
    time.sleep(1)