In [25]:
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_WRITE, EVENT_READ

import sqlite3
import json
from datetime import datetime, timedelta

In [26]:
def datetime_gen_fn(start_date, end_date, delta=timedelta(days=1)):
    """Yield evenly spaced datetimes from ``start_date`` through ``end_date``.

    Args:
        start_date: First value to yield.
        end_date: Inclusive upper bound; a value equal to it is still yielded.
        delta: Step added after each yielded value (one day by default).

    Yields:
        ``start_date``, ``start_date + delta``, ... while the value does not
        exceed ``end_date``. Nothing is yielded if ``start_date > end_date``.
    """
    cursor = start_date
    while True:
        if cursor > end_date:
            return
        yield cursor
        cursor = cursor + delta

In [51]:
class Fetcher:
    """Callback-driven, non-blocking HTTP fetcher for one day of market data.

    Each instance downloads the transactions for a single date, stores them in
    the ``AquaticTrans`` table and then starts a fetcher for the next date.

    The class relies on these module-level names being defined before use:
    ``base_url``, ``selector``, ``datetime_gen``, ``cur``, ``conn`` and the
    ``stopped`` flag polled by the event loop.
    """

    # JSON keys of one record, in the column order of the INSERT statement.
    _RECORD_KEYS = ('魚貨名稱', '品種代碼', '市場名稱', '上價', '下價',
                    '中價', '平均價', '交易日期', '交易量')

    def __init__(self, date):
        """Prepare (but do not start) the request for ``date``.

        Args:
            date: ``datetime`` of the day to download.
        """
        self.response = b''  # Raw bytes received so far (headers + body).
        self.sock = None

        query_date = self._roc_query_date(date)
        url = base_url.format(query_date, query_date)

        self.date = date
        self.url = urlparse(url)

    @staticmethod
    def _roc_query_date(date):
        """Return ``date`` as a 7-character ROC-calendar string (``YYYMMDD``).

        The ROC year is the Gregorian year minus 1911, zero-padded to three
        digits, e.g. 2009-01-02 -> ``'0980102'``.
        """
        return '{:03d}{:%m%d}'.format(date.year - 1911, date)

    def parse_response(self):
        """Decode the JSON body of the buffered HTTP response.

        Everything after the first blank line (``CRLF CRLF``) is treated as
        the body, so a trailing newline or a CRLF inside the body no longer
        breaks parsing.

        Returns:
            The value produced by ``json.loads`` for the body.

        Raises:
            ValueError: the body is not valid UTF-8 encoded JSON (this also
                covers a response without a header/body separator).
        """
        _headers, _separator, body = self.response.partition(b'\r\n\r\n')
        return json.loads(body.decode('utf-8'))

    def _store_records(self, records):
        """Insert every record into ``AquaticTrans`` and commit once."""
        rows = [tuple(item[field] for field in self._RECORD_KEYS)
                for item in records]
        cur.executemany('''
                INSERT INTO AquaticTrans
                (type_name, type_code, market_name, high_price, low_price, mid_price, avg_price, date, trans_amount)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)''', rows)
        # One commit per response instead of one per row.
        conn.commit()

    @staticmethod
    def _start_next():
        """Start a fetcher for the next pending date, if there is one."""
        try:
            next_date = next(datetime_gen)
        except StopIteration:
            return  # Every date has already been handed out.
        Fetcher(next_date).fetch()

    def read_response(self, key, mask):
        """Selector callback: the socket is readable.

        Buffers data until the peer closes the connection, then starts the
        next fetcher, stores the parsed records and, when no socket is left
        registered, sets ``stopped`` so the event loop can exit.
        """
        global stopped

        chunk = self.sock.recv(4096)  # 4k chunk size.
        if chunk:
            self.response += chunk
            return

        # An empty read means the server closed the connection: done reading.
        selector.unregister(key.fd)
        self.sock.close()

        # Start the next download before parsing so the pipeline stays full.
        self._start_next()

        try:
            self._store_records(self.parse_response())
        except ValueError as exc:
            # A non-JSON reply must not kill the loop and with it every
            # other in-flight fetcher; report the date and carry on.
            print('{} failed: {}'.format(self.date, exc))
        else:
            print('{} done'.format(self.date))

        # Nothing registered means no fetcher is in flight and none could be
        # started, so the crawl is complete.
        if not selector.get_map():
            stopped = True

    def connected(self, key, mask):
        """Selector callback: the socket is writable, i.e. connect finished.

        Sends the HTTP/1.0 GET request and switches the registration over to
        ``read_response``.
        """
        selector.unregister(key.fd)

        url = self.url.geturl()
        host = self.url.netloc  # The Host header keeps an explicit port.

        request = 'GET {} HTTP/1.0\r\nHost: {}\r\n\r\n'.format(url, host)
        self.sock.send(request.encode('ascii'))

        # Register the next callback.
        selector.register(key.fd,
                          EVENT_READ,
                          self.read_response)

    def fetch(self):
        """Open a non-blocking connection and register ``connected``."""
        self.sock = socket.socket()
        self.sock.setblocking(False)

        # ``hostname`` excludes any ":port" suffix that ``netloc`` carries
        # and that ``connect`` cannot resolve.
        host = self.url.hostname
        port = self.url.port or 80

        try:
            self.sock.connect((host, port))
        except BlockingIOError:
            # Expected for a non-blocking socket: the connect is in progress.
            pass

        # Register next callback.
        selector.register(self.sock.fileno(),
                          EVENT_WRITE,
                          self.connected)

In [54]:
selector = DefaultSelector()

# Database connection
conn = sqlite3.connect('tw-axxxCulture-market.sqlite')
cur = conn.cursor()

# Table setup (drops any existing table, so every run starts from scratch)
cur.executescript('''
DROP TABLE IF EXISTS AquaticTrans;
CREATE TABLE AquaticTrans (
    id           INTEGER NOT NULL PRIMARY KEY AUTOINCREMENT UNIQUE,
    type_name    TEXT NOT NULL,
    type_code    INTEGER NOT NULL,
    market_name  TEXT NOT NULL,
    high_price   REAL NOT NULL,
    low_price    REAL NOT NULL,
    mid_price    REAL NOT NULL,
    avg_price    REAL NOT NULL,
    date         TEXT NOT NULL,
    trans_amount REAL NOT NULL
)
''')

# datetime object generator
start_date = datetime.strptime('2009-01-01', '%Y-%m-%d')
end_date = datetime.strptime('2018-01-09', '%Y-%m-%d')
datetime_gen = datetime_gen_fn(start_date, end_date)

num_fetcher = 10
base_url = 'http://m.coa.gov.tw/OpenData/AquaticTransData.aspx?StartDate={}&EndDate={}'

# Flag polled by the event loop; defined before any callback can run.
stopped = False

# Start fetchers: at most num_fetcher, fewer if the date range is shorter.
# zip() stops at the shorter input, so no date is pulled and then discarded.
for _, date in zip(range(num_fetcher), datetime_gen):
    Fetcher(date).fetch()

# Event loop
try:
    # Also stop once nothing is registered: select() would otherwise block
    # forever after the last response has been handled.
    while not stopped and selector.get_map():
        for event_key, event_mask in selector.select():
            callback = event_key.data
            callback(event_key, event_mask)
finally:
    # Runs on normal completion, on errors and on a manual interrupt.
    selector.close()
    conn.close()

2009-01-01 00:00:00 done
2009-01-02 00:00:00 done
2009-01-05 00:00:00 done
2009-01-07 00:00:00 done
2009-01-03 00:00:00 done
2009-01-06 00:00:00 done
2009-01-09 00:00:00 done
2009-01-08 00:00:00 done
2009-01-10 00:00:00 done
2009-01-04 00:00:00 done
2009-01-11 00:00:00 done
2009-01-12 00:00:00 done
2009-01-13 00:00:00 done
2009-01-14 00:00:00 done
2009-01-17 00:00:00 done
2009-01-20 00:00:00 done
2009-01-18 00:00:00 done
2009-01-16 00:00:00 done
2009-01-19 00:00:00 done
2009-01-15 00:00:00 done
2009-01-22 00:00:00 done
2009-01-21 00:00:00 done
2009-01-23 00:00:00 done
2009-01-24 00:00:00 done
2009-01-29 00:00:00 done
2009-01-30 00:00:00 done
2009-01-31 00:00:00 done
2009-01-27 00:00:00 done
2009-01-28 00:00:00 done
2009-02-02 00:00:00 done
2009-01-26 00:00:00 done
2009-02-01 00:00:00 done
2009-01-25 00:00:00 done
2009-02-03 00:00:00 done
2009-02-04 00:00:00 done
2009-02-06 00:00:00 done
2009-02-05 00:00:00 done
2009-02-13 00:00:00 done
2009-02-07 00:00:00 done
2009-02-12 00:00:00 done


KeyboardInterrupt: 

In [36]:
# Narrower range for stepping through the generator by hand.
start_date = datetime.strptime('2009-01-01', '%Y-%m-%d')
end_date = datetime.strptime('2010-01-01', '%Y-%m-%d')
datetime_gen = datetime_gen_fn(start_date, end_date)

In [38]:
# Take the next datetime from the generator; every run of this cell advances it by one step.
date = next(datetime_gen)

In [39]:
# ROC calendar year = Gregorian year - 1911.
roc_year = date.year - 1911

In [40]:
# Display the ROC year derived from `date`.
roc_year

98

In [50]:
# Query date as YYYMMDD: ROC year zero-padded to three digits, then month and day.
# The 03d format spec pads with zeros directly, replacing space-padding plus replace().
'{:03d}{}'.format(roc_year, date.strftime('%m%d'))

'0980102'