In [3]:
import aiohttp
import asyncio
from collections import namedtuple
from pyquery import PyQuery as pq
import re
##################################################


class LOTTERY:
    """Scraper for the draw-history pages of www.taiwanlottery.com.tw.

    One instance targets one game (see ``names``). The history page is a
    form: a plain GET yields the hidden ``__VIEWSTATE``,
    ``__VIEWSTATEGENERATOR`` and ``__EVENTVALIDATION`` values, which are then
    echoed back in a POST asking for one year/month of draws.

    aiohttp sessions are kept in a class-level dict keyed by game name, so all
    instances of the same game share one session. Call ``await obj.close()``
    when finished to release it.
    """

    # Column names for one result row; the last five are in the same order as
    # the lists returned by get_ymdata().
    cols = [
        'name',
        'no', 'ymd', 'area1', 'area1_asc', 'area2'
    ]
    # Default year / month sent by post() and get_ymdata().
    # NOTE(review): the year looks like an ROC (Minguo) year - confirm.
    y_start = 103
    m_start = 1
    # Supported games, available as a list and as attributes (names.super ...).
    _names = ['super', 'big', 'today']
    names = namedtuple('types', _names)(*_names)
    #
    url_home = 'https://www.taiwanlottery.com.tw'
    urls_history = {
        names.super: f"{url_home}/lotto/superlotto638/history.aspx",
        names.big: f"{url_home}/lotto/Lotto649/history.aspx",
        names.today: f"{url_home}/lotto/DailyCash/history.aspx",
    }
    # Lazily created sessions, one per game name, shared by every instance.
    _ss = dict.fromkeys(_names, None)
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
    }
    # Total per-request timeout in seconds (passed to aiohttp.ClientTimeout).
    timeout = 30
    # post() gives up after this many GET attempts for the form tokens,
    # pausing token_retry_delay seconds after each attempt.
    max_token_tries = 10
    token_retry_delay = 0.5
    # (viewstate, viewstate generator, event validation) from the last page
    # that contained all three; None until a page has been parsed.
    last_threevars = None
    # ___________________________________________________-

    def __init__(self, name=names.super):
        """Bind the instance to one game.

        Args:
            name: one of ``names``; selects the history URL.

        Raises:
            KeyError: if ``name`` is not a key of ``urls_history``.
        """
        self.name = name
        self.url_history = self.urls_history[name]
        self.row = dict.fromkeys(self.cols, None)

    @property
    def ss(self) -> "aiohttp.ClientSession":
        """Return the session for this game name, creating it on first use.

        A session that has been closed is replaced by a fresh one rather than
        being handed back in an unusable state.
        """
        session = self._ss.get(self.name)
        if session is None or session.closed:
            connector = aiohttp.TCPConnector(ssl=True, limit=100)
            client_timeout = aiohttp.ClientTimeout(total=self.timeout)
            session = aiohttp.ClientSession(connector=connector, timeout=client_timeout)
            # Item assignment mutates the class-level dict, so the session is
            # shared with other instances of the same game.
            self._ss[self.name] = session
        #
        return session

    async def close(self):
        """Close and forget the session of this game name, if there is one."""
        session = self._ss.get(self.name)
        if session is None:
            return
        self._ss[self.name] = None
        if not session.closed:
            await session.close()

    def set_threevars(self, rtext: str):
        """Extract the three hidden form values from a page and remember them.

        ``last_threevars`` is only overwritten when all three values are
        present, so a page lacking them keeps the previous tokens.
        """
        doc = pq(rtext, parser='html')
        vs = doc.find("#__VIEWSTATE").val()
        vsg = doc.find("#__VIEWSTATEGENERATOR").val()
        ev = doc.find("#__EVENTVALIDATION").val()
        if vs and vsg and ev:
            self.last_threevars = vs, vsg, ev

    async def first_get(self):
        """GET the history page to pick up the hidden form values.

        Returns:
            The page text on a 200 response with a non-empty body, else None.
        """
        async with self.ss.get(self.url_history, headers=self.headers, proxy=None) as r:
            if r.status != 200:
                return None
            rtext = await r.text(encoding='utf8')
            if rtext:
                self.set_threevars(rtext)
                return rtext
        return None

    async def post(self, y=y_start, m=m_start):
        """POST the year/month query form and return the resulting page.

        Args:
            y: year value for the form's year drop-down.
            m: month value for the form's month drop-down.

        Returns:
            The page text on a 200 response with a non-empty body, else None.

        Raises:
            RuntimeError: if the hidden form values are still unavailable
                after ``max_token_tries`` GET attempts.
        """
        # Bounded retry: without a limit an unreachable or changed page would
        # keep this coroutine requesting the site forever.
        for _ in range(self.max_token_tries):
            if self.last_threevars:
                break
            await self.first_get()
            await asyncio.sleep(self.token_retry_delay)
        if not self.last_threevars:
            raise RuntimeError(
                f"could not obtain form tokens from {self.url_history} "
                f"after {self.max_token_tries} attempts"
            )
        #
        # NOTE(review): the field names below are hard-coded for the 'super'
        # game's form; the 'big' and 'today' pages presumably use different
        # control names - confirm before using post() with those games.
        payload = {
            "__VIEWSTATE": self.last_threevars[0],
            "__VIEWSTATEGENERATOR": self.last_threevars[1],
            "__EVENTVALIDATION": self.last_threevars[2],
            "forma": "請選擇遊戲",
            "SuperLotto638Control_history1$txtNO": "",
            "SuperLotto638Control_history1$chk": "radYM",
            "SuperLotto638Control_history1$dropYear": y,
            "SuperLotto638Control_history1$dropMonth": m,
            "SuperLotto638Control_history1$btnSubmit": "查詢",
        }
        #
        async with self.ss.post(self.url_history, headers=self.headers, proxy=None, data=payload) as r:
            if r.status != 200:
                return None
            rtext = await r.text(encoding='utf8')
            if rtext:
                # Each response carries fresh tokens for the next POST.
                self.set_threevars(rtext)
                return rtext
        return None

    async def get_ymdata(self, y=y_start, m=m_start):
        """Fetch and parse every draw listed for one year/month.

        Args:
            y: year value forwarded to post().
            m: month value forwarded to post().

        Returns:
            A list of ``[no, ymd, area1, area1_asc, area2]`` string lists,
            sorted by ``no`` (string comparison).

        Raises:
            RuntimeError: if post() returned no page (non-200 response or
                empty body), or could not obtain the form tokens.
        """
        rtext = await self.post(y=y, m=m)
        if not rtext:
            # pq(None) would otherwise fail with an uninformative TypeError.
            raise RuntimeError(
                f"no history page returned for {self.name!r} (y={y}, m={m})"
            )
        doc = pq(rtext, parser='html')
        #
        ym_data = []
        for tbcls in ['.table_org.td_hm', '.table_gre.td_hm']:
            for t1 in doc.find(tbcls):
                trs = pq(t1).find("tr")
                tr1 = trs.eq(1)
                tr4 = trs.eq(4)
                tr5 = trs.eq(5)
                #
                head_spans = tr1.find("span")
                no = head_spans.eq(0).text()
                ymd = head_spans.eq(1).text()
                # The combined span text ends with the second-area number
                # (read separately below), so the trailing "_NN" is cut off.
                area1 = tr4.find("span").text().replace(" ", '_')[:-3]
                area1_asc = tr5.find("span").text().replace(" ", '_')[:-3]
                area2 = tr4.find("span").eq(-1).text()
                #
                ym_data.append([no, ymd, area1, area1_asc, area2])
        #
        return sorted(ym_data, key=lambda x: x[0])

In [4]:
# Scraper bound to the 'super' game's history page.
superA = LOTTERY(name=LOTTERY.names.super)

In [5]:
ymdata = await superA.get_ymdata()

In [6]:
# Display the rows fetched in the previous cell.
ymdata

[['103000001', '103/01/02', '27_10_08_37_28_19', '08_10_19_27_28_37', '03'],
 ['103000002', '103/01/06', '07_18_08_23_31_12', '07_08_12_18_23_31', '02'],
 ['103000003', '103/01/09', '31_04_23_34_35_22', '04_22_23_31_34_35', '01'],
 ['103000004', '103/01/13', '04_03_30_24_23_26', '03_04_23_24_26_30', '02'],
 ['103000005', '103/01/16', '34_38_23_13_14_37', '13_14_23_34_37_38', '07'],
 ['103000006', '103/01/20', '36_04_23_09_37_21', '04_09_21_23_36_37', '03'],
 ['103000007', '103/01/23', '31_29_26_32_12_28', '12_26_28_29_31_32', '06'],
 ['103000008', '103/01/27', '38_36_20_28_26_08', '08_20_26_28_36_38', '08'],
 ['103000009', '103/01/30', '10_14_16_06_11_30', '06_10_11_14_16_30', '01']]

In [7]:
ymdata = await superA.get_ymdata(y=103, m=2)
ymdata

[['103000010', '103/02/03', '28_37_19_24_21_17', '17_19_21_24_28_37', '06'],
 ['103000011', '103/02/06', '02_08_25_35_29_36', '02_08_25_29_35_36', '02'],
 ['103000012', '103/02/10', '13_27_20_25_28_33', '13_20_25_27_28_33', '01'],
 ['103000013', '103/02/13', '16_35_36_20_06_08', '06_08_16_20_35_36', '08'],
 ['103000014', '103/02/17', '33_14_12_37_17_08', '08_12_14_17_33_37', '05'],
 ['103000015', '103/02/20', '30_01_10_03_18_13', '01_03_10_13_18_30', '01'],
 ['103000016', '103/02/24', '29_10_36_27_23_15', '10_15_23_27_29_36', '05'],
 ['103000017', '103/02/27', '23_26_04_08_20_21', '04_08_20_21_23_26', '05']]