In [1]:
import requests
from lxml import etree
import pickle
import re
import datetime
import csv
from dataclasses import dataclass
from typing import Optional, Union

In [2]:
# Matches link titles of the form "収録曲/<difficulty>/<song title>".
# Group 1 is one of the four difficulty names, group 2 is everything after
# the second slash (the part the scraping functions return).
pattern_title = re.compile(
    r'^収録曲/(かんたん|ふつう|むずかしい|おに)/(.+)$'
)

In [3]:
@dataclass(frozen=True)
class Song:
    """Immutable record for one song; frozen, so instances are hashable."""

    # Title string; later cells compare it against titles scraped from the wiki.
    title: str
    # Date associated with the song, or None when absent.
    # NOTE(review): presumably the first-release date -- confirm with the data source.
    date: Union[datetime.date, None]
    # Value written to the "Work" CSV column, or None when absent.
    # NOTE(review): presumably the game/release the song belongs to -- confirm.
    work: Union[str, None]

In [4]:
# Load the previously pickled song collection.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# songs.pkl must come from a trusted source.
# NOTE(review): later cells read .title/.date/.work from each element, so this
# presumably holds Song instances, in which case the Song class must already
# be defined when unpickling -- confirm.
with open('songs.pkl', 'br') as pickle_file:
    songs = pickle.load(pickle_file)

In [5]:
def get_titles_from_ns2(url, timeout=30):
    """Scrape song titles from the song tables of a wiki page.

    Downloads ``url`` and looks at every ``<table>`` whose ``<thead>`` holds a
    ``<td>`` whose serialized markup contains ``BPM``. For each ``<tbody>``
    row of those tables, the first link whose ``title`` attribute matches
    ``pattern_title`` contributes its song-title part (regex group 2).

    Args:
        url: Address of the page to download.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        list[str]: One title per row that has a matching link, in document
        order. Rows without a matching link are skipped.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error
            status (otherwise an error page would silently yield ``[]``).
        requests.exceptions.Timeout: The server did not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    html = etree.HTML(response.text)

    titles = []
    for table in html.xpath('//table'):
        has_bpm_header = any(
            'BPM' in etree.tostring(td, encoding=str)
            for td in table.xpath('.//thead//td')
        )
        if not has_bpm_header:
            continue
        for row in table.xpath('.//tbody/tr'):
            # Only the first matching link of a row counts; appending in place
            # avoids the quadratic sum(list_of_lists, []) flattening.
            for link_title in row.xpath('.//a/@title'):
                matched = pattern_title.match(link_title)
                if matched:
                    titles.append(matched.group(2))
                    break
    return titles


def get_titles_from_ns2_pass(url, num_tables=7, timeout=30):
    """Scrape song titles from the trailing song tables of a wiki page.

    Downloads ``url``, selects every ``<table>`` whose ``<thead>`` holds a
    ``<td>`` whose serialized markup contains ``BPM``, and keeps only the
    last ``num_tables`` of them. For each ``<tbody>`` row of the kept tables,
    the first link whose ``title`` attribute matches ``pattern_title``
    contributes its song-title part (regex group 2).

    NOTE(review): why only the last 7 tables matter is not visible in this
    notebook -- presumably it depends on the page layout; confirm.

    Args:
        url: Address of the page to download.
        num_tables: How many of the trailing BPM tables to read. If the page
            has fewer, all of them are read.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        list[str]: One title per row that has a matching link, in document
        order. Rows without a matching link are skipped (the same behaviour
        as ``get_titles_from_ns2``) instead of raising ``IndexError``.

    Raises:
        requests.exceptions.HTTPError: The server answered with an error
            status.
        requests.exceptions.Timeout: The server did not respond in time.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    html = etree.HTML(response.text)

    bpm_tables = [
        table
        for table in html.xpath('//table')
        if any(
            'BPM' in etree.tostring(td, encoding=str)
            for td in table.xpath('.//thead//td')
        )
    ]
    # Explicit start index: a bare [-num_tables:] would select *every* table
    # when num_tables is 0.
    first_kept = max(len(bpm_tables) - num_tables, 0)

    titles = []
    for table in bpm_tables[first_kept:]:
        for row in table.xpath('.//tbody/tr'):
            # Only the first matching link of a row counts.
            for link_title in row.xpath('.//a/@title'):
                matched = pattern_title.match(link_title)
                if matched:
                    titles.append(matched.group(2))
                    break
    return titles

In [6]:
def parse_song(song: 'Song') -> list:
    """Convert a song into one CSV row: ``[title, date, work]``.

    Args:
        song: Object exposing ``title``, ``date`` and ``work`` attributes.

    Returns:
        A three-element list. The date is formatted as ``YYYY/MM/DD``. When
        ``song.date`` is ``None`` the date cell is ``None`` too, which
        ``csv.writer`` writes as an empty field (just like a missing work).
    """
    # The date field is Optional, so guard before calling strftime on it.
    if song.date is None:
        date_text = None
    else:
        date_text = song.date.strftime('%Y/%m/%d')
    return [song.title, date_text, song.work]

In [7]:
# Wiki pages scraped for song titles. Paths are percent-encoded UTF-8.
# Decoded path of the main page: /taiko-fumen/作品/NS2
url = 'https://wikiwiki.jp/taiko-fumen/%E4%BD%9C%E5%93%81/NS2'
# Decoded path of the sub-page: /taiko-fumen/作品/NS2/太鼓ミュージックパス
url_pass = (
    'https://wikiwiki.jp/taiko-fumen/%E4%BD%9C%E5%93%81/NS2/%E5%A4%AA%E9%BC%93%E3%83%9F%E3%83%A5%E3%83%BC%E3%82%B8%E3%83%83%E3%82%AF%E3%83%91%E3%82%B9'
)

In [8]:
# Titles found on the sub-page addressed by url_pass.
# NOTE(review): this uses get_titles_from_ns2, not get_titles_from_ns2_pass,
# which is defined above but never called in this notebook -- confirm that
# is intended.
titles_pass = get_titles_from_ns2(url=url_pass)

In [9]:
# Union of the titles from the main page and the sub-page, de-duplicated.
titles = set(get_titles_from_ns2(url)).union(titles_pass)

In [10]:
# Load the pickled song collection.
# NOTE(review): an earlier cell already loads the same file into `songs` and
# nothing in between modifies it, so this reload looks redundant -- confirm
# before removing.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# songs.pkl must come from a trusted source.
with open('songs.pkl', 'br') as pickle_file:
    songs = pickle.load(pickle_file)

In [11]:
# Keep, in their original order, the songs whose title was scraped from the wiki.
songs_ns2 = list(filter(lambda song: song.title in titles, songs))

In [12]:
# Export the selected songs as CSV: one header row, then one row per song.
# 'utf_8_sig' prepends a UTF-8 byte-order mark (presumably so spreadsheet
# software detects the encoding -- confirm). newline='' lets the csv writer
# control line endings, as the csv module documentation recommends.
with open('songsNS2.csv', 'w', encoding='utf_8_sig', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Title', 'Date', 'Work'])
    body_rows = list(map(parse_song, songs_ns2))
    csv_writer.writerows(body_rows)