In [1]:
import re
import requests
from html.parser import HTMLParser

In [2]:
class ParserKabuYoho(HTMLParser):
    """Collect news rows from the ``tb_new_news`` table of a kabuyoho page.

    While the parser is inside ``<table class="tb_new_news">`` every ``<tr>``
    opens a new row.  The first text chunk after a ``<span>`` is stored as a
    cell (the date column in the pages this was written for), and the first
    text chunk after an ``<a>`` whose ``href`` matches ``pattern`` is stored
    followed by the absolute URL of that link.  A row is appended to
    ``result`` at its ``</tr>`` if it collected at least one cell.

    Markup that appears inside the news table but outside any ``<tr>`` is
    ignored instead of raising or leaking into a previously finished row.

    Attributes:
        url: Base URL that matched relative links are re-rooted onto.
        pattern: Regex applied (anchored at the start) to each ``href``;
            group 1 is the query string kept in the absolute link.
        result: List of rows; each row is a list of strings.
    """

    url = 'https://kabuyoho.ifis.co.jp/index.php'
    pattern = re.compile(r'index\.php\?(action=tp1\&sa=consNewsDetail\&nid=.+)')

    def __init__(self):
        super().__init__()

        # State flags: inside the news table / next text is a span cell /
        # next text is the label of a matched link.
        self.flag_news = False
        self.flag_date = False
        self.flag_link = False

        self.result = list()
        # Row currently being filled; None whenever no <tr> is open.
        self.element = None
        # Absolute URL of the link whose text is awaited.
        self.link = ''

    def handle_starttag(self, tag, attrs):
        """Track entry into the news table, its rows, spans and news links."""
        if tag == 'table':
            # Only an exact class value switches collection on.
            if dict(attrs).get('class') == 'tb_new_news':
                self.flag_news = True
            return

        if not self.flag_news:
            return

        if tag == 'span':
            self.flag_date = True
        elif tag == 'a':
            # A valueless attribute (<a href>) is reported as None, which
            # must not be handed to the regex.
            href = dict(attrs).get('href')
            if not href:
                return
            m = self.pattern.match(href)
            if m:
                self.link = '%s?%s' % (self.url, m.group(1))
                self.flag_link = True
        elif tag == 'tr':
            self.element = list()

    def handle_data(self, data):
        """Store the text awaited by a preceding ``<span>`` or matched ``<a>``."""
        if not self.flag_news:
            return

        if self.flag_date:
            # The flag is consumed even when there is no open row, so stray
            # text cannot be mistaken for a later cell.
            self.flag_date = False
            if self.element is not None:
                self.element.append(data.strip())
            return

        if self.flag_link:
            if self.element is not None:
                self.element.append(data.strip())
                self.element.append(self.link)
            self.flag_link = False
            self.link = ''

    def handle_endtag(self, tag):
        """Close the news table, or emit the finished row on ``</tr>``."""
        if not self.flag_news:
            return

        if tag == 'table':
            self.flag_news = False
        elif tag == 'tr':
            if self.element:
                self.result.append(self.element)
            # Drop the reference so later text or a duplicate </tr> cannot
            # modify or re-append a row that is already in the result.
            self.element = None

In [3]:
# Download the report page that will be fed to the parser; the parser class
# supplies the base URL so request and generated links stay in sync.
parser = ParserKabuYoho()
url = parser.url
# 'bcode' presumably selects the stock code (8035 here) -- confirm against the site.
params = {'action': 'tp1',
          'sa': 'report_top',
          'bcode': '8035'}
# requests applies no timeout by default, so an unresponsive server would
# block this cell forever; bound the wait explicitly.
response = requests.get(url, params=params, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty result.
response.raise_for_status()

In [4]:
# Push the downloaded markup through the parser; close() flushes any text
# that is still buffered.
page_html = response.text
parser.feed(page_html)
parser.close()

# One collected row per output line: [date, headline, detail URL].
for news_row in parser.result:
    print(news_row)

['2024/05/20', '25年3月期経常予想。対前週0.7%上昇。', 'https://kabuyoho.ifis.co.jp/index.php?action=tp1&sa=consNewsDetail&nid=8035_20240520_cons_20240520_224532_62']
['2024/05/20', '米系大手証券、レーティング強気継続。目標株価引き上げ、48,000円。', 'https://kabuyoho.ifis.co.jp/index.php?action=tp1&sa=consNewsDetail&nid=8035_20240519_rep_20240520_110505_3']
['2024/05/16', '日系大手証券、レーティング強気継続。目標株価引き上げ、42,000円。', 'https://kabuyoho.ifis.co.jp/index.php?action=tp1&sa=consNewsDetail&nid=8035_20240516_rep_20240516_180007_13']
