From f6fcb7d99bd82cfbfdd3c3212c42436b4a83571b Mon Sep 17 00:00:00 2001
From: Zhengyang Song <songzy_thu@163.com>
Date: Sun, 27 Aug 2023 15:11:12 +0800
Subject: [PATCH] Fix the crawling of toutiao article URLs.

---
 weibo_spider/parser/page_parser.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/weibo_spider/parser/page_parser.py b/weibo_spider/parser/page_parser.py
index a2c5900..fcd1714 100644
--- a/weibo_spider/parser/page_parser.py
+++ b/weibo_spider/parser/page_parser.py
@@ -75,7 +75,7 @@ def get_one_page(self, weibo_id_list):
                         publish_time = datetime_util.str_to_time(
                             weibo.publish_time)
 
-                        if publish_time < since_date:                            
+                        if publish_time < since_date:
                             # As of 2023.05, there can be at most 2 pinned weibo.
                             # We will continue for at most 2 times before return.
                             if self.page == 1 and cur_pinned_count < MAX_PINNED_COUNT:
@@ -158,9 +158,9 @@ def get_article_url(self, info):
         """获取微博头条文章的url"""
         article_url = ''
         text = handle_garbled(info)
-        if text.startswith(u'发布了头条文章'):
+        if text.startswith(u'发布了头条文章') or text.startswith(u'我发表了头条文章'):
             url = info.xpath('.//a/@href')
-            if url and url[0].startswith('https://weibo.cn/sinaurl'):
+            if url and url[0].startswith('https://weibo.com/ttarticle'):
                 article_url = url[0]
         return article_url