Skip to content

Commit

Permalink
Fix for downloading comments on YouTube Shorts
Browse files (browse the repository at this point in the history)
  • Loading branch information
egbertbouman committed Sep 2, 2023
1 parent 74ac424 commit e2718ac
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion youtube_comment_downloader/downloader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,6 +8,7 @@
import requests

YOUTUBE_VIDEO_URL = 'https://www.youtube.com/watch?v={youtube_id}'
+ YOUTUBE_CONSENT_URL = 'https://consent.youtube.com/save'

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'

Expand All @@ -16,6 +17,7 @@

YT_CFG_RE = r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;'
YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;\s*(?:var\s+meta|</script|\n)'
+ YT_HIDDEN_INPUT_RE = r'<input\s+type="hidden"\s+name="([A-Za-z0-9_]+)"\s+value="([A-Za-z0-9_\-\.]*)"\s*(?:required|)\s*>'


class YoutubeCommentDownloader:
Expand Down Expand Up @@ -46,6 +48,12 @@ def get_comments(self, youtube_id, *args, **kwargs):
def get_comments_from_url(self, youtube_url, sort_by=SORT_BY_RECENT, language=None, sleep=.1):
response = self.session.get(youtube_url)

+ if 'consent' in str(response.url):
+ # We may get redirected to a separate page for cookie consent. If this happens we agree automatically.
+ params = dict(re.findall(YT_HIDDEN_INPUT_RE, response.text))
+ params.update({'continue': youtube_url, 'set_eom': False, 'set_ytc': True, 'set_apyt': True})
+ response = self.session.post(YOUTUBE_CONSENT_URL, params=params)

html = response.text
ytcfg = json.loads(self.regex_search(html, YT_CFG_RE, default=''))
if not ytcfg:
Expand Down Expand Up @@ -88,7 +96,9 @@ def get_comments_from_url(self, youtube_url, sort_by=SORT_BY_RECENT, language=No
list(self.search_dict(response, 'appendContinuationItemsAction'))
for action in actions:
for item in action.get('continuationItems', []):
- if action['targetId'] in ['comments-section', 'engagement-panel-comments-section']:
+ if action['targetId'] in ['comments-section',
+ 'engagement-panel-comments-section',
+ 'shorts-engagement-panel-comments-section']:
# Process continuations for comments and replies.
continuations[:0] = [ep for ep in self.search_dict(item, 'continuationEndpoint')]
if action['targetId'].startswith('comment-replies-item') and 'continuationItemRenderer' in item:
Expand Down

0 comments on commit e2718ac

Please sign in to comment.