Skip to content

Commit

Permalink
Merge a04a701 into e0a9f0d
Browse files Browse the repository at this point in the history
  • Loading branch information
jdepoix committed Apr 17, 2023
2 parents e0a9f0d + a04a701 commit 796129f
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 2 deletions.
1 change: 1 addition & 0 deletions youtube_transcript_api/__init__.py
Expand Up @@ -13,4 +13,5 @@
CookiesInvalid,
FailedToCreateConsentCookie,
YouTubeRequestFailed,
InvalidVideoId,
)
8 changes: 8 additions & 0 deletions youtube_transcript_api/_errors.py
Expand Up @@ -53,6 +53,14 @@ class VideoUnavailable(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = 'The video is no longer available'


class InvalidVideoId(CouldNotRetrieveTranscript):
    """
    Error signalling that the value passed as video id is not a valid id,
    typically because a full YouTube URL was passed instead of the bare id.
    """

    # Explanation attached to this error; it tells the user to pass the bare
    # id rather than the watch URL and shows a wrong and a right call.
    # NOTE(review): presumably rendered by CouldNotRetrieveTranscript when the
    # error message is built -- confirm against the base class.
    CAUSE_MESSAGE = (
        'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\n'
        'Do NOT run: `YouTubeTranscriptApi.get_transcript("https://www.youtube.com/watch?v=1234")`\n'
        'Instead run: `YouTubeTranscriptApi.get_transcript("1234")`'
    )


class TooManyRequests(CouldNotRetrieveTranscript):
CAUSE_MESSAGE = (
'YouTube is receiving too many requests from this IP and now requires solving a captcha to continue. '
Expand Down
6 changes: 4 additions & 2 deletions youtube_transcript_api/_transcripts.py
Expand Up @@ -24,6 +24,7 @@
TranslationLanguageNotAvailable,
NoTranscriptAvailable,
FailedToCreateConsentCookie,
InvalidVideoId,
)
from ._settings import WATCH_URL

Expand All @@ -41,7 +42,6 @@ def __init__(self, http_client):
self._http_client = http_client

def fetch(self, video_id):

return TranscriptList.build(
self._http_client,
video_id,
Expand All @@ -52,6 +52,8 @@ def _extract_captions_json(self, html, video_id):
splitted_html = html.split('"captions":')

if len(splitted_html) <= 1:
if video_id.startswith('http://') or video_id.startswith('https://'):
raise InvalidVideoId(video_id)
if 'class="g-recaptcha"' in html:
raise TooManyRequests(video_id)
if '"playabilityStatus":' not in html:
Expand Down Expand Up @@ -182,7 +184,7 @@ def find_transcript(self, language_codes):

def find_generated_transcript(self, language_codes):
"""
Finds a automatically generated transcript for a given language code.
Finds an automatically generated transcript for a given language code.
:param language_codes: A list of language codes in a descending priority. For example, if this is set to
['de', 'en'] it will first try to fetch the german transcript (de) and then fetch the english transcript (en) if
Expand Down
11 changes: 11 additions & 0 deletions youtube_transcript_api/test/test_api.py
Expand Up @@ -20,6 +20,7 @@
CookiesInvalid,
FailedToCreateConsentCookie,
YouTubeRequestFailed,
InvalidVideoId,
)


Expand Down Expand Up @@ -97,6 +98,16 @@ def test_list_transcripts__find_generated(self):

self.assertTrue(transcript.is_generated)

def test_list_transcripts__url_as_video_id(self):
    """
    Passing a full watch URL instead of a video id to `list_transcripts`
    must raise `InvalidVideoId`.
    """
    # Serve a static watch page for any request to the watch endpoint, so
    # the call below never touches the network.
    httpretty.register_uri(
        httpretty.GET,
        'https://www.youtube.com/watch',
        body=load_asset('youtube_transcripts_disabled.html.static'),
    )

    url_instead_of_id = 'https://www.youtube.com/watch?v=GJLlxj_dtq8'

    self.assertRaises(
        InvalidVideoId,
        YouTubeTranscriptApi.list_transcripts,
        url_instead_of_id,
    )

def test_translate_transcript(self):
transcript = YouTubeTranscriptApi.list_transcripts('GJLlxj_dtq8').find_transcript(['en'])

Expand Down

0 comments on commit 796129f

Please sign in to comment.