In [13]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part

In [14]:
def video_transcription(project_id: str, location: str) -> str:
    # Initialize Vertex AI
    vertexai.init(project=project_id, location=location)
    # Load the model
    vision_model = GenerativeModel("gemini-1.0-pro-vision")
    # Generate text
    response = vision_model.generate_content(
        [
            Part.from_uri(
                "gs://cloud-samples-data/video/animals.mp4", mime_type="video/mp4"
            ),
            "What is in the video?",
        ]
    )
    print(response)
    return response.text

# Transcription using Gemini 1.0 Pro Version

In [15]:
import vertexai
from vertexai.generative_models import GenerativeModel, Part

In [16]:
PROJECT_ID = "vertexai-gemini-hackathon-2024"
VIDEO_URI = ["gs://cloud-samples-data/video/animals.mp4",
             "gs://jay_tank_video_531251/leopardtankbrake.mp4",
             "gs://youtube_videos_1/Charlie Chaplin - The Kid - Fight Scene.mp4"]

PROMPT = "Transcribe the video."
PROMPT_NO_SPEECH = "Transcribe the video despite not having any "
LOCATION = "us-central1"

In [17]:
def gemini_video_transcription(video_uri: str, prompt: str, project_id: str, location: str) -> str:
    # Initialize Vertex AI
    vertexai.init(project=project_id, location=location)
    # Load the model
    vision_model = GenerativeModel("gemini-1.0-pro-vision")
    # Generate text
    response = vision_model.generate_content(
        [
            Part.from_uri(video_uri, mime_type="video/mp4"),
            prompt,
        ]
    )
    return response.text

In [6]:
result = gemini_video_transcription(VIDEO_URI[2], PROMPT, PROJECT_ID, LOCATION)
result

ValueError: Cannot get the response text.
Cannot get the Candidate text.
Response candidate content has no parts (and thus no text). The candidate is likely blocked by the safety filters.
Content:
{}
Candidate:
{
  "finish_reason": "PROHIBITED_CONTENT"
}
Response:
{
  "candidates": [
    {
      "finish_reason": "PROHIBITED_CONTENT"
    }
  ],
  "usage_metadata": {
    "prompt_token_count": 1037,
    "total_token_count": 1037
  }
}

In [9]:
result = gemini_video_transcription(VIDEO_URI[2], PROMPT_NO_SPEECH, PROJECT_ID, LOCATION)
result

AttributeError: Content has no parts.

# Transcription using Gemini 1.5

In [18]:
import google.generativeai as genai

In [19]:
GOOGLE_API_KEY='AIzaSyDxaYa7A_b0d8mvdIeVA5aALEEOgM9YXls'
genai.configure(api_key=GOOGLE_API_KEY)

In [20]:
# List available models.
print('All available models:')
for model in genai.list_models():
    if 'generateContent' in model.supported_generation_methods:
        print(f'- {model.name}')

All available models:
- models/gemini-1.0-pro
- models/gemini-1.0-pro-001
- models/gemini-1.0-pro-latest
- models/gemini-1.0-pro-vision-latest
- models/gemini-1.5-pro-latest
- models/gemini-pro
- models/gemini-pro-vision


In [21]:
def generate_response(video, language = 'English', model = 'gemini-1.5-pro-latest'):
    # Return a gemini response
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel(model)
    prompt = f"Transcribe the YouTube video link {video} in {language}"
    response = model.generate_content(prompt)
    result = ''.join([p.text for p in response.candidates[0].content.parts])
    return result

In [11]:
transcription = generate_response(VIDEO_URI[1])

In [12]:
transcription = generate_response(VIDEO_LINK)

NameError: name 'VIDEO_LINK' is not defined

In [13]:
transcription

"## I'm Unable to Process Video Files Directly\n\nUnfortunately, as a large language model, I lack the capability to directly access and process video files, including those stored in Google Cloud Storage buckets like the one you provided (gs://jay\\_tank\\_video\\_531251/leopardtankbrake.mp4). My abilities are confined to text-based information and interaction.\n\n**Here are some alternative solutions you can consider:**\n\n1. **YouTube Transcript:** If the video is publicly available on YouTube, check if it has automatically generated captions or a transcript provided by the uploader. \n2. **Video Editing Software:**  Some video editing software offer speech-to-text functionality that can transcribe the audio within the video.\n3. **Transcription Services:** Several online services and platforms provide manual or automated transcription services for video and audio files. \n4. **Optical Character Recognition (OCR):** If the video contains text overlays or subtitles, OCR tools can ext

# Transcription using Google Cloud Speech API

In [None]:
# Enable Cloud Speech-to-Text API in the GCP Project

In [2]:
pip install ipython google-cloud-speech

Collecting google-cloud-speech
  Downloading google_cloud_speech-2.26.0-py2.py3-none-any.whl.metadata (5.2 kB)
Downloading google_cloud_speech-2.26.0-py2.py3-none-any.whl (284 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m285.0/285.0 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: google-cloud-speech
Successfully installed google-cloud-speech-2.26.0
Note: you may need to restart the kernel to use updated packages.


In [10]:
from google.cloud import speech

In [11]:
def speech_to_text(config: speech.RecognitionConfig, audio: speech.RecognitionAudio,
) -> speech.RecognizeResponse:
    client = speech.SpeechClient()

    # Synchronous speech recognition request
    response = client.recognize(config=config, audio=audio)

    return response

In [12]:
config = speech.RecognitionConfig(language_code="en")
audio = speech.RecognitionAudio(uri="gs://cloud-samples-data/speech/brooklyn_bridge.flac")

In [13]:
response = speech_to_text(config, audio)

In [14]:
response.results

[alternatives {
  transcript: "how old is the Brooklyn Bridge"
  confidence: 0.982000887
}
result_end_time {
  seconds: 1
  nanos: 770000000
}
language_code: "en-us"
]

In [9]:
def print_result(result: speech.SpeechRecognitionResult):
    best_alternative = result.alternatives[0]
    print(f"language_code: {result.language_code}")
    print(f"transcript:    {best_alternative.transcript}")
    print(f"confidence:    {best_alternative.confidence:.0%}")

def print_response(response: speech.RecognizeResponse):
    for result in response.results:
        print_result(result)

In [10]:
print_response(response)

language_code: en-us
transcript:    how old is the Brooklyn Bridge
confidence:    98%


In [13]:
import Youtube_transcript

ModuleNotFoundError: No module named 'Youtube_transcript'

# Transcription directly from Youtube link videos

In [9]:
import re
from youtube_transcript_api import YouTubeTranscriptApi

GOOGLE_API_KEY='AIzaSyDxaYa7A_b0d8mvdIeVA5aALEEOgM9YXls'
genai.configure(api_key=GOOGLE_API_KEY)

In [10]:
# def parse_video_id(video_url):
#     try:
#         # Split the URL by "v="
#         split_url = video_url.split("v=")
#         # Extract the substring after "v="
#         video_id = split_url[1]
#         return video_id
#     except IndexError:
#         print("Invalid URL format")
#         return None

# def get_clean_transcript(video_id):
#     """Takes a youtube video id (in the video link, can look like: YrVVXFMgXrw). Pulls transcript and returns clean version."""
#     # checks to see if full link then returns just id
#     if "v=" in video_id:
#         video_id = parse_video_id(video_id)
#     # Pull transcript
#     script = YouTubeTranscriptApi.get_transcript(video_id)
#     # Combine 'text' fields into one string
#     input_string = ' '.join([line['text'] for line in script])
#     # remove special charaters
#     pattern = re.compile(r'[^a-zA-Z0-9\s.,;:!"\'()\-—–?‘’“”\[\]]')
#     transcript =  pattern.sub('', input_string)

#     return transcript

In [11]:
VIDEO_LINK = "https://www.youtube.com/watch?v=cen0rBKLuYE"
VIDEO_LINK_NO_SPEECH = "https://www.youtube.com/watch?v=Z7-QdoofMq8"

In [12]:
result_speech = get_clean_transcript(VIDEO_LINK)
result_speech

NameError: name 'get_clean_transcript' is not defined

In [16]:
# Cannot transcribe silent movies or scenes with some action but not talking or saound.
try:
    result_no_speech = get_clean_transcript(VIDEO_LINK_NO_SPEECH)
    result_no_speech
except NameError:
    print("Need to use Gemini")

Need to use Gemini


# Testing YouTube Python API

In [22]:
VIDEO_LINK = "https://www.youtube.com/watch?v=cen0rBKLuYE"
VIDEO_LINK_NO_SPEECH = "https://www.youtube.com/watch?v=Z7-QdoofMq8"

In [23]:
transcript_list = YouTubeTranscriptApi.list_transcripts("cen0rBKLuYE")
# print(transcript_list)

In [24]:
transcript = transcript_list.find_transcript
print(transcript)

<bound method TranscriptList.find_transcript of <youtube_transcript_api._transcripts.TranscriptList object at 0x7f44c3c419f0>>


In [25]:
def parse_video_id(video_url):
    try:
        # Split the URL by "v="
        split_url = video_url.split("v=")
        # Extract the substring after "v="
        video_id = split_url[1]
        return video_id
    except IndexError:
        return "Invalid URL format"

def clean_transcript(transcript):
    """Takes a youtube video id (in the video link, can look like: YrVVXFMgXrw). Pulls transcript and returns clean version."""
    input_string = ' '.join([line['text'] for line in transcript])
    # remove special charaters
    pattern = re.compile(r'[^a-zA-Z0-9\s.,;:!"\'()\-—–?‘’“”\[\]]')
    clean_transcript =  pattern.sub('', input_string)

    return clean_transcript

def video_transcript_list(video_id):
    print(video_id)
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        return transcript_list
    except Exception as exception:
        raise exception

def video_transcript(transcript_list, language_code = ["en"]):
    return transcript_list.find_transcript(language_code)

def fetch_transcript(transcript):
    return transcript.fetch()
    
def video_language(transcript):
    return transcript.language

def video_translation_languages(transcript):
    return transcript.translation_languages

def video_language_supported(translation_languages, language):
    count = 0
    nlang = len(translation_languages)
    for lang in translation_languages:
        count += 1
        if lang['language_code'] == language:
            # print(f"Yes, language '{lang['language_code']}' is translatable.")
            return True
        else:
            if count == nlang:
                # print(f"Language '{language}' is not supported for translation")
                break
            else:
                continue
    return False

def video_translation(transcript, language):
    # if video_language_supported(video_translation_languages(transcript), language):
    #     language in transcript.translation_languages
    #     translation = transcript.translate(language)
    #     return fetch_transcript(translation)
    # else:
    #     return [{'text': f"Language '{language}' is not available for translation.", 'start': 0.0, 'duration': 0.0}]
    
    if transcript.is_translatable:
        try:
            translation = transcript.translate(language)
            return fetch_transcript(translation)
        except Exception as exception:
            print("Error: TranslationLanguageNotAvailable. Could not translate the video because the translation language is not avaialable.")
            # raise exception
            return [{'text': f"Language '{language}' is not available for translation.", 'start': 0.0, 'duration': 0.0}]
    else:
        return [{'text': f"Language '{language}' is not available for translation.", 'start': 0.0, 'duration': 0.0}]

In [26]:
transcript_list = video_transcript_list(parse_video_id(VIDEO_LINK))
transcript_list

cen0rBKLuYE


<youtube_transcript_api._transcripts.TranscriptList at 0x7f44c3c930a0>

In [27]:
transcript = video_transcript(transcript_list)
transcript

<youtube_transcript_api._transcripts.Transcript at 0x7f44c3c92170>

In [28]:
fetched = fetch_transcript(transcript)

In [29]:
final_transcript = clean_transcript(fetched)
final_transcript

"BARTENDER\nI told you, you're not welcome here. You're not welcome anywhere. Now, get the f out of my bar. LOGAN\nJust give me one more drink and then I'll leave. DEADPOOL\nHi, peanut. I'm going to need you\nto come with me right now. LOGAN\nLook, lady, I'm not interested. DEADPOOL\nAlright, well, I'm sort of on the tick-tick,\nso upsy-daisy, here we go. LOGAN\nWhoa, hey, hey! DEADPOOL\nOh. Whiskey dick of the claws.\nIt's quite common in Wolverines over 40. LOGAN\nYou don't want this. DEADPOOL\nUnless you want to take a deep breath through your f forehead,\nI suggest you reconsider. DEADPOOL\nI'm about to lose everything that I've ever cared about. WOLVERINE\nNot my f problem. DEADPOOL\nIs that what you said when your world went to s? WOLVERINE\nCome again? PARADOX\nThis Wolverine let down his entire world. WADE WILSON\nWant to talk about what's haunting you, or should we wait\nfor a third act flashback? WOLVERINE\nUh, go f yourself. DEADPOOL\nI don't know anything about saving world

In [30]:
translated_transcript = clean_transcript(video_translation(transcript, 'es'))
translated_transcript

'BARTENDER\nTe lo dije, no eres bienvenido aqu. No eres bienvenido a ningn lado. Ahora, lrgate de mi bar. LOGAN\nSlo dame un trago ms y luego me ir. DEADPOOL\nHola, man. Voy a necesitar que\nvengas conmigo ahora mismo. LOGAN\nMire seora, no me interesa. DEADPOOL\nMuy bien, bueno, estoy como en el tic-tic,\nas que upsy-daisy, all vamos. LOGAN \nVaya, oye, oye! Consorcio Inactivo\nAh. [  ] de whisky de las garras.\nEs bastante comn en Wolverines mayores de 40 aos. LOGAN\nNo quieres esto. DEADPOOL\nA menos que quieras respirar profundamente por tu [  ] frente, te\nsugiero que lo reconsideres. DEADPOOL\nEstoy a punto de perder todo lo que alguna vez me import. WOLVERINE\nNo es mi maldito problema. DEADPOOL \nEs eso lo que dijiste cuando tu mundo se fue a la [  ]? WOLVERINE \nVen otra vez? PARADOJA\nEste Wolverine decepcion a todo su mundo. WADE WILSON \nQuieres hablar sobre lo que te atormenta o deberamos esperar a\nun flashback del tercer acto? WOLVERINE\nUh, vete a la [  ]. DEADPOOL\nNo 

In [31]:
translated_transcript = clean_transcript(video_translation(transcript, 'za'))
translated_transcript

Error: TranslationLanguageNotAvailable. Could not translate the video because the translation language is not avaialable.


"Language 'za' is not available for translation."

In [32]:
transcript.is_translatable

True

In [33]:
transcript.is_generated

False

In [34]:
transcript_list_no_speech = video_transcript_list(parse_video_id(VIDEO_LINK_NO_SPEECH))
transcript_list_no_speech

Z7-QdoofMq8


TranscriptsDisabled: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=Z7-QdoofMq8! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!

# Sentiment Analysis with Gemini 1.5

In [39]:
from flask import request, jsonify, current_app
import google.generativeai as genai

def generate_sentiment_response(transcription, model = 'gemini-1.5-pro-latest'):
    # Return a gemini response
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel(model)
    prompt = f'''What is the sentiment anylisis of the video transcription? 
        Input:
            transcript: A string containing the full text transcript of a video.
        Output (in markdown):
            Overall Sentiment: This is the general sentiment of the entire transcript, categorized as either "Positive", "Negative", or "Neutral".
            Sentiment Summary: Briefly descript the sentiment in the video adding reason to how the overall sentiment categoized.
        Video transcription (Input): {transcription}'''
    response = model.generate_content(prompt)
    result = ''.join([p.text for p in response.candidates[0].content.parts])
    return response

In [43]:
transc_sentiment = generate_sentiment_response(final_transcript)

In [44]:
transc_sentiment

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=glm.GenerateContentResponse({'candidates': [{'content': {'parts': [{'text': '## Sentiment Analysis of Video Transcript\n\n**Overall Sentiment:** Negative\n\n**Sentiment Summary:** The overall sentiment of the video transcript is negative due to the prevalent use of strong negative language, aggressive interactions, and themes of conflict and negativity. \n\n*   **Strong Language:** The consistent use of profanity throughout the dialogue contributes significantly to the negative sentiment. Words like "f***" and "s***" create a hostile and aggressive atmosphere. \n*   **Aggressive Interactions:** The characters, particularly Deadpool and Wolverine, engage in antagonistic exchanges filled with insults and threats. The forceful nature of their interactions, including physical altercations, further reinforces the negative sentiment.\n*   **Themes of Conflict and Negativity:** The dialogue revolves around themes 

In [45]:
print(''.join([p.text for p in transc_sentiment.candidates[0].content.parts]))

## Sentiment Analysis of Video Transcript

**Overall Sentiment:** Negative

**Sentiment Summary:** The overall sentiment of the video transcript is negative due to the prevalent use of strong negative language, aggressive interactions, and themes of conflict and negativity. 

*   **Strong Language:** The consistent use of profanity throughout the dialogue contributes significantly to the negative sentiment. Words like "f***" and "s***" create a hostile and aggressive atmosphere. 
*   **Aggressive Interactions:** The characters, particularly Deadpool and Wolverine, engage in antagonistic exchanges filled with insults and threats. The forceful nature of their interactions, including physical altercations, further reinforces the negative sentiment.
*   **Themes of Conflict and Negativity:** The dialogue revolves around themes of loss, negativity, and internal struggles. Deadpool's desperation and Wolverine's cynicism contribute to the overall sense of negativity. 
*   **Humor as a Counter

# Video Transcription Summarization with Gemini 1.5

In [25]:
def generate_summary_response(transcription, prompt, model = 'gemini-1.5-pro-latest'):
    # Return a gemini response
    genai.configure(api_key=GOOGLE_API_KEY)
    model = genai.GenerativeModel(model)
    question = f"{prompt}: {transcription}"
    response = model.generate_content(question)
    result = ''.join([p.text for p in response.candidates[0].content.parts])
    return result

In [84]:
prompt = '''
    Give me all the topics in the video transcription.
    Structure your response as markdown format, with the overall topic at the top followed by each supporting topic.
    Topics should be in chronological order. 
    The Overall topic should just be bolded and italicized, and the sub-topics should be formatted as dash list, with the topic bolded and description non-bolded. 
    Do not repeat profanity words found in the transcription within your answer.
    Video transcription
    '''

transc_summary = generate_summary_response(final_transcript, prompt)

In [85]:
# t = re.sub(r"(?<!\n)\n(?!\n)", "<br/>", transc_summary)
t = re.sub(r"\n", "<br/>", transc_summary)
t

"***_Overall Topic: Deadpool and Wolverine's Partnership_***<br/><br/>- **Confrontation at the Bar**: Deadpool attempts to recruit a reluctant Logan, highlighting Logan's past failures and the potential consequences of inaction. <br/>- **Paradox's Intervention**:  A mysterious figure named Paradox emphasizes Logan's past shortcomings and the need for his involvement.<br/>- **Deadpool's Persuasion**: Deadpool appeals to Logan's sense of responsibility and experience as an X-Man, urging him to help save the world. <br/>- **Action Sequence Tease**:  Deadpool hints at an upcoming action sequence with uncertain outcomes. <br/>- **Humorous Exchange with Blind Al**: Deadpool and Blind Al engage in a playful conversation about limitations imposed by film executives, referencing various drug-related slang and movie titles. <br/>"

***_Overall Topic: Deadpool and Wolverine's Partnership_***<br/><br/>- **Confrontation at the Bar**: Deadpool attempts to recruit a reluctant Logan, highlighting Logan's past failures and the potential consequences of inaction. <br/>- **Paradox's Intervention**:  A mysterious figure named Paradox emphasizes Logan's past shortcomings and the need for his involvement.<br/>- **Deadpool's Persuasion**: Deadpool appeals to Logan's sense of responsibility and experience as an X-Man, urging him to help save the world. <br/>- **Action Sequence Tease**:  Deadpool hints at an upcoming action sequence with uncertain outcomes. <br/>- **Humorous Exchange with Blind Al**: Deadpool and Blind Al engage in a playful conversation about limitations imposed by film executives, referencing various drug-related slang and movie titles. <br/>

In [96]:
 prompt = '''
    Give me the sentiment of the video transcription followed by the overall summary. 
    Structure your response as markdown format, with the overall sentiment at the top followed by each supporting section summary and sentiment.
    It should be in chronological order. 
    The Overall sentiment should just be bolded and italicized, and the sub-sentiments should be formatted as dash list, with the sentiment bolded and description and summary non-bolded. 
    Do not repeat profanity words found in the transcription within your answer.
    Video transcription
    '''
transc_summary = generate_summary_response(final_transcript, prompt)

In [98]:
t = re.sub(r"\n", "<br/>", transc_summary)
t

"***Overall Sentiment: Humorous and Action-Packed***<br/> <br/> - **Humorous**: The dialogue is filled with witty banter, sarcasm, and pop culture references, creating a lighthearted and comedic tone. <br/> - **Action-Packed**: The video includes references to fight scenes and intense situations, suggesting a thrilling and fast-paced plot.<br/> <br/> * **Opening Scene**:<br/>     - **Confrontational**: The bartender's hostility towards Logan sets a tense atmosphere.<br/>     - **Humorous**: Deadpool's entrance and subsequent interaction with Logan injects humor into the scene.<br/> * **Deadpool's Plea**:<br/>     - **Desperate**: Deadpool's urgency and mention of losing everything he cares about indicate a dire situation. <br/>     - **Mysterious**: The reference to Logan's past and a potential failure adds an element of intrigue.<br/> * **Wolverine's Reluctance**:<br/>     - **Dismissive**:  Logan's initial resistance to Deadpool's request and self-proclaimed lack of heroism create a 

***Overall Sentiment: Humorous and Action-Packed***<br/> <br/> - **Humorous**: The dialogue is filled with witty banter, sarcasm, and pop culture references, creating a lighthearted and comedic tone. <br/> - **Action-Packed**: The video includes references to fight scenes and intense situations, suggesting a thrilling and fast-paced plot.<br/> <br/> * **Opening Scene**:<br/>     - **Confrontational**: The bartender's hostility towards Logan sets a tense atmosphere.<br/>     - **Humorous**: Deadpool's entrance and subsequent interaction with Logan injects humor into the scene.<br/> * **Deadpool's Plea**:<br/>     - **Desperate**: Deadpool's urgency and mention of losing everything he cares about indicate a dire situation. <br/>     - **Mysterious**: The reference to Logan's past and a potential failure adds an element of intrigue.<br/> * **Wolverine's Reluctance**:<br/>     - **Dismissive**:  Logan's initial resistance to Deadpool's request and self-proclaimed lack of heroism create a sense of conflict.<br/> * **Action and Excitement**:<br/>     - **Thrilling**: The mention of slow-motion action sequences and the uncertainty of life or death suggests exciting and high-stakes scenarios.<br/> * **Censorship and Humor**:<br/>     - **Humorous**: The conversation between Deadpool and Blind Al about censored content is filled with playful banter and witty references. <br/> 

In [None]:
prompt = '''
    Give me a summary of the video transcription. Organize it by topic, and then add the sentiment for each topic. 
    Structure your response as markdown format, with the overall topic and sentiment at the top followed by each supporting topic and sentiment.
    Topics and Sentiments should be in chronological order. 
    The Overall topic and sentiment should be bolded and italicized, and the sub-topics and sentiments should be formatted as a dash list, with the topic and sentiment bolded and description non-bolded. 
    Do not repeat profanity words found in the transcription within your answer.
    Video transcription
    '''

In [37]:
print(f"Video language is {video_language(transcript)}.")

Video language is English.


In [69]:
# whether it has been manually created or generated by YouTube
f"Transcription generated by Youtube {transcript.is_generated}\n\n"
# whether this transcript can be translated or not
f"Is it translatable? {transcript.is_translatable}\n\n"
# a list of languages the transcript can be translated to
f"List of languages it can be translated {transcript.translation_languages}\n\n"

"List of languages it can be translated [{'language': 'Afrikaans', 'language_code': 'af'}, {'language': 'Akan', 'language_code': 'ak'}, {'language': 'Albanian', 'language_code': 'sq'}, {'language': 'Amharic', 'language_code': 'am'}, {'language': 'Arabic', 'language_code': 'ar'}, {'language': 'Armenian', 'language_code': 'hy'}, {'language': 'Assamese', 'language_code': 'as'}, {'language': 'Aymara', 'language_code': 'ay'}, {'language': 'Azerbaijani', 'language_code': 'az'}, {'language': 'Bangla', 'language_code': 'bn'}, {'language': 'Basque', 'language_code': 'eu'}, {'language': 'Belarusian', 'language_code': 'be'}, {'language': 'Bhojpuri', 'language_code': 'bho'}, {'language': 'Bosnian', 'language_code': 'bs'}, {'language': 'Bulgarian', 'language_code': 'bg'}, {'language': 'Burmese', 'language_code': 'my'}, {'language': 'Catalan', 'language_code': 'ca'}, {'language': 'Cebuano', 'language_code': 'ceb'}, {'language': 'Chinese (Simplified)', 'language_code': 'zh-Hans'}, {'language': 'Chine

In [33]:
transcript = transcript_list.find_transcript(['en'])
translated_transcript = transcript.translate('es')
print(translated_transcript.fetch())

[{'text': 'BARTENDER\nTe lo dije, no eres bienvenido aquí.', 'start': 1.367, 'duration': 2.084}, {'text': 'No eres bienvenido a ningún lado.', 'start': 3.493, 'duration': 1.207}, {'text': 'Ahora, lárgate de mi bar.', 'start': 5.201, 'duration': 1.25}, {'text': 'LOGAN\nSólo dame un trago más', 'start': 6.575, 'duration': 1.167}, {'text': 'y luego me iré.', 'start': 7.784, 'duration': 1.0}, {'text': 'DEADPOOL\nHola, maní.', 'start': 10.658, 'duration': 0.793}, {'text': 'Voy a necesitar que\nvengas conmigo ahora mismo.', 'start': 11.534, 'duration': 2.0}, {'text': 'LOGAN\nMire señora, no me interesa.', 'start': 14.201, 'duration': 1.583}, {'text': 'DEADPOOL\nMuy bien, bueno, estoy como', 'start': 16.617, 'duration': 0.834}, {'text': 'en el tic-tic,\nasí que upsy-daisy, allá vamos.', 'start': 17.534, 'duration': 2.0}, {'text': 'LOGAN ¡\nVaya, oye, oye!', 'start': 19.575, 'duration': 0.834}, {'text': 'Consorcio Inactivo\nAh.', 'start': 21.825, 'duration': 0.709}, {'text': '[ __ ] de whisky 

In [32]:
print(transcript)

en ("English")[TRANSLATABLE]


In [34]:
transcript = transcript_list.find_transcript(['en'])

In [35]:
transcript

<youtube_transcript_api._transcripts.Transcript at 0x7fe702137130>

In [36]:
print(
    f"Video id is {transcript.video_id}\n\n",
    f"The language in the video is in {transcript.language}\n\n",
    # transcript.language_code,
    # whether it has been manually created or generated by YouTube
    f"Transcription manually or generated by Youtube {transcript.is_generated}\n\n",
    # whether this transcript can be translated or not
    f"Can it be translated? True or False -> {transcript.is_translatable}\n\n",
    # a list of languages the transcript can be translated to
    f"List of languages it can be translated {transcript.translation_languages}\n\n",
)

Video id is cen0rBKLuYE

 The language in the video is in English

 Transcription manually or generated by Youtube False

 Can it be translated? True or False -> True

 List of languages it can be translated [{'language': 'Afrikaans', 'language_code': 'af'}, {'language': 'Akan', 'language_code': 'ak'}, {'language': 'Albanian', 'language_code': 'sq'}, {'language': 'Amharic', 'language_code': 'am'}, {'language': 'Arabic', 'language_code': 'ar'}, {'language': 'Armenian', 'language_code': 'hy'}, {'language': 'Assamese', 'language_code': 'as'}, {'language': 'Aymara', 'language_code': 'ay'}, {'language': 'Azerbaijani', 'language_code': 'az'}, {'language': 'Bangla', 'language_code': 'bn'}, {'language': 'Basque', 'language_code': 'eu'}, {'language': 'Belarusian', 'language_code': 'be'}, {'language': 'Bhojpuri', 'language_code': 'bho'}, {'language': 'Bosnian', 'language_code': 'bs'}, {'language': 'Bulgarian', 'language_code': 'bg'}, {'language': 'Burmese', 'language_code': 'my'}, {'language': '

# Downloading Youtube videos using PyTube

In [35]:
pip install pytube

Collecting pytube
  Downloading pytube-15.0.0-py3-none-any.whl.metadata (5.0 kB)
Downloading pytube-15.0.0-py3-none-any.whl (57 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pytube
Successfully installed pytube-15.0.0
Note: you may need to restart the kernel to use updated packages.


In [36]:
from pytube import YouTube

In [37]:
yt = YouTube(VIDEO_LINK_NO_SPEECH)

In [38]:
yt.title

'Charlie Chaplin - The Kid - Fight Scene'

In [40]:
yt.streams

[<Stream: itag="18" mime_type="video/mp4" res="360p" fps="25fps" vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" type="video">, <Stream: itag="22" mime_type="video/mp4" res="720p" fps="25fps" vcodec="avc1.64001F" acodec="mp4a.40.2" progressive="True" type="video">, <Stream: itag="137" mime_type="video/mp4" res="1080p" fps="25fps" vcodec="avc1.640028" progressive="False" type="video">, <Stream: itag="248" mime_type="video/webm" res="1080p" fps="25fps" vcodec="vp9" progressive="False" type="video">, <Stream: itag="136" mime_type="video/mp4" res="720p" fps="25fps" vcodec="avc1.4d401f" progressive="False" type="video">, <Stream: itag="247" mime_type="video/webm" res="720p" fps="25fps" vcodec="vp9" progressive="False" type="video">, <Stream: itag="135" mime_type="video/mp4" res="480p" fps="25fps" vcodec="avc1.4d401e" progressive="False" type="video">, <Stream: itag="244" mime_type="video/webm" res="480p" fps="25fps" vcodec="vp9" progressive="False" type="video">, <Stream: itag="1

In [41]:
yt.streams.filter(file_extension='mp4')

[<Stream: itag="18" mime_type="video/mp4" res="360p" fps="25fps" vcodec="avc1.42001E" acodec="mp4a.40.2" progressive="True" type="video">, <Stream: itag="22" mime_type="video/mp4" res="720p" fps="25fps" vcodec="avc1.64001F" acodec="mp4a.40.2" progressive="True" type="video">, <Stream: itag="137" mime_type="video/mp4" res="1080p" fps="25fps" vcodec="avc1.640028" progressive="False" type="video">, <Stream: itag="136" mime_type="video/mp4" res="720p" fps="25fps" vcodec="avc1.4d401f" progressive="False" type="video">, <Stream: itag="135" mime_type="video/mp4" res="480p" fps="25fps" vcodec="avc1.4d401e" progressive="False" type="video">, <Stream: itag="134" mime_type="video/mp4" res="360p" fps="25fps" vcodec="avc1.4d401e" progressive="False" type="video">, <Stream: itag="133" mime_type="video/mp4" res="240p" fps="25fps" vcodec="avc1.4d4015" progressive="False" type="video">, <Stream: itag="160" mime_type="video/mp4" res="144p" fps="25fps" vcodec="avc1.4d400c" progressive="False" type="video

In [42]:
stream = yt.streams.get_by_itag(22)
stream.download()

'/home/jupyter/geminihackathon24/notebooks/Charlie Chaplin - The Kid - Fight Scene.mp4'

In [43]:
VIDEO_LOCATION = '/home/jupyter/geminihackathon24/notebooks/Charlie Chaplin - The Kid - Fight Scene.mp4'

# Creating a Cloud Storage bucket to save videos

In [8]:
os.getcwd()

'/home/jupyter/geminihackathon24/notebooks'

In [9]:
import os
from google.cloud import storage
# Required role Storage Admin: roles/storage.admin
# Limited role permissions
# storage.buckets.create
# storage.buckets.enableObjectRetention (only required if enabling object retention configurations for the bucket)
# storage.buckets.list (only required if creating a bucket using the Google Cloud console)
# resourcemanager.projects.get (only required if creating a bucket using the Google Cloud console)

# Authorization to access the client.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = r"/home/jupyter/geminihackathon24/vertexai-gemini-hackathon-2024-key.json"

def gs_bucket_creation(bucket_name: str, location = "us"):
    """ Creating a Google Storage bucket in the US region with the coldline storage class """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    bucket.storage_class = "COLDLINE"
    new_bucket = storage_client.create_bucket(bucket, location=location)
    print("Created bucket under the name {}.\n Its location is in {}.\n The storage class is {}.".format( new_bucket.name, new_bucket.location, new_bucket.storage_class))
    return new_bucket

In [10]:
gs_bucket_creation("youtube_videos")

RefreshError: ('invalid_grant: Invalid JWT Signature.', {'error': 'invalid_grant', 'error_description': 'Invalid JWT Signature.'})

In [None]:
def saving_on_gs(bucket_name, blob_name):
    """Write and read a blob from GCS using file-like IO"""
    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The ID of your new GCS object
    # blob_name = "storage-object-name"

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)

    # Mode can be specified as wb/rb for bytes mode.
    # See: https://docs.python.org/3/library/io.html
    with blob.open("w") as f:
        f.write("Hello world")

In [44]:
result = video_transcription(VIDEO_LOCATION, PROMPT, PROJECT_ID, LOCATION)
result

InvalidArgument: 400 Unable to submit request because the fileUri parameter must be a Cloud Storage URI starting with 'gs://' but the entered value was '/home/jupyter/geminihackathon24/notebooks/Charlie Chaplin - The Kid - Fight Scene.mp4'. Update the fileUri value and try again. Learn more: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/gemini