In [19]:
import os
import yt_dlp
import openai

# Step 1: Ask for YouTube URL
# Strip stray whitespace/newlines that tend to come along with a pasted URL.
youtube_url = input("Enter the YouTube video URL: ").strip()

# Step 2: Define download options
download_dir = "downloads"
os.makedirs(download_dir, exist_ok=True)

# yt-dlp fills in the video title and the container extension of the downloaded stream.
output_template = os.path.join(download_dir, "%(title)s.%(ext)s")

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': output_template,
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',  # Convert to MP3
        'preferredquality': '192',  # High quality audio
    }],
}

# Upload size limit quoted by the Whisper API in its 413 error
# ("Maximum content size limit (26214400) exceeded").
WHISPER_MAX_UPLOAD_BYTES = 26214400

# Step 3: Download and extract audio
try:
    print("Downloading audio...")

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(youtube_url, download=True)
        # prepare_filename() gives the name of the downloaded stream, whose extension
        # depends on the format yt-dlp picked (.webm, .m4a, .opus, .mp4, ...).
        # The FFmpegExtractAudio step writes the same base name with an .mp3 extension,
        # so swap only the final extension instead of string-replacing a fixed list of
        # suffixes (which missed other containers and could also corrupt a title that
        # happens to contain ".webm" or ".mp4").
        source_path = ydl.prepare_filename(info_dict)
        audio_file_path = os.path.splitext(source_path)[0] + ".mp3"

    print(f"Audio file saved to: {audio_file_path}")

    # TODO: split or re-encode audio that exceeds the Whisper upload limit. Until then,
    # warn here so the failure is not first discovered as an APIStatusError (413)
    # in the transcription cell.
    if os.path.isfile(audio_file_path):
        audio_size_bytes = os.path.getsize(audio_file_path)
        if audio_size_bytes > WHISPER_MAX_UPLOAD_BYTES:
            print(
                f"Warning: audio file is {audio_size_bytes} bytes, which exceeds the "
                f"Whisper upload limit of {WHISPER_MAX_UPLOAD_BYTES} bytes; "
                "transcription will fail with a 413 error."
            )

except Exception as e:
    print(f"Error downloading video: {e}")
    exit(1)




Enter the YouTube video URL:  https://www.youtube.com/watch?v=KrL2PIkJ1k0


Downloading audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=KrL2PIkJ1k0
[youtube] KrL2PIkJ1k0: Downloading webpage
[youtube] KrL2PIkJ1k0: Downloading tv client config
[youtube] KrL2PIkJ1k0: Downloading player f6e09c70
[youtube] KrL2PIkJ1k0: Downloading tv player API JSON
[youtube] KrL2PIkJ1k0: Downloading ios player API JSON
[youtube] KrL2PIkJ1k0: Downloading m3u8 information
[info] KrL2PIkJ1k0: Downloading 1 format(s): 251
[download] Destination: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.webm
[download] 100% of    9.45MiB in 00:00:00 at 35.98MiB/s  
[ExtractAudio] Destination: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3
Deleting original file downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.webm (pass -k to keep)
Audio file saved to: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3


In [20]:
import openai
import os

def transcribe_audio(file_path):
    """Transcribe an audio file with OpenAI's ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to upload; it is opened in binary mode.

    Returns:
        The ``text`` attribute of the transcription response.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is unset or empty.
    """
    openai_key = os.environ.get("OPENAI_API_KEY")
    if not openai_key:
        # Fail before any network call so the cause is obvious.
        raise ValueError("OpenAI API key is missing. Set the OPENAI_API_KEY environment variable.")

    print("Transcribing audio...")

    whisper_client = openai.OpenAI(api_key=openai_key)

    # Keep the file handle open only for the duration of the upload.
    with open(file_path, "rb") as audio_stream:
        result = whisper_client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_stream,
        )

    return result.text



In [21]:
# Example usage: transcribe the MP3 whose path is held in `audio_file_path`
# and show the resulting text.

print("Audio file to transcribe: {}".format(audio_file_path))

transcribed_text = transcribe_audio(file_path=audio_file_path)

print("\nTranscription:\n", transcribed_text, sep=" ")

Audio file to transcribe: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3
Transcribing audio...

Transcription:
 We are now into the round of 16 of the Europa League. Let's do some predictions. Hello guys and welcome back to yet another video. As I say we are predicting the first legs of the Europa League round of 16 and this is something we haven't done much on the channel so do let me know if you like this and be sure to support the video if you want to see more Europa League predictions. Let's go for 1,100 likes. It takes two seconds to hit the like button so do be sure to drop a like and hit the subscribe button if you want to see more of this stuff. Like I say I'm happy to do more. I've really enjoyed doing the Champions League predictions so let's have a go at doing some Europa League predictions. Be sure to share your predictions down in the comments as we go along and let's get into the games and we start with AZ Alkmaar versus Tottenham Hotspur and this one could be tasty. Alk

In [23]:
import openai
import os

def extract_predictions(transcript):
    """Ask GPT-4-Turbo to list the concrete predictions contained in a transcript.

    Args:
        transcript: Transcript text; it is interpolated verbatim into the prompt.

    Returns:
        The content of the first completion choice, with surrounding whitespace stripped.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is unset or empty.
    """
    openai_key = os.environ.get("OPENAI_API_KEY")
    if not openai_key:
        # Fail before any network call so the cause is obvious.
        raise ValueError("OpenAI API key is missing. Set the OPENAI_API_KEY environment variable.")

    print("Analyzing transcript for predictions...")

    # Instructions for the model: what counts as a prediction, what to skip,
    # the transcript itself, and the expected numbered-list answer format.
    user_prompt = f"""
    You are analyzing a conversation transcript. Your goal is to extract **clear, concrete predictions about the future**, 
    avoiding vague or uncertain statements.

    **What counts as a prediction?**
    - Statements that clearly express **what will happen**, **what is expected**, or **likely future outcomes**.
    - Example phrases: "will happen," "is expected to," "is likely to," "is projected to," "experts predict that," "data suggests that."

    **What to ignore?**
    - Unclear or subjective statements (e.g., "it's not over," "maybe," "we will see").
    - General reflections, opinions, or past events.

    **Transcript:**
    {transcript}

    **Task:**
    - Extract **up to 10 of the most important predictions** in a numbered list.
    - Ensure that each prediction is **specific, meaningful, and clearly about the future**.
    - If no predictions are found, respond with: "No clear predictions were made in this conversation."

    **Response Format Example:**
    1. The team is expected to switch to a defensive strategy in the next game.
    2. Analysts predict that inflation will decrease by 2% next quarter.
    3. AI adoption in healthcare will grow significantly in the next five years.
    4. The player is likely to miss the next match due to injury.
    5. Scientists anticipate a major breakthrough in battery technology by 2030.
    """

    chat_messages = [
        {"role": "system", "content": "You are an AI assistant that analyzes transcripts."},
        {"role": "user", "content": user_prompt},
    ]

    # One chat-completion request; the reply is capped at 300 tokens.
    completion = openai.OpenAI(api_key=openai_key).chat.completions.create(
        model="gpt-4-turbo",
        messages=chat_messages,
        max_tokens=300,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content.strip()



In [24]:
# Run the prediction extractor on the transcript and show what it found.
predictions = extract_predictions(transcript=transcribed_text)

print("\nExtracted Predictions:\n", predictions, sep=" ")


Analyzing transcript for predictions...

Extracted Predictions:
 1. Tottenham will win against AZ Alkmaar with a predicted score of 1-0.
2. Stuart Bucharest and Lyon are predicted to draw 1-1 in their match.
3. Fenerbahce and Rangers are expected to draw 1-1 in their game.
4. Manchester United is predicted to win against Real Sociedad with a score of 2-1.
5. Ajax is expected to defeat Eintracht Frankfurt with a score of 2-1.
6. Olympiakos is predicted to win against Bodo Glimt with a score of 2-0.
7. Roma and Atletico Bilbao are predicted to draw 2-2 in their match.
8. Lazio is expected to win against Victoria Pinson with a score of 2-1.
