In [11]:
import os
import yt_dlp
import openai

# Step 1: Ask for YouTube URL
# Strip surrounding whitespace so a pasted URL with stray spaces still resolves.
youtube_url = input("Enter the YouTube video URL: ").strip()

# Step 2: Define download options
download_dir = "downloads"
os.makedirs(download_dir, exist_ok=True)

# yt-dlp substitutes %(title)s and %(ext)s when it writes the file.
output_template = os.path.join(download_dir, "%(title)s.%(ext)s")

ydl_opts = {
    'format': 'bestaudio/best',
    'outtmpl': output_template,
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',  # Convert to MP3
        'preferredquality': '192',  # High quality audio
    }],
}

# Step 3: Download and extract audio
try:
    print("Downloading audio...")

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(youtube_url, download=True)
        downloaded_path = ydl.prepare_filename(info_dict)

    # prepare_filename() gives the name of the downloaded container (e.g. .webm);
    # the FFmpegExtractAudio step writes the same stem with an .mp3 extension.
    # Swapping only the final extension works for any source container and
    # cannot touch a ".webm"/".mp4" that happens to appear inside the title.
    audio_file_path = os.path.splitext(downloaded_path)[0] + ".mp3"

    print(f"Audio file saved to: {audio_file_path}")

except Exception as e:
    print(f"Error downloading video: {e}")
    # Re-raise so the cell is marked as failed and later cells do not run with
    # an undefined audio_file_path. exit() is an interactive-shell helper and
    # is not a reliable way to signal an error from a notebook cell.
    raise

# TODO: Whisper rejects uploads above its content size limit with HTTP 413
# ("Maximum content size limit (26214400) exceeded"), so long videos need to be
# split or re-encoded at a lower bitrate before they can be transcribed.




Enter the YouTube video URL:  https://www.youtube.com/watch?v=KrL2PIkJ1k0


Downloading audio...
[youtube] Extracting URL: https://www.youtube.com/watch?v=KrL2PIkJ1k0
[youtube] KrL2PIkJ1k0: Downloading webpage
[youtube] KrL2PIkJ1k0: Downloading tv client config
[youtube] KrL2PIkJ1k0: Downloading player f6e09c70
[youtube] KrL2PIkJ1k0: Downloading tv player API JSON
[youtube] KrL2PIkJ1k0: Downloading ios player API JSON
[youtube] KrL2PIkJ1k0: Downloading m3u8 information
[info] KrL2PIkJ1k0: Downloading 1 format(s): 251
[download] Destination: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.webm
[download] 100% of    9.45MiB in 00:00:01 at 9.41MiB/s   
[ExtractAudio] Destination: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3
Deleting original file downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.webm (pass -k to keep)
Audio file saved to: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3


In [15]:
import openai
import os

def transcribe_audio(file_path, max_bytes=25 * 1024 * 1024):
    """Send an audio file to OpenAI's Whisper API and return the transcription.

    Args:
        file_path: Path to the audio file (an MP3 in this notebook).
        max_bytes: Largest file size to attempt to upload. Defaults to
            26214400 bytes, the content size limit reported by the API's
            413 error. Pass ``None`` to skip the check.

    Returns:
        The transcribed text.

    Raises:
        ValueError: If OPENAI_API_KEY is not set, or the file is larger than
            ``max_bytes``.
        OSError: If the file does not exist or cannot be read.
    """
    api_key = os.getenv("OPENAI_API_KEY")  # Load API key from the environment

    if not api_key:
        raise ValueError("OpenAI API key is missing. Set the OPENAI_API_KEY environment variable.")

    # Fail fast with a clear message instead of uploading the whole file only
    # to get a 413 back. The request adds some overhead of its own, so a file
    # just under the limit may still be rejected by the API.
    file_size = os.path.getsize(file_path)
    if max_bytes is not None and file_size > max_bytes:
        raise ValueError(
            f"Audio file is too large for the Whisper API: {file_size} bytes "
            f"(limit {max_bytes} bytes). Split or re-encode the audio first."
        )

    print("Transcribing audio...")

    client = openai.OpenAI(api_key=api_key)
    with open(file_path, "rb") as audio_file:
        response = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file
        )

    return response.text



In [16]:
# Example usage: transcribe the MP3 saved by the download cell.
print(f"Audio file to transcribe: {audio_file_path}")
transcribed_text = transcribe_audio(audio_file_path)

# print() separates its arguments with a single space, so the transcript
# starts one space in on the line after the header.
print("\nTranscription:\n", transcribed_text, sep=" ")

Audio file to transcribe: downloads\EUROPA LEAGUE LAST 16 PREDICTIONS.mp3
Transcribing audio...

Transcription:
 We are now into the round of 16 of the Europa League. Let's do some predictions. Hello guys and welcome back to yet another video. As I say we are predicting the first legs of the Europa League round of 16 and this is something we haven't done much on the channel so do let me know if you like this and be sure to support the video if you want to see more Europa League predictions. Let's go for 1,100 likes. It takes two seconds to hit the like button so do be sure to drop a like and hit the subscribe button if you want to see more of this stuff. Like I say I'm happy to do more. I've really enjoyed doing the Champions League predictions so let's have a go at doing some Europa League predictions. Be sure to share your predictions down in the comments as we go along and let's get into the games and we start with AZ Alkmaar versus Tottenham Hotspur and this one could be tasty. Alk

In [17]:
import openai
import os
import re

def extract_predictions(transcript):
    """Ask GPT-4-Turbo for the predictions in a transcript and return them as a list.

    Args:
        transcript: The transcribed text to analyze.

    Returns:
        A list of prediction strings, one per numbered item in the model's
        reply. Returns an empty list when the transcript is empty or the reply
        contains no numbered items.

    Raises:
        ValueError: If OPENAI_API_KEY is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")  # Load API key

    if not api_key:
        raise ValueError("OpenAI API key is missing. Set the OPENAI_API_KEY environment variable.")

    # Nothing to analyze: skip the API call entirely.
    if not transcript or not transcript.strip():
        print("❌ No clear predictions found.")
        return []

    print("Analyzing transcript for predictions...")

    # Improved prompt to extract better predictions
    prompt = f"""
    You are analyzing a conversation transcript. Your goal is to extract **clear, concrete predictions about the future**, 
    avoiding vague or uncertain statements.

    **What counts as a prediction?**
    - Statements that clearly express **what will happen**, **what is expected**, or **likely future outcomes**.
    - Example phrases: "will happen," "is expected to," "is likely to," "is projected to," "experts predict that," "data suggests that."

    **What to ignore?**
    - Unclear or subjective statements (e.g., "it's not over," "maybe," "we will see").
    - General reflections, opinions, or past events.

    **Transcript:**
    {transcript}

    **Task:**
    - Extract **up to 10 of the most important predictions** in a numbered list.
    - Ensure that each prediction is **specific, meaningful, and clearly about the future**.
    - If no predictions are found, respond with: "No clear predictions were made in this conversation."

    **Response Format Example:**
    1. The team is expected to switch to a defensive strategy in the next game.
    2. Analysts predict that inflation will decrease by 2% next quarter.
    3. AI adoption in healthcare will grow significantly in the next five years.
    4. The player is likely to miss the next match due to injury.
    5. Scientists anticipate a major breakthrough in battery technology by 2030.
    """

    # Send the request to OpenAI's GPT-4-Turbo
    client = openai.OpenAI(api_key=api_key)
    response = client.chat.completions.create(
        model="gpt-4-turbo",
        messages=[{"role": "system", "content": "You are an AI assistant that analyzes transcripts."},
                  {"role": "user", "content": prompt}],
        max_tokens=400
    )

    # Treat a missing message body as an empty reply rather than crashing.
    predictions_text = (response.choices[0].message.content or "").strip()

    # Only accept list markers at the start of a line ("1. ..." or "1) ...").
    # An unanchored pattern would also fire on numbers inside ordinary
    # sentences (e.g. "... in 2025. Summary") and return a bogus item.
    numbered_item = re.compile(r"^[ \t]*\d+[.)][ \t]+(.+?)[ \t]*$", re.MULTILINE)
    predictions_list = [
        item.strip() for item in numbered_item.findall(predictions_text) if item.strip()
    ]

    if not predictions_list:
        print("❌ No clear predictions found.")
        return []

    return predictions_list  # Returns a clean list of predictions




In [18]:
# Run the extraction step on the transcript produced by the Whisper cell.
predictions = extract_predictions(transcribed_text)

print("\n✅ Extracted Predictions:")
# One bullet line per prediction; with an empty list only the header is shown.
if predictions:
    print("\n".join(f"- {p}" for p in predictions))


Analyzing transcript for predictions...

✅ Extracted Predictions:
- AZ Alkmaar is predicted to lose against Tottenham Hotspur, with a score of 0-1.
- Stuart Bucharest and Lyon are expected to draw, with a score of 1-1.
- Fenerbahce and Rangers are projected to draw, with a score of 1-1.
- Manchester United is anticipated to defeat Real Sociedad, with a predicted score of 2-1.
- Ajax is expected to win against Eintracht Frankfurt, with a score of 2-1.
- Olympiakos is predicted to win against Bodo Glimt, with a score of 2-0.
- Roma and Atletico Bilbao are expected to draw, with a score of 2-2.
- Lazio is anticipated to win against Victoria Pinson, with a predicted score of 2-1.


In [25]:
import os
import requests

def search_google(query, num_results=3, timeout=30):
    """Search Google through SerpAPI and return the top result snippets.

    Args:
        query: The text to search for. Match-outcome keywords are appended to
            it before the request is sent.
        num_results: Maximum number of snippets to return (default 3).
        timeout: Seconds to wait for SerpAPI before giving up (default 30).

    Returns:
        A list of up to ``num_results`` snippet strings taken, in order, from
        the organic results that have a snippet. Empty if there are none.

    Raises:
        ValueError: If SERPAPI_KEY is not set.
        requests.HTTPError: If SerpAPI responds with an error status.
    """
    api_key = os.getenv("SERPAPI_KEY")
    if not api_key:
        raise ValueError("SerpAPI key is missing. Set the SERPAPI_KEY environment variable.")

    # Enhance the search query by appending contextual words for match outcomes
    refined_query = f"{query} final score OR match result OR who won OR game summary"

    url = "https://serpapi.com/search"
    params = {
        "q": refined_query,  # Use refined query
        "hl": "en",
        "gl": "us",
        "api_key": api_key
    }

    # Without a timeout a stalled connection would hang the cell indefinitely.
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()

    organic_results = response.json().get("organic_results") or []

    # Not every organic result has a "snippet" field; skip those instead of
    # raising KeyError and losing the usable results.
    snippets = [
        result["snippet"]
        for result in organic_results
        if isinstance(result, dict) and result.get("snippet")
    ]
    return snippets[:num_results]



In [26]:
# Try the search step on a single prediction.
if not predictions:
    # extract_predictions() returns [] when nothing was found, so indexing
    # the list here would raise IndexError.
    print("⚠️ No predictions available to search for.")
else:
    test_query = predictions[0]  # Change index to test another prediction
    search_results = search_google(test_query)

    print("\n🔍 Search Results for:", test_query)
    for i, result in enumerate(search_results, start=1):
        print(f"{i}. {result}")



🔍 Search Results for: AZ Alkmaar is predicted to lose against Tottenham Hotspur, with a score of 0-1.
1. Bergvall own goal condemns Spurs to loss at AZ. Tottenham Hotspur suffered a 1-0 defeat at AZ Alkmaar in the first leg of their Europa League last-16 tie on ...
2. Lucas Bergvall's own goal led to a flat Tottenham losing the first leg of their Europa League last-16 tie at AZ Alkmaar.
3. But instead, we get inverted terroristball, where we panic in midfield and just ping the ball around in the final third against a 10-man block.


In [27]:
import openai

def verify_prediction(prediction, search_snippets):
    """Ask GPT-4o whether a prediction came true, based on search snippets.

    Args:
        prediction: The prediction text to check.
        search_snippets: The search-result snippets to use as evidence, either
            as an iterable of strings or as one pre-formatted string.

    Returns:
        The model's reply with surrounding whitespace removed. The prompt asks
        for an "Actual Result:" line followed by a "Rating:" line whose value
        is TRUE, FALSE, UNCLEAR, or NOT YET.

    Raises:
        ValueError: If OPENAI_API_KEY is not set.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise ValueError("OpenAI API key is missing. Set the OPENAI_API_KEY environment variable.")

    client = openai.OpenAI(api_key=api_key)

    # Show the snippets to the model as a numbered list rather than as the
    # repr of a Python list (brackets, quotes and escape sequences).
    if isinstance(search_snippets, str):
        snippets_text = search_snippets
    else:
        snippets_text = "\n    ".join(
            f"{index}. {snippet}" for index, snippet in enumerate(search_snippets, start=1)
        )
    if not snippets_text.strip():
        snippets_text = "(no search results)"

    prompt = f"""
    You are verifying whether a prediction has come true using real-time Google search results.
    
    **Prediction:** "{prediction}"

    **Search Results:**
    {snippets_text}

    **Your Task:**
    1. **Summarize the key event from the search results** that confirms or contradicts the prediction. If the search results don't contain relevant details, say "No clear result found."
    2. **Classify the prediction as:**
       - **TRUE** → The event has definitively happened.
       - **FALSE** → The event did not happen.
       - **UNCLEAR** → There are conflicting sources, partial evidence, or no conclusive proof yet.
       - **NOT YET** → The event is in the future, and there is no evidence that it has happened.

    **Response Format Example:**
    Actual Result: "AZ Alkmaar won the game 1-0, contradicting the prediction."
    Rating: FALSE
    """

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are an AI that verifies predictions using real-time Google search results."},
                  {"role": "user", "content": prompt}],
        max_tokens=150  # Room for the one-sentence summary plus the rating line
    )

    # Treat a missing message body as an empty reply rather than crashing.
    return (response.choices[0].message.content or "").strip()



In [22]:
# Try the search + verification steps end to end on a single prediction.
if not predictions:
    # extract_predictions() returns [] when nothing was found, so indexing
    # the list here would raise IndexError.
    print("⚠️ No predictions available to verify.")
else:
    test_prediction = predictions[0]  # Change index to test another prediction
    test_search_results = search_google(test_prediction)

    print(f"\n🔍 Searching for: {test_prediction}")
    for i, result in enumerate(test_search_results, start=1):
        print(f"{i}. {result}")

    if not test_search_results:
        # With no evidence there is nothing for the model to judge, so skip
        # the GPT call.
        print("No relevant search results found.")
    else:
        print("\n🤖 Asking GPT-4o to verify...")
        verification_result = verify_prediction(test_prediction, test_search_results)

        print(f"✅ Result: {test_prediction} → {verification_result}")



🔍 Searching for: AZ Alkmaar is predicted to lose against Tottenham Hotspur, with a score of 0-1.
1. The runs on and off the ball were non existent. AZ alkmaar is a good side and away games in Europe are always tough, but with the squad we have ...
2. That didn't happen today. Tottenham played one of their worst matches of the season, conceding a first half own goal from Lucas Bergvall, and ...
3. Spurs leave themselves with work to do​​ They will have to be at their best to overturn this 1-0 deficit to beat AZ and make it through to the ...

🤖 Asking GPT-4o to verify...
✅ Result: AZ Alkmaar is predicted to lose against Tottenham Hotspur, with a score of 0-1. → FALSE


In [28]:
import re


def _parse_verification(gpt_response):
    """Split a verify_prediction() reply into ``(actual_result, rating)``.

    The fields are located by their "Actual Result:" / "Rating:" labels rather
    than by line position, so extra lines, markdown bold around the labels, or
    both fields sharing one line do not mix them up. When the rating label is
    missing or its value is not one of TRUE / FALSE / UNCLEAR / NOT YET (for
    example because the reply was cut off), the rating falls back to "UNCLEAR".
    """
    text = (gpt_response or "").strip()

    actual_match = re.search(
        r"Actual Result:\**[ \t]*(.*?)[ \t]*(?:\**Rating:|$)",
        text,
        re.IGNORECASE | re.MULTILINE,
    )
    if actual_match and actual_match.group(1).strip():
        actual_result = actual_match.group(1).strip()
    else:
        # No labelled summary: fall back to the first non-empty line.
        non_empty_lines = [line.strip() for line in text.splitlines() if line.strip()]
        actual_result = non_empty_lines[0] if non_empty_lines else "No clear result found."

    rating_match = re.search(
        r"Rating:\W*(NOT YET|UNCLEAR|TRUE|FALSE)\b", text, re.IGNORECASE
    )
    rating = rating_match.group(1).upper() if rating_match else "UNCLEAR"

    return actual_result, rating


# Maps each prediction text to {"actual": <summary>, "rating": <label>}.
verified_results = {}

for prediction in predictions:
    print(f"\n🔍 Searching for: {prediction}")
    search_snippets = search_google(prediction)

    if not search_snippets:
        # Nothing to show the model, so record the prediction as undecided.
        verified_results[prediction] = {
            "actual": "No relevant search results found.",
            "rating": "UNCLEAR"
        }
        continue

    print("\n🤖 Asking GPT-4o to verify...")
    gpt_response = verify_prediction(prediction, search_snippets)

    # Extracting structured response
    actual_result, rating = _parse_verification(gpt_response)

    verified_results[prediction] = {
        "actual": actual_result,
        "rating": rating
    }

# Print the results in the requested format
print("\n✅ Final Verified Predictions:\n")
for i, (pred, details) in enumerate(verified_results.items(), start=1):
    print(f"Prediction {i}: {pred}")
    print(f"    Actual Result: {details['actual']}")
    print(f"    Rating: {details['rating']}\n")



🔍 Searching for: AZ Alkmaar is predicted to lose against Tottenham Hotspur, with a score of 0-1.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Stuart Bucharest and Lyon are expected to draw, with a score of 1-1.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Fenerbahce and Rangers are projected to draw, with a score of 1-1.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Manchester United is anticipated to defeat Real Sociedad, with a predicted score of 2-1.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Ajax is expected to win against Eintracht Frankfurt, with a score of 2-1.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Olympiakos is predicted to win against Bodo Glimt, with a score of 2-0.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Roma and Atletico Bilbao are expected to draw, with a score of 2-2.

🤖 Asking GPT-4o to verify...

🔍 Searching for: Lazio is anticipated to win against Victoria Pinson, with a predicted score of 2-1.

🤖 Asking GPT-4o to verify...

✅ Final Veri