In [1]:
import os 
import requests

import matplotlib.pyplot as plt
import pandas as pd
import yt_dlp

# Base address of the scoring service and the two endpoints this notebook talks to.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
api_url = DEFAULT_API_URL
questions_url = f"{api_url}/questions"
files_url = f"{api_url}/files"

### Downloading questions

In [2]:
# Download the list of evaluation questions from the scoring API.
# NOTE: despite its name, `files_data` holds the *questions* payload; the name is
# kept because the following cell reads it.
print(f"Fetching questions from: {questions_url}")
# Start from an empty list so later cells still run when the request fails.
files_data = []
try:
    response = requests.get(questions_url, timeout=15)
    response.raise_for_status()
    files_data = response.json() or []
    if not files_data:
        print("Fetched questions list is empty.")
    else:
        print(f"Fetched {len(files_data)} questions.")
# requests' JSONDecodeError is a subclass of RequestException, so it has to be
# handled first; listed after its parent this branch could never run.
except requests.exceptions.JSONDecodeError as e:
    print(f"Error decoding JSON response from questions endpoint: {e}")
    print(f"Response text: {response.text[:500]}")
except requests.exceptions.RequestException as e:
    print(f"Error fetching questions: {e}")
except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")

Fetching questions from: https://agents-course-unit4-scoring.hf.space/questions
Fetched 20 questions.


In [3]:
# Print every fetched question together with its task id; entries that lack a
# task id or a question are reported and skipped.
for item in files_data:
    task_id = item.get("task_id")
    question_text = item.get("question")
    if not task_id or question_text is None:
        print(f"Skipping item with missing task_id or question: {item}")
        continue
    print("Task ID for the question: ", task_id)
    print(question_text,end="\n========================================================================\n")

Taks ID for the question:  8e867cd7-cff9-4e6c-867a-ff5ddc2550be
How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
Taks ID for the question:  a1e91b78-d3d8-4675-bb8d-62741b4b68a6
In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?
Taks ID for the question:  2d83110e-a098-4ebb-9987-066c06fa42d0
.rewsna eht sa "tfel" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI
Taks ID for the question:  cca530fc-4052-43b2-b130-b30968d8aa44
Review the chess position provided in the image. It is black's turn. Provide the correct next move for black which guarantees a win. Please provide your response in algebraic notation.
Taks ID for the question:  4fc2f1ae-8625-45b5-ab34-ad4433bc21f8
Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in November 2016?
Taks ID for the

### Working with Audio

In [None]:
class FailedToDownloadAudio(Exception):
    """Error type reserved for failed audio downloads.

    NOTE(review): nothing in the visible notebook raises it yet.
    """

def download_audio_mp3(file_path:str, url:str, target_height:int=144):
    """Download the audio behind ``url`` with yt-dlp and convert it to MP3.

    Primarily configured and used for YouTube.

    Args:
        file_path: Destination path WITHOUT extension; used as yt-dlp's output template.
        url: Address handed to yt-dlp.
        target_height: Unused here; kept so existing callers keep working.

    Returns:
        A tuple ``(mp3_path, info)`` where ``mp3_path`` is ``file_path`` plus the
        ``.mp3`` extension and ``info`` is the metadata returned by
        ``extract_info``.
    """
    ydl_opts = {
        'format': 'bestaudio/best',  # Best audio quality
        'outtmpl': file_path,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',  # Or 'aac', 'm4a', etc.
            'preferredquality': '192',  # Bitrate (e.g., 192k)
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # Metadata is fetched separately so it can be returned to the caller.
        info = ydl.extract_info(url, download=False)
        ydl.download([url])

    # The extracted audio ends up next to the template path with an ".mp3"
    # suffix (the transcription cell reads "<name>.mp3"), so return the path
    # of the file that actually exists, mirroring download_video_mp4.
    # NOTE(review): relies on the post-processor appending the codec extension - confirm
    # for names that already contain a dot.
    return f'{file_path}.mp3', info

def download_audio(file_name: str, url: str) -> tuple[str, dict[str,str]]:
    """Download audio from ``url`` into the working directory.

    ``file_name`` (no extension) is made absolute, passed to
    ``download_audio_mp3`` and the outcome is printed.

    Returns:
        The ``(file_path, meta)`` pair produced by ``download_audio_mp3``.
    """
    absolute_target = os.path.abspath(file_name)
    downloaded = download_audio_mp3(absolute_target, url)
    file_path, meta = downloaded
    print(f"Downloaded file to {file_path} with meta info {meta}.")

    return file_path, meta


In [None]:
print(f"Fetching files from: {files_url}")
task_id = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3"
file_url = f"{files_url}/{task_id}"
file_type = "audio"
file_name = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3"
try:
    if not response.content:
        raise ValueError("Fetched files list is empty.")
    match file_type.lower(): 
        case "audio":
            file_path, meta = download_audio(file_name, file_url)
        case _:
            raise NotImplementedError("this file type is not implemented")
    print("Downloaded the file: ", file_path)
except requests.exceptions.RequestException as e:
    print(f"Error fetching files: {e}")
except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")

In [None]:
import whisper

# Transcribe the MP3 downloaded above with Whisper's "tiny" model.
audio_mp3_path = "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"
audio_transcriber = whisper.load_model("tiny")
transcription = audio_transcriber.transcribe(audio_mp3_path)

### Working with Video

In [None]:
class FailedToDownloadVideo(Exception):
    """Error type reserved for failed video downloads.

    NOTE(review): nothing in the visible notebook raises it yet.
    """

def download_video_mp4(file_path:str, url:str, target_height:int=730):
    """Download the video behind ``url`` as an MP4 file using yt-dlp.

    Primarily configured and used for YouTube.

    Args:
        file_path: Destination path without extension; ``.mp4`` is appended.
        url: Address handed to yt-dlp.
        target_height: Upper bound on the frame height in the format selector.

    Returns:
        A tuple ``(mp4_path, info)``: the output path including ``.mp4`` and the
        metadata returned by ``extract_info``.
    """
    mp4_path = f'{file_path}.mp4'
    format_selector = f'bestvideo[height<={target_height}][ext=mp4]'
    options = dict(format=format_selector, outtmpl=mp4_path)

    with yt_dlp.YoutubeDL(options) as downloader:
        # Metadata first (no download), then the actual download.
        info = downloader.extract_info(url, download=False)
        downloader.download([url])

    return mp4_path, info
    
def download_video(file_name: str, url: str) -> tuple[str, dict[str,str]]:
    """Download a video from ``url`` into the working directory.

    ``file_name`` (no extension) is made absolute, passed to
    ``download_video_mp4`` and the outcome is printed.

    Returns:
        The ``(file_path, meta)`` pair produced by ``download_video_mp4``.
    """
    absolute_target = os.path.abspath(file_name)
    downloaded = download_video_mp4(absolute_target, url)
    file_path, meta = downloaded
    print(f"Downloaded file to {file_path} with meta info {meta}.")

    return file_path, meta

In [None]:
print(f"Fetching files from: {files_url}")
task_id = "a1e91b78-d3d8-4675-bb8d-62741b4b68a6"
file_url = "https://www.youtube.com/watch?v=L1vXCYZAYYM"
file_type = "video"
file_name = "a1e91b78-d3d8-4675-bb8d-62741b4b68a6"
try:
    match file_type.lower(): 
        case "audio":
            file_path, meta = download_audio(file_name, file_url)
        case "video":
            file_path, meta = download_video(file_name, file_url)
        case _:
            raise NotImplementedError("this file type is not implemented")
    print("Downloaded the file: ", file_path)
except requests.exceptions.RequestException as e:
    print(f"Error fetching files: {e}")
except Exception as e:
    print(f"An unexpected error occurred fetching questions: {e}")

In [None]:
import cv2 

# Capture frames from the video
def capture_frames(file_path: str, output_folder_prefix: str = "frames") -> str:
    """Save every ``fps``-th frame of a video (about one per second) as a JPEG.

    Frames are written to ``<output_folder_prefix>/<video file name>/`` as
    ``frame_<index>.jpg``, where ``<index>`` is the frame's position in the video.

    Args:
        file_path: Path of the video file to read.
        output_folder_prefix: Parent folder for the per-video frames folder.

    Returns:
        The folder the frames were written to.

    Raises:
        OSError: If OpenCV cannot open ``file_path``.
        FileExistsError: If the frames folder already exists.
    """
    file_name = os.path.basename(file_path)
    output_folder = os.path.join(output_folder_prefix, file_name)

    captioner = cv2.VideoCapture(file_path)
    try:
        if not captioner.isOpened():
            raise OSError(f"Could not open video file: {file_path}")

        os.makedirs(output_folder, exist_ok=False)

        # Reads the metadata of the video file
        # If the FPS is variable (VFR) instead of constant, this value is just an average, not the exact number for each second
        # Clamp to 1 so a missing or sub-1 FPS value cannot cause a modulo-by-zero below.
        fps = max(1, int(captioner.get(cv2.CAP_PROP_FPS)))

        frame_counter = 0
        while True:
            has_frame, frame = captioner.read()
            if not has_frame:
                break

            if frame_counter % fps == 0:
                frame_file_name = os.path.join(output_folder, f"frame_{frame_counter}.jpg")
                cv2.imwrite(frame_file_name, frame)

            frame_counter += 1
    finally:
        # Always free the underlying video handle, even on errors.
        captioner.release()

    return output_folder

# Resolve the video relative to the working directory instead of hard-coding a
# user-specific absolute path; download_video() writes to abspath(<name>) + ".mp4".
video_file_path = os.path.abspath("a1e91b78-d3d8-4675-bb8d-62741b4b68a6.mp4")
video_frames_folder = capture_frames(video_file_path)

In [None]:
from ultralytics import YOLO


def plot_annotated_image_ultralytics(result):
    """Show a prediction result as an annotated image without axes.

    ``result.plot()`` yields the annotated image as a BGR numpy array; it is
    converted to RGB before being handed to matplotlib.
    """
    annotated_rgb = cv2.cvtColor(result.plot(), cv2.COLOR_BGR2RGB)
    plt.imshow(annotated_rgb)
    plt.axis("off")
    plt.show()

def count_ultralytics_class(result, detect_class_name: str = "bird", threshold_confidence_lvl=0):
    """Count the detections of one class in a single prediction result.

    Args:
        result: Prediction result exposing ``boxes`` (each with ``cls`` and
            ``conf``) and ``names`` (class id -> class name).
        detect_class_name: Name of the class to count.
        threshold_confidence_lvl: Only detections with a confidence strictly
            above this value are counted.

    Returns:
        Number of matching detections.
    """
    # Use the class-name mapping carried by the result itself rather than a
    # global `model`, so the function works no matter which cells ran before.
    class_names = result.names
    boxes = result.boxes if result.boxes is not None else ()

    class_count = 0
    for box in boxes:
        class_name = class_names[int(box.cls[0])]
        confidence_lvl = float(box.conf[0])
        if class_name == detect_class_name and confidence_lvl > threshold_confidence_lvl:
            class_count += 1

    return class_count

# Frames folder relative to the working directory (the layout capture_frames()
# produces) instead of a hard-coded, user-specific absolute path.
video_frames_folder = os.path.join("frames", "a1e91b78-d3d8-4675-bb8d-62741b4b68a6.mp4")
frames = sorted(f for f in os.listdir(video_frames_folder) if f.endswith(('.png', '.jpg', '.jpeg')))

model = YOLO('yolov8n.pt')

# Maps each frame file name to the number of birds detected in it.
frame_birds_counts = {}
for frame in frames:
    frame_file_path = os.path.join(video_frames_folder, frame)

    # Since we give only one image at the time, just take the first result
    result = model(frame_file_path)[0]
    count = count_ultralytics_class(result)
    frame_birds_counts[frame] = count

In [None]:
# Report the frame with the most detected birds; max() would raise on an empty
# mapping, so the no-frames case is handled explicitly.
if frame_birds_counts:
    max_key = max(frame_birds_counts, key=frame_birds_counts.get)
    max_value = frame_birds_counts[max_key]

    print("Highest count of birds amongst all frames is ", max_key, "with the value ", max_value)
else:
    print("No frames were analysed, so there is no bird count to report.")