# YouTube Shorts Recommender (Week 1)

This notebook implements the **YouTube Viral Finder** logic.
It takes a YouTube URL, fetches the video's transcript (English or Indonesian), and sends it to an LLM through a local Ollama endpoint, which recommends the best segments for short-form content.

In [None]:
import re
from youtube_transcript_api import YouTubeTranscriptApi
from openai import OpenAI

# Configuration
# Base URL handed to the OpenAI client in analyze_transcript; it points at a
# server on this machine (port 11434, /v1 path).
OLLAMA_BASE_URL = "http://localhost:11434/v1"
# Model identifier sent with the chat-completion request in analyze_transcript.
# NOTE(review): the "-cloud" suffix suggests this model may not run locally,
# while the notebook intro says "local LLM" — confirm which is intended.
MODEL = "gpt-oss:20b-cloud"

# System message for the chat request: sets an editor/strategist persona and
# asks for a timestamp, hook, and reasoning per chosen segment, in Markdown.
SYSTEM_PROMPT = """
You are an expert video editor and viral content strategist. 
Your goal is to analyze a video transcript and identify the BEST segments for short-form content (TikTok, Shorts, Reels).

For each segment you choose, provide:
1. **Timestamp**: Approximate start and end time (based on the text flow).
2. **Hook**: Why this part grabs attention immediately.
3. **Reasoning**: Why this specific part will go viral (e.g., humor, shock, strong opinion, useful tip).

Output your answer in clear Markdown.
"""

In [None]:
# The 11-character ID is accepted only where a YouTube URL actually carries it:
# a `v=` parameter, a youtu.be short link, or an /embed/, /shorts/, /live/,
# /v/ or /e/ path. The trailing lookahead rejects longer tokens, so a
# 24-character channel ID is never truncated into a bogus video ID.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:\bv=|youtu\.be/|/(?:embed|shorts|live|v|e)/)"
    r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
)


def extract_video_id(url):
    """
    Extracts the YouTube Video ID from a URL.

    Supports standard watch URLs (``?v=<id>``, in any parameter position),
    short ``youtu.be/<id>`` links, and ``/embed/``, ``/shorts/``, ``/live/``,
    ``/v/`` and ``/e/`` paths.

    Args:
        url: The URL text to inspect.

    Returns:
        The 11-character video ID, or None when ``url`` is not a string or
        contains no recognizable video ID.
    """
    # None or other non-string input is "no ID", not a TypeError from `re`.
    if not isinstance(url, str):
        return None

    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None

def _format_transcript_line(item):
    """Renders one transcript entry as a ``[MM:SS] text`` line ending in a newline."""
    # Entries may be dicts or attribute-style objects depending on which
    # library call produced them.
    if isinstance(item, dict):
        start, text = item['start'], item['text']
    else:
        start = getattr(item, 'start', 0)
        text = getattr(item, 'text', "")

    # Fractional seconds are dropped; minutes are not wrapped at 60, so a
    # position past one hour renders as e.g. [75:30].
    minutes, seconds = divmod(int(start), 60)
    return f"[{minutes:02d}:{seconds:02d}] {text}\n"


def get_transcript(video_id):
    """
    Fetches the transcript for a given YouTube video ID.
    Tries English ('en') and Indonesian ('id').

    Args:
        video_id: The YouTube video ID (e.g. the value returned by
            extract_video_id). None or an empty value is rejected up front.

    Returns:
        The transcript as text, one ``[MM:SS] text`` line per entry.
        On any failure, a string starting with "Error" is returned instead of
        raising, so callers detect failure with ``startswith("Error")``.
    """
    # extract_video_id returns None for unrecognized URLs; fail fast with a
    # clear message instead of handing an empty ID to the library.
    if not video_id:
        return "Error: No video ID provided."

    languages = ['en', 'id']

    try:
        # The library's entry point differs between releases, so probe for
        # whichever one this installation exposes.
        if hasattr(YouTubeTranscriptApi, 'get_transcript'):
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
        elif hasattr(YouTubeTranscriptApi, 'list_transcripts'):
            transcript_list = YouTubeTranscriptApi.list_transcripts(video_id).find_transcript(languages).fetch()
        elif hasattr(YouTubeTranscriptApi, 'fetch'):
            try:
                # Instance-style call first ...
                transcript_list = YouTubeTranscriptApi().fetch(video_id, languages=languages)
            except TypeError:
                # ... falling back to a class-level call if that signature is rejected.
                transcript_list = YouTubeTranscriptApi.fetch(video_id, languages=languages)
        else:
            return "Error: No valid method found to fetch transcript."

        # Build the text in one pass rather than by repeated concatenation.
        return "".join(_format_transcript_line(item) for item in transcript_list)
    except Exception as e:
        # Best-effort by design: any library or network failure becomes an
        # "Error..." string for the caller to display.
        return f"Error fetching transcript: {str(e)}"

In [None]:
def analyze_transcript(transcript_text, max_chars=22500):
    """
    Sends the transcript to Ollama for analysis.

    Args:
        transcript_text: The timestamped transcript, one entry per line.
        max_chars: Upper bound on how many transcript characters are sent.
            The default of 22,500 is the original estimate for roughly
            30 minutes of video.

    Returns:
        The model's Markdown answer as a string; an empty string if the
        response carries no text content.
    """
    # The key is a placeholder value (presumably ignored by the Ollama
    # endpoint — confirm against the Ollama docs).
    client = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')

    truncated_text = transcript_text[:max_chars]
    if len(transcript_text) > max_chars:
        # A hard slice can cut the final "[MM:SS] text" line in half; trim
        # back to the last complete line so the model never sees a fragment.
        last_newline = truncated_text.rfind("\n")
        if last_newline != -1:
            truncated_text = truncated_text[:last_newline + 1]

    user_message = f"""
    Here is the transcript of a YouTube video, with timestamps:
    
    {truncated_text}
    
    Please identify the Top 3 segments for a YouTube Short.
    """

    print("Sending to AI... (This may take a minute)")
    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message}
        ]
    )

    # The content field can be None; normalise so callers always get a string.
    content = response.choices[0].message.content
    return content or ""

In [None]:
# --- MAIN EXECUTION ---

# Step 1: the video to analyze — edit this URL to try a different one.
url = "https://www.youtube.com/watch?v=gs3VxACKogM"

video_id = extract_video_id(url)
print(f"Video ID: {video_id}")

# Step 2: download the timestamped transcript. Failures come back as a string
# starting with "Error" rather than as an exception.
transcript = get_transcript(video_id)

if not transcript.startswith("Error"):
    # Success: report the size and show a short preview.
    print(
        f"Transcript Length: {len(transcript)} characters",
        "First 500 chars preview:",
        transcript[:500],
        sep="\n",
    )
else:
    print(transcript)

In [None]:
# Step 3: ask the model for segment recommendations and render its Markdown
# answer. Nothing runs when the transcript step reported an error.
if not transcript.startswith("Error"):
    from IPython.display import Markdown

    result = analyze_transcript(transcript)
    display(Markdown(result))