<a href="https://colab.research.google.com/github/borjasolerme/AI-YouTube-Trends-Researcher/blob/main/Claude_youtube_trends_researcher.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install youtube-search-python
!pip install anthropic
!pip install requests
!pip install datetime
!pip install python-docx

In [92]:
import asyncio
import re
import requests
from youtubesearchpython import CustomSearch, VideosSearch
import anthropic
import nest_asyncio
import time
import logging
from datetime import datetime, timedelta
from google.colab import files
from docx import Document
from collections import defaultdict

# Patch asyncio so run_until_complete() can be called while a loop is already
# running (the notebook case this file targets).
nest_asyncio.apply()  # Apply nest_asyncio to allow nested event loops

# Sent as the "x-api-key" header by generate_text(); the placeholder value
# must be replaced before any request can succeed.
ANTHROPIC_API_KEY = "YOUR_API_KEY_HERE"  # Replace with your Anthropic API key

# Module-wide logger; generate_text() reports request failures through it.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def remove_first_line(test_string):
    """Drop a leading "Here ...:" preamble line from a model response.

    The first line is removed (together with its newline) only when the text
    starts with "Here" and that first line, ignoring surrounding whitespace,
    ends with a colon. Any other text, including a single line without a
    newline, is returned unchanged.
    """
    if not test_string.startswith("Here"):
        return test_string

    opening_line, newline, remainder = test_string.partition("\n")
    if newline and opening_line.strip().endswith(":"):
        return remainder
    return test_string

def generate_text(prompt, model="claude-3-haiku-20240307", max_tokens=2000, temperature=0.7, timeout=120):
    """Send a single-turn prompt to the Anthropic Messages API and return the reply text.

    Args:
        prompt: User message content.
        model: Model identifier placed in the request body.
        max_tokens: Upper bound on generated tokens, passed through as-is.
        temperature: Sampling temperature, passed through as-is.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The text of the first content block, stripped and with any leading
        "Here ...:" preamble line removed, or None when the request fails or
        the response body does not have the expected structure.
    """
    headers = {
        "x-api-key": ANTHROPIC_API_KEY,
        "anthropic-version": "2023-06-01",
        "content-type": "application/json"
    }
    data = {
        "model": model,
        "max_tokens": max_tokens,
        "temperature": temperature,
        "system": "You are a world-class researcher. Analyze the given information and generate a well-structured report.",
        "messages": [{"role": "user", "content": prompt}],
    }
    try:
        response = requests.post(
            "https://api.anthropic.com/v1/messages",
            headers=headers,
            json=data,
            timeout=timeout,
        )
        response.raise_for_status()
        response_text = response.json()['content'][0]['text']
    except requests.exceptions.RequestException as e:
        logger.error(f"Error generating text: {str(e)}")
        return None
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # A 2xx response whose body is not JSON or lacks content[0].text is
        # reported the same way as a transport failure instead of crashing.
        logger.error(f"Error generating text: unexpected response format ({e!r})")
        return None
    return remove_first_line(response_text.strip())

def get_top_topics(trending_videos, count=30):
    """Rank words from video description snippets by how often they occur.

    Args:
        trending_videos: Iterable of video dicts. Only 'descriptionSnippet'
            (a list whose first item has a 'text' key) and
            'viewCount' -> 'text' are read.
        count: Maximum number of words to return.

    Returns:
        A list of (word, occurrences, views) tuples sorted by occurrences,
        highest first. Each occurrence of a word adds the view count of the
        video it was found in to that word's total.
    """
    topic_count = defaultdict(lambda: {'count': 0, 'views': 0})

    for video in trending_videos:
        snippet = video.get('descriptionSnippet')
        if not snippet:
            continue

        # View text looks like "5,420,016 views"; keep only the digits so the
        # suffix and separators cannot break int(). Missing text counts as 0.
        view_text = (video.get('viewCount') or {}).get('text') or ''
        digits = re.sub(r'\D', '', view_text)
        video_views = int(digits) if digits else 0

        for word in re.findall(r'\w+', snippet[0]['text']):
            entry = topic_count[word]
            entry['count'] += 1
            entry['views'] += video_views

    sorted_topics = sorted(topic_count.items(), key=lambda x: x[1]['count'], reverse=True)
    return [(topic, data['count'], data['views']) for topic, data in sorted_topics[:count]]

def rank_video_trends(trending_videos):
    """Ask the model to group the given videos into topics ranked by frequency.

    Args:
        trending_videos: List of video dicts; 'title' and
            'viewCount' -> 'text' are read from each.

    Returns:
        The model's ranked-trends text as returned by generate_text(), which
        is None when the request fails.
    """
    titles = [video['title'] for video in trending_videos]

    # Keep only the digits of the view text so "5,420,016 views", "1 view" and
    # "No views" all parse; a missing or digit-free value counts as 0.
    views = []
    for video in trending_videos:
        view_text = (video.get('viewCount') or {}).get('text') or ''
        digits = re.sub(r'\D', '', view_text)
        views.append(int(digits) if digits else 0)

    prompt = f"""
    I am going to give you a list of videos from Youtube along with their view counts. After you will need to analyze them and rank the videos by topics.

    <examples>
    {format_examples_with_views(titles, views)}
    </examples>

    <instructions>
    Analyze the video titles and view counts, then perform the following tasks:
    - Identify the main topic or theme of each video based on the video titles.
    - Group the videos by their identified topics.
    - Use the most relevant keywords or phrases from the video titles to name each topic.
    - Rank the grouped topics from most repeated to least repeated.
    - Include the total view count for each topic.
    - Give me the output directly without additional comments like in the <output_format> tags
    </instructions>

    <output_format>
    Trends:
    1. [Topic 1]: [Count], [Total Views]
    - [Video Title 1] ([View Count 1])
    - [Video Title 2] ([View Count 2])
    ...
    2. [Topic 2]: [Count], [Total Views]
    - [Video Title 1] ([View Count 1])
    - [Video Title 2] ([View Count 2])
    ...
    ...
    </output_format>

    <prompt>
    Perform the analysis and provide the output in the specified format.
    </prompt>

    Go step by step. Take your time and consider each step carefully to develop the best possible answer.
    """

    ranked_trends = generate_text(prompt)
    return ranked_trends

def generate_mini_report(topic, trending_videos, ranked_trends):
    """Ask the model for a consulting-style report on the trends for a topic.

    Args:
        topic: Topic string interpolated into the prompt's context section.
        trending_videos: List of video dicts; 'title' and
            'viewCount' -> 'text' are read from each.
        ranked_trends: Ranked-trends text embedded verbatim in the prompt.

    Returns:
        The report text as returned by generate_text(), which is None when
        the request fails.
    """
    titles = [video['title'] for video in trending_videos]

    # format_examples_with_views() appends " views" itself, so drop the suffix
    # already present in the raw text ("5,420,016 views") to avoid sending
    # "5,420,016 views views" to the model. Missing text is shown as "0".
    views = []
    for video in trending_videos:
        view_text = (video.get('viewCount') or {}).get('text') or ''
        count_text = re.sub(r'\s*views?\s*$', '', view_text, flags=re.IGNORECASE)
        views.append(count_text or "0")

    prompt = f"""
    I am going to give you a list of videos from Youtube and ranked trends from those videos. After you will need to analyze them and generate a report.

    <examples>
    {format_examples_with_views(titles, views)}
    </examples>

    <ranked_trends>
    {ranked_trends}
    </ranked_trends>

    <context>
    Analyze the trending videos and ranked video trends to identify the key trends and insights related to the topic '{topic}'. Generate a report similar to McKinsey and Boston Consulting Group reports.
    </context>

    <instructions>
    - Summarize the main trends and patterns observed in the trending videos and ranked video trends.
    - Highlight the videos with the highest view counts and discuss their impact on the overall trends.
    - Provide insights on how to benefit from the observed trends and create alternative content following the successful patterns.
    - Include actionable recommendations based on the analysis, focusing on leveraging the most viewed videos and their characteristics.
    - Give me the output directly without additional comments like in the <output_format> tags
    </instructions>

    <prompt>
    Generate a comprehensive and data-driven report on the top trends related to the given topic based on the trending YouTube videos and ranked video trends.
    </prompt>

    <output_format>
    Title page
    Table of Contents
    Abstract
    Key findings
    Trends rising
    Opportunities for content creation
    Topics to avoid
    Conclusion/recommendations
    </output_format>

    Go step by step. Take your time and consider each step carefully to develop the best possible answer.
    """

    mini_report = generate_text(prompt)
    return mini_report

async def generate_video_ideas(topic, examples, views, mini_report, num_ideas=5):
    """Ask the model for new video titles modelled on the example videos.

    Args:
        topic: Accepted for the caller's convenience; it is not interpolated
            into the prompt.
        examples: Video titles shown to the model.
        views: View counts paired with `examples` by position.
        mini_report: Report text embedded verbatim in the prompt.
        num_ideas: Number of ideas requested from the model.

    Returns:
        The non-blank lines of the model's reply, each stripped of
        surrounding whitespace, or an empty list when no reply was obtained.
    """
    prompt = f"""
    I am going to give you a list of videos from Youtube and a report I have generated. After you will need to give me new video ideas to create.

    <examples>
    {format_examples_with_views(examples, views)}
    </examples>

    <mini_report>
    {mini_report}
    </mini_report>

    <context>
    Use the insights from the trending videos, especially the ones with the highest view counts, and the mini report to inspire new video ideas that are likely to perform well and attract a similar audience.
    </context>

    <instructions>
    - Generate {num_ideas} distinct video ideas that are similar in style and format to the provided examples.
    - Each idea should be creative, engaging, and closely related to the topic and the trending videos.
    - Provide a title for each video idea, following the patterns observed in the examples.
    - Give me the output directly without additional comments like in the <output_format> tags
    </instructions>

    <prompt>
    Generate {num_ideas} video ideas related to the given topic based on the trending YouTube videos and the mini report.
    </prompt>

    <output_format>
    1. [Video title]
    2. [Video title]
    3. [Video title]
    ...
    ...
    </output_format>

    Go step by step. Take your time and consider each step carefully to develop the best possible answer.
    """

    reply = generate_text(prompt)
    if not reply:
        # Covers both a failed request (None) and an empty reply.
        return []

    stripped_lines = (line.strip() for line in reply.split("\n"))
    return [line for line in stripped_lines if line]

def format_examples_with_views(examples, views):
    """Render titles and view counts as a newline-terminated bullet list.

    Items are paired by position; pairing stops at the shorter of the two
    sequences. Each line has the form "- <title> (<count> views)".
    """
    return "".join(
        f"- {title} ({count} views)\n" for title, count in zip(examples, views)
    )

def generate_document_content(mini_report, video_ideas, trending_videos):
    """Assemble the plain-text body of the exported report.

    Sections, in order: the mini report, the five most viewed videos, the
    numbered video ideas, and the full list of videos.

    Args:
        mini_report: Report text, or None when report generation failed; a
            placeholder line is written in that case.
        video_ideas: Iterable of idea strings, numbered from 1 in the output.
        trending_videos: List of video dicts; 'title', 'link' and
            'viewCount' -> 'text' are read from each.

    Returns:
        The assembled document as a single string.
    """
    def count_text(video):
        # The raw text already ends in " views" ("5,420,016 views"); strip it
        # so the line template below does not print "views views".
        raw = (video.get('viewCount') or {}).get('text') or ''
        return re.sub(r'\s*views?\s*$', '', raw, flags=re.IGNORECASE) or "0"

    def count_value(video):
        # Digits only, so "1 view" and "No views" sort as 1 and 0 instead of
        # raising ValueError.
        digits = re.sub(r'\D', '', count_text(video))
        return int(digits) if digits else 0

    def video_line(video):
        return f"- {video['title']} ({count_text(video)} views) - {video['link']}\n"

    report_text = mini_report if mini_report else "Mini report unavailable."

    parts = ["Mini Report:\n", report_text + "\n\n"]

    parts.append("Most Viewed Trending Videos:\n")
    most_viewed = sorted(trending_videos, key=count_value, reverse=True)[:5]
    parts.extend(video_line(video) for video in most_viewed)
    parts.append("\n")

    parts.append("Video Ideas (Inspired by Trending Videos):\n")
    parts.extend(f"{i}. {idea}\n" for i, idea in enumerate(video_ideas or [], start=1))
    parts.append("\n")

    parts.append("All Trending YouTube Videos:\n")
    parts.extend(video_line(video) for video in trending_videos)

    return "".join(parts)

async def main():
    """Run the interactive workflow end to end.

    Prompts for a topic and a number of ideas, searches YouTube, asks the
    model for ranked trends, a mini report and video ideas, prints the
    results, writes them to a .docx file named after the topic and the
    current timestamp, and triggers a Colab download of that file.
    """
    topic = input("Enter a topic to generate video ideas for: ")
    num_ideas = int(input("Enter the number of video ideas to generate: "))

    def count_text(video):
        # The raw text already ends in " views"; strip it because every place
        # that displays it appends " views" again.
        raw = (video.get('viewCount') or {}).get('text') or ''
        return re.sub(r'\s*views?\s*$', '', raw, flags=re.IGNORECASE) or "0"

    # Perform the search without date range filter
    videos_search = VideosSearch(topic, limit=40)
    trending_videos = videos_search.result()['result']

    ranked_trends = rank_video_trends(trending_videos)
    print("\nRanked Video Trends:")
    print(ranked_trends)

    mini_report = generate_mini_report(topic, trending_videos, ranked_trends)
    if mini_report is None:
        # generate_text() returns None on failure; continue with an empty
        # report rather than crashing while assembling the document.
        logger.warning("Mini report generation failed; continuing without it.")
        mini_report = ""

    video_ideas = await generate_video_ideas(
        topic,
        [video['title'] for video in trending_videos],
        [count_text(video) for video in trending_videos],
        mini_report,
        num_ideas,
    )

    document_content = generate_document_content(mini_report, video_ideas, trending_videos)

    print("\nMini Report:")
    print(mini_report)

    print("\nVideo Ideas:")
    for idea in video_ideas:
        print(idea)

    print("\nTrending YouTube Videos:")
    for video in trending_videos:
        print(f"- {video['title']} ({count_text(video)} views) - {video['link']}")

    # Generate a unique filename based on the topic and current timestamp.
    # Characters that are invalid in file names (path separators in
    # particular) are replaced so an arbitrary topic cannot break the save.
    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', topic).strip() or "report"
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{safe_topic}_{timestamp}.docx"

    # Create a new Word document
    doc = Document()

    # Add the document content to the Word document
    doc.add_paragraph(document_content)

    # Save the Word document
    doc.save(filename)

    print(f"\nDocument saved as: {filename}")

    files.download(filename)

# Run the interactive workflow only when this module is executed directly.
if __name__ == "__main__":
    # Drives the main() coroutine to completion on the current event loop.
    # NOTE(review): presumably depends on nest_asyncio.apply() having been
    # called when a loop is already running (e.g. in a notebook) — confirm.
    asyncio.get_event_loop().run_until_complete(main())


Enter a topic to generate video ideas for: formula 1
Enter the number of video ideas to generate: 10

Ranked Video Trends:
Trends:
1. Formula 1 Race Highlights: 5, 19,966,659 views
- Race Highlights | 2024 Japanese Grand Prix (5,420,016 views)
- Race Highlights | 2024 Australian Grand Prix (8,577,670 views)
- Race Highlights | 2024 Saudi Arabian Grand Prix (5,968,303 views)
- Qualifying Highlights | 2024 Japanese Grand Prix (3,419,964 views)
- Qualifying Highlights | 2024 Saudi Arabian Grand Prix (4,080,052 views)

2. Formula 1 News and Analysis: 6, 1,628,789 views
- Seven things the Japanese GP revealed about F1 2024 (475,859 views)
- The brutal cost of Williams's shocking F1 crash streak (344,307 views)
- HUGE TENSION At Mercedes After Hamilton's FURIOUS MESSAGE! (64,208 views)
- Aston Martin JUST DESTROYED Stroll with SHOCKING STATEMENT! (54,003 views)
- Audi busca a Checo y Sainz pero Red Bull le cierra la puerta | ¡Ojo! Mercado de pilotos se mueve (40,179 views)
- WHY 2026 is a cr

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>