<!-- # Content Planning and Publishing Crew -->

<!-- This notebook demonstrates how to create an AI crew for planning and publishing content using CrewAI Flows.
The crew takes a link to a blog post, downloads its content as Markdown using Firecrawl, analyzes it, generates a Twitter thread, and schedules it on Typefully. -->

<!-- ### Initialization and Setup
Initial imports for the CrewAI Flow and Crew and setting up the environment -->

In [1]:
# Importing necessary libraries
import getpass
import os
import datetime
import uuid
import yaml
import json
import subprocess
from pathlib import Path
import pydantic
from pydantic import BaseModel
from typing import Optional

# Firecrawl SDK
from firecrawl import FirecrawlApp

# Typefully scheduler
import scheduler

# Importing Crew related components
from crewai import Agent, Task, Crew, LLM

# Importing CrewAI Flow related components
from crewai.flow import Flow, listen, start, router, or_

from dotenv import load_dotenv
load_dotenv()
# Apply a patch to allow nested asyncio loops in Jupyter
import nest_asyncio
nest_asyncio.apply()

<!-- ## Setup LLM

If you want to use the local Ollama model configured below instead of the default, make sure Ollama is installed and running on your machine. -->

In [2]:
# By default, the LLM is set to OpenAI
#  llm = LLM(
#     model="ollama/llama3.2",
#     base_url="http://localhost:11434"
# )

<!-- # Blog Post URL -->

In [2]:
# URL of the blog post that the flow scrapes and turns into a Twitter thread.
# It is also the default value of ContentPlanningState.blog_post_url.
blog_post_url = "https://www.firecrawl.dev/blog/ai-powered-web-scraping-solutions-2025"

<!-- ## Plan for our Flow

1. Scrape the blog post
2. Decide where to post using a router
3. Kickoff the right **[Crew of Agents]** to prepare a draft ready to publish
4. Publish it using typefully -->

<!-- # Twitter Thread Planning Crew

These models capture the output of the planning crew, which is then used to create the Twitter thread and schedule it on Typefully. -->

In [4]:
class Tweet(BaseModel):
    """Represents an individual tweet in a thread"""
    # NOTE(review): this model is nested in Thread, which is used as a task's
    # output_pydantic. The docstring and field defaults may end up in the
    # schema shown to the LLM - confirm before rewording or changing them.

    # Text of the tweet; the flow prints this when listing the planned thread.
    content: str
    is_hook: bool = False  # Identifies if this is the opening/hook tweet
    # NOTE(review): a mutable `[]` default is presumably copied per instance by
    # pydantic rather than shared - verify against the pydantic version in use.
    media_urls: Optional[list[str]] = []  # Optional media attachments (images, code snippets)

class Thread(BaseModel):
    """Represents a Twitter thread"""
    # Passed as `output_pydantic` to the create_twitter_thread_plan task, and
    # read back by the flow through `result.pydantic.tweets`.
    # NOTE(review): the docstring may be part of the schema shown to the LLM -
    # confirm before rewording it.
    topic: str  # Main topic/subject of the thread
    tweets: list[Tweet]  # List of tweets in the thread

In [5]:
from crewai_tools import (
    DirectoryReadTool,
    FileReadTool,
)

# Load agent and task configurations from YAML files.
# The encoding is pinned to UTF-8 so the configs are decoded the same way on
# every platform instead of depending on the locale's default encoding.
with open('config/planner_agents.yaml', 'r', encoding='utf-8') as f:
    agents_config = yaml.safe_load(f)

with open('config/planner_tasks.yaml', 'r', encoding='utf-8') as f:
    tasks_config = yaml.safe_load(f)

/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pydantic/_internal/_config.py:295: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  warn(
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py:34: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/
  @validator("website_url")
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/crewai_tools/tools/selenium_scraping_tool/selenium_scrapin

In [6]:
# --- Agents ---------------------------------------------------------------
# Each agent is configured from its YAML entry and gets its own freshly
# constructed directory-reading and file-reading tools.
draft_analyzer = Agent(
    config=agents_config['draft_analyzer'],
    tools=[DirectoryReadTool(), FileReadTool()],
)
twitter_thread_planner = Agent(
    config=agents_config['twitter_thread_planner'],
    tools=[DirectoryReadTool(), FileReadTool()],
)

# --- Tasks ----------------------------------------------------------------
analyze_draft = Task(config=tasks_config['analyze_draft'], agent=draft_analyzer)

# The planning task declares Thread as its pydantic output model; the flow
# later reads the structured result through `result.pydantic`.
create_twitter_thread_plan = Task(
    config=tasks_config['create_twitter_thread_plan'],
    agent=twitter_thread_planner,
    output_pydantic=Thread,
)

# --- Crew -----------------------------------------------------------------
planning_crew = Crew(
    agents=[draft_analyzer, twitter_thread_planner],
    tasks=[analyze_draft, create_twitter_thread_plan],
    verbose=False,
)

<!-- # LinkedIn Post Planning Crew -->

<!-- # Create Content Planning Flow

A Flow that plans content for Twitter and LinkedIn, using a separate crew for each platform (only the Twitter crew is implemented in this notebook). -->

In [8]:
from crewai.flow.flow import Flow, listen, start, router, or_
import re
class ContentPlanningState(BaseModel):
    """
    State for the content planning flow
    """
    # URL of the blog post to scrape; defaults to the notebook-level blog_post_url.
    blog_post_url: str = blog_post_url
    # Location of the scraped draft. Annotated as `str` (it was `Path`) because
    # the default and every value the flow stores here are plain strings, and
    # the value is handed to the crew as a kickoff input. The old annotation
    # never matched the data actually held in the field.
    draft_path: str = "workdir/"
    # Target platform, forwarded to scheduler.schedule by the flow.
    post_type: str = "twitter"
    # File with example threads, forwarded to the planning crew as an input.
    path_to_example_threads: str = "workdir/example_thread.txt"



class CreateContentPlanningFlow(Flow[ContentPlanningState]):
    """Flow that scrapes a blog post, plans a Twitter thread and hands it to the scheduler.

    Steps, chained with ``@start`` / ``@listen``:

    1. ``scrape_blog_post`` - download the post as markdown into ``workdir/``.
    2. ``twitter_draft``    - run ``planning_crew`` on the saved draft.
    3. ``publish``          - pass the planned thread to ``scheduler.schedule``.
    """

    # Scrape the blog post
    # No need for AI Agents on this step, so we just use regular Python code
    @start()
    def scrape_blog_post(self):
        """Fetch the blog post as markdown and save it to ``workdir/myblog.md``.

        Reads ``state.blog_post_url`` and stores the path of the written file
        in ``state.draft_path``.

        Raises:
            ValueError: if the scrape result contains no markdown.
        """
        print(f"# fetching draft from: {self.state.blog_post_url}")
        app = FirecrawlApp(api_key=os.getenv("FIRECRAWL_API_KEY"))
        scrape_result = app.scrape_url(self.state.blog_post_url, formats=['markdown'])
        # The result is accepted either as a plain dict or as an object
        # exposing a `.markdown` attribute.
        markdown = scrape_result['markdown'] if isinstance(scrape_result, dict) else scrape_result.markdown
        if markdown is None:
            # Fail with a clear message before touching the filesystem, rather
            # than with a TypeError from f.write() after truncating the file.
            raise ValueError(
                f"No markdown content returned for: {self.state.blog_post_url}"
            )

        os.makedirs('workdir', exist_ok=True)
        self.state.draft_path = 'workdir/myblog.md'
        # Scraped posts routinely contain non-ASCII characters (emoji, typographic
        # quotes); write UTF-8 explicitly so this does not depend on the locale.
        with open(self.state.draft_path, 'w', encoding='utf-8') as f:
            f.write(markdown)

    @listen(scrape_blog_post)
    def twitter_draft(self):
        """Run the planning crew on the saved draft and print the planned tweets.

        Returns:
            The value returned by ``planning_crew.kickoff``; its ``pydantic``
            attribute is read here as a ``Thread``-shaped object with ``tweets``.
        """
        print(f"# Planning content for: {self.state.draft_path}")
        result = planning_crew.kickoff(inputs={
            'draft_path': self.state.draft_path,
            'path_to_example_threads': self.state.path_to_example_threads,
        })
        print(f"# Planned content for {self.state.draft_path}:")
        for tweet in result.pydantic.tweets:
            print(f"    - {tweet.content}")
        return result

    @listen(twitter_draft)
    def publish(self, plan):
        """Send the planned thread to the scheduler and print the draft link.

        Args:
            plan: the value returned by ``twitter_draft``, passed through
                unchanged as ``thread_model``.
        """
        print(f"# Publishing thread for: {self.state.draft_path}")
        # NOTE(review): the original comment said "Schedule for 1 hour from now",
        # but no time is passed here; any delay is decided inside
        # scheduler.schedule - confirm.
        response = scheduler.schedule(
            thread_model=plan,
            post_type=self.state.post_type
        )
        print(f"# Thread created for: {self.state.draft_path}")
        print(f"Here's the link to the created draft: {response['share_url']}")



<!-- Implementing helper methods to plot and execute the flow in a Jupyter notebook -->

In [9]:
# Instantiate the flow; no arguments are passed to the constructor.
flow = CreateContentPlanningFlow()
# Run the flow. As the last expression of the cell, the return value of
# kickoff() is what the notebook displays as the cell result.
flow.kickoff()

[1m[35m Flow started with ID: 8112d170-8f67-4814-b088-acdcfd111147[00m


# fetching draft from: https://www.firecrawl.dev/blog/ai-powered-web-scraping-solutions-2025


# Planning content for: workdir/myblog.md
# Planned content for workdir/myblog.md:
    - Top 7 AI-Powered Web Scraping Solutions in 2025
    - What is AI-Powered Web Scraping? 🤖

It's a game-changer! Users can now communicate data needs in plain language, allowing complex web structures to be navigated easily. A huge leap from traditional methods! #WebScraping #AI
    - Leading AI Web Scraping Solutions 🌟

Discover tools that handle JavaScript, manage proxies automatically, and process natural language seamlessly—making data extraction accessible to everyone! #TechInnovation
    - Why Firecrawl is the Best Choice 🔥

Advanced features like real-time adaptation, anti-bot countermeasures, and a developer-friendly API set it apart in the world of AI scraping! #DataExtraction
    - Other Notable AI Scraping Tools 🚀

Explore ScrapingBee, Import.io, Browse.AI & others. Get the features you need, whether you're an indie dev or part of a large enterprise! #WebScrapingTools
    - Key Takeaway fo

# Publishing thread for: workdir/myblog.md
######## Thread JSON:  {'topic': 'Top 7 AI-Powered Web Scraping Solutions in 2025', 'tweets': [{'content': 'Top 7 AI-Powered Web Scraping Solutions in 2025', 'is_hook': True, 'media_urls': []}, {'content': "What is AI-Powered Web Scraping? 🤖\n\nIt's a game-changer! Users can now communicate data needs in plain language, allowing complex web structures to be navigated easily. A huge leap from traditional methods! #WebScraping #AI", 'is_hook': False, 'media_urls': ['https://www.firecrawl.dev/images/blog/ai-scraping-tools/ai-scraping-tools.jpg']}, {'content': 'Leading AI Web Scraping Solutions 🌟\n\nDiscover tools that handle JavaScript, manage proxies automatically, and process natural language seamlessly—making data extraction accessible to everyone! #TechInnovation', 'is_hook': False, 'media_urls': ['https://www.firecrawl.dev/images/blog/ai-scraping-tools/firecrawl.png', 'https://www.firecrawl.dev/images/blog/ai-scraping-tools/extract.chat.png'