In [None]:
pip install crewai litellm python-dotenv tqdm crewai-tools opencv-python-headless

In [None]:
import os
import json
import random
import time
import logging
import re
import csv
import requests
import pandas as pd
from datetime import datetime
from typing import Dict, Any, Optional, List
from concurrent.futures import ThreadPoolExecutor, as_completed

from tqdm import tqdm
from pydantic import BaseModel
from litellm import completion

# -------------------- Blog Generator Code --------------------

class FlexibleBlogArticleGenerator:
    """
    Generates a ~2000-2500 word blog article with a flexible number of sections.
    At the end, SEO metadata (seoTitle, seoDescription, seoKeywords, excerpt, tags)
    is generated using litellm completion and added to the front matter.
    """

    # Cover image written to the front matter when blog_info has no "cover_image".
    DEFAULT_COVER_IMAGE = "https://images.pexels.com/photos/5473955/pexels-photo-5473955.jpeg?auto=compress&cs=tinysrgb&w=1260&h=750&dpr=1"

    # Top-level numbered outline entry such as "1. Opening", "2) Body" or "3 - Wrap-up".
    # The negative lookahead rejects sub-numbering like "1.1 Hook" or ranges like "2024-25".
    _MAIN_ITEM_RE = re.compile(r"^\d+\s*[.):\-]\s*(?!\d)(.*)$")

    def __init__(self, model_config):
        """
        model_config example:
        {
            'model_name': 'friendliai/meta-llama-3.3-70b-instruct',  # blog generation model
            'rate_limit_delay': (3, 6)  # delay range in seconds
        }
        """
        self.model_config = model_config
        self.rate_limit_range = model_config.get('rate_limit_delay', (2, 5))
        self.memory = ""  # to accumulate text from each section to avoid repetition

        # basicConfig() ignores its arguments once the root logger has handlers, but the
        # FileHandler passed to it would still be constructed (and its file opened) on
        # every instantiation. Only build the handlers when they will actually be used.
        if not logging.getLogger().handlers:
            logging.basicConfig(
                level=logging.INFO,
                format='%(asctime)s - %(levelname)s - %(message)s',
                handlers=[logging.StreamHandler(), logging.FileHandler('blog_generation.log')]
            )
        logging.info("Flexible Blog Article Generator initialized.")
        logging.info("Using blog model: %s", self.model_config.get('model_name', 'No Model Specified'))

    def _rate_limit_cooldown(self):
        """Sleeps a random duration within the configured range to mitigate rate limits."""
        delay = random.uniform(*self.rate_limit_range)
        with tqdm(total=100, desc="Rate limit cooldown", bar_format="{desc}: {percentage:3.0f}%|{bar}|{n_fmt}/{total_fmt}") as pbar:
            for _ in range(100):
                time.sleep(delay / 100)
                pbar.update(1)

    def _safe_completion(self, **kwargs):
        """
        Wrapper for litellm completion with retry mechanism.

        Calls completion(**kwargs) up to three times, pausing 2 seconds between
        attempts. A response without choices, without a message, or with empty
        message content counts as a failure.

        Returns the completion result of the first successful attempt.
        Raises RuntimeError (chained to the last underlying error) when every attempt fails.
        """
        retries = 3
        last_error = None
        for attempt in range(retries):
            try:
                result = completion(**kwargs)
                # Ensure the result has the expected structure
                if not result.choices or not hasattr(result.choices[0], 'message') or not result.choices[0].message.content:
                    raise ValueError("Invalid completion response structure.")
                return result
            except Exception as e:
                last_error = e
                logging.error(f"Completion error: {e}. Retrying ({attempt+1}/{retries})...")
                # No point sleeping after the final attempt; we are about to give up.
                if attempt < retries - 1:
                    time.sleep(2)
        raise RuntimeError("Failed to get valid completion response after retries.") from last_error

    @staticmethod
    def _is_blank(value):
        """Returns True for None, a float NaN (how pandas represents an empty CSV cell) or an empty/whitespace string."""
        if value is None:
            return True
        if isinstance(value, float) and value != value:  # NaN is the only float unequal to itself
            return True
        return not str(value).strip()

    @classmethod
    def _parse_outline_sections(cls, outline_text) -> List[str]:
        """
        Extracts the main section titles from the outline text.

        A line is a main section when, after removing Markdown decoration
        (bullets, '#', '*', trailing ':'), it either
          - starts with top-level numbering ("1. ", "2) ", "3 - "), or
          - is not a bullet item, does not start with a digit, and mentions
            "opening" or "conclusion".
        Sub-numbered lines ("1.1 ...") and bullet sub-points are skipped so they do
        not become sections of their own. Returned titles have numbering removed.
        """
        sections = []
        for raw_line in outline_text.splitlines():
            stripped = raw_line.strip()
            if not stripped:
                continue
            is_bullet = stripped.startswith(("- ", "* ", "+ ", "• "))
            text = stripped.lstrip("-#*+• ").rstrip(":* ").strip()
            if not text:
                continue

            numbered = cls._MAIN_ITEM_RE.match(text)
            if numbered:
                section_title = numbered.group(1).strip(" .:-#*")
            elif text[0].isdigit() or is_bullet:
                # Sub-numbering ("1.1 Hook") or a bullet sub-point: not a main section.
                continue
            elif "opening" in text.lower() or "conclusion" in text.lower():
                section_title = text
            else:
                continue

            if section_title:
                sections.append(section_title)
        return sections

    @staticmethod
    def _clean_seo_metadata(seo_metadata) -> str:
        """
        Removes Markdown code-fence lines and stray '---' lines from the model's SEO
        output; either would corrupt the YAML front matter it is embedded in.
        """
        kept = [
            line for line in str(seo_metadata).strip().splitlines()
            if not line.strip().startswith("```") and line.strip() != "---"
        ]
        return "\n".join(kept).strip()

    @staticmethod
    def _slugify_title(file_title) -> str:
        """
        Turns a title into a file name stem: lower-cased, ':' and '?' removed,
        spaces and path separators replaced by '-'. Falls back to "untitled"
        when nothing is left.
        """
        slug = str(file_title).lower().replace(':', '').replace('?', '').replace(' ', '-')
        # A '/' or '\' would make open() look for a sub-directory that does not exist
        # (or write outside the output folder), so neutralise them.
        slug = re.sub(r'[\\/\x00]', '-', slug)
        return slug or "untitled"

    def _build_front_matter(self, title, category, subcategory, blog_info, seo_metadata) -> str:
        """
        Assembles the '---' delimited front matter block.

        Dates come from blog_info["pub_date_str"] / ["up_date_str"]; a missing or
        blank publication date defaults to today, a missing or blank update date
        defaults to the publication date. Scalar values are emitted as
        double-quoted strings with '"' and '\\' escaped so they stay valid YAML.
        """
        def quoted(value):
            # A JSON string literal is also a valid YAML double-quoted scalar.
            return json.dumps(str(value), ensure_ascii=False)

        pub_date_str = blog_info.get("pub_date_str")
        if self._is_blank(pub_date_str):
            pub_date_str = datetime.now().strftime('%Y-%m-%d')
        up_date_str = blog_info.get("up_date_str")
        if self._is_blank(up_date_str):
            up_date_str = pub_date_str

        cover_image = blog_info.get("cover_image")
        if self._is_blank(cover_image):
            cover_image = self.DEFAULT_COVER_IMAGE

        return (
            f"---\n"
            f"title: {quoted(title)}\n"
            f"date: {quoted(pub_date_str)}\n"
            f"up_date: {quoted(up_date_str)}\n"
            f"category: {quoted(category)}\n"
            f"subcategory: {quoted(subcategory)}\n"
            f"coverImage: {quoted(cover_image)}\n"
            f"{self._clean_seo_metadata(seo_metadata)}\n"
            f"---\n"
        )

    def generate_outline(self, title, description):
        """
        Produces an outline with 3-7 total main sections: Opening + 1-5 Body + Conclusion.
        Each section has 2-3 bullet points describing subtopics.

        Returns the outline text produced by the model (also printed to stdout).
        """
        logging.info("Generating flexible outline...")
        outline_prompt = f"""
You are a professional content strategist. You will create a detailed outline for a long-form blog article (~2000-2500 words).

TITLE: "{title}"
DESCRIPTION: "{description}"

Outline Requirements:
1. Include ONE Opening section (approx. 150-200 words). 
2. Include ONE Conclusion (150-200 words) summarizing the blog’s key takeaways.
3. In between Opening & Conclusion, include 1-5 core Body sections (so 3-7 total main sections).
4. Each main section should have 2-3 bullet points describing what it will cover.
5. Provide variety: no repeated bullet points or identical ideas. 
6. Output ONLY the final structured outline in clear multi-level format (e.g. 1.2 type numbering or bullet points). 
   No disclaimers or extra commentary.
"""
        with tqdm(total=1, desc="Generating Outline") as pbar:
            response = self._safe_completion(
                model=self.model_config.get('model_name'),
                messages=[{"role": "user", "content": outline_prompt}],
                max_tokens=1024,
                temperature=0.1
            )
            pbar.update(1)

        outline_text = response.choices[0].message.content.strip()
        logging.info("Outline generation complete.")
        print(outline_text)
        self._rate_limit_cooldown()
        return outline_text

    def generate_section(self, title, description, section_title, total_sections, section_index):
        """
        Generates a single section (~400-500 words) of the blog.

        The prompt includes self.memory (text of the sections generated so far) so
        the model can avoid repeating itself. section_index is zero-based.
        Returns the section text as Markdown (also printed to stdout).
        """
        logging.info(f"Generating section: {section_title}")

        prompt = f"""
You are a highly skilled content writer who creates engaging, consistent, "human-like" blog sections.

BLOG TITLE: "{title}"
DESCRIPTION: "{description}"
SECTION NAME: "{section_title}"
Section #{section_index+1} of {total_sections} total.

Memory of previously generated text (avoid repeating it):
{self.memory}

SECTION INSTRUCTIONS:
- Approx. 400-500 words total (avoid going over 500).
- Use 4-6 sentences per paragraph. Keep paragraphs short and lively.
- Insert at least one bullet list or numbered list. Possibly sub-bullets for variety.
- If this is the Conclusion (last section), summarize or wrap-up, no new claims. 
- For Body sections, each should reflect the sub-bullets from the outline; keep it distinct from the other sections.
- Write in valid Markdown, starting with a heading (## or ###) that matches or is close to "{section_title}".

TONE INSTRUCTIONS:
**Conversational Elements**
- Write as if speaking to a friend while maintaining professionalism.
- Use personal pronouns ("you," "I," "we") to create engagement.
- Ask rhetorical questions to maintain reader involvement.
- Include transitional phrases between ideas.

**Natural Flow**
- Maintain consistent voice throughout the piece.
- Vary paragraph lengths for natural rhythm.
- Use punctuation to create natural pauses.
- Include occasional sentence fragments for authenticity.

**Professional Balance**
- Strike a balance between formal and informal language.
- Keep the writing clear and direct without being overly casual.
- Express calm confidence rather than excessive enthusiasm.
- Maintain appropriate level of expertise without being condescending.

**Authenticity Markers**
- Include occasional imperfections for genuineness.
- Add sensory details to make content more vivid.
- Use relevant cultural references when appropriate.
- Incorporate personal insights and reflections.

WRITING STYLE INSTRUCTIONS:
- Use words from this list where required:
    "surprising", "critical", "huge", "big", "powerful", "smart", "effective",
    "gloomy", "glitter", "hazy", "sparkling", "roaring", "buzz", "serene", 
    "crashing", "stinky", "bitter", "gooey", "rotten", "fluffy", "slimy", 
    "woolly", "hairy", "paralyzed", "swirling", "choppy",
    "grab", "boost", "improve", "engage", "succeed", "stop", "provide",
    "absolutely", "definitely", "certainly", "undoubtedly", "precisely", 
    "exactly", "indeed", "remarkably", "fascinating", "intriguing", "insightful",
    "noteworthy", "compelling", "resonate", "authentic", "genuine", "impactful",
    "thoughtful", "enlightening", "profound", "stimulating", "engaging",
    "captivating", "riveting", "persuasive", "convincing", "affirmative",
    "assuredly", "unquestionably", "wholeheartedly", "significantly",
    "meaningfully", "expressively", "emphatically", "sincerely", "passionately",
    "evidently", "clearly", "notably", "essentially", "fundamentally",
    "respectfully", "honestly", "truthfully", "undeniably", "reassuringly",
    "positively", "confidently",
    "you", "your", "what", "which", "when"
- Use everyday words instead of complex terminology like [As we dive into the world, As we, As we move].
- Include contractions (e.g., "I'd" instead of "I would").
- Incorporate idioms and colloquialisms naturally where appropriate.
- Avoid buzzwords and corporate jargon.
- Add personal anecdotes and examples to build connection.
- Include emotional language that resonates with readers.
- Show empathy and understanding of reader pain points.
- Incorporate subtle humor where appropriate.

Now produce this single section following all these guidelines.
"""
        with tqdm(total=1, desc=f"Writing: {section_title[:25]}") as pbar:
            response = self._safe_completion(
                model=self.model_config.get('model_name'),
                messages=[{"role": "user", "content": prompt}],
                temperature=0.4
            )
            pbar.update(1)

        section_text = response.choices[0].message.content.strip()
        logging.info(f"Section '{section_title}' generated.")
        print(section_text)
        self._rate_limit_cooldown()
        return section_text

    def generate_seo_metadata(self, title, article_body, blog_info):
        """
        Uses litellm completion to generate SEO metadata for the article.
        The metadata includes: seoTitle, seoDescription, seoKeywords, excerpt, and tags.

        blog_info is accepted for interface compatibility and is not used here.
        Returns the model output as text (requested in YAML key/value form).
        """
        seo_prompt = f"""
You are an expert SEO content strategist. Based on the following blog article content and its title, generate SEO metadata including:
- seoTitle: a compelling, SEO-friendly title,
- seoDescription: a concise description optimized for search engines,
- seoKeywords: a list of 5 to 10 SEO keywords (output as a JSON array),
- excerpt: a brief excerpt summarizing the article,
- tags: a list of relevant tags (output as a JSON array).

Article Title: "{title}"
Article Content:
{article_body}

Please output the metadata in the following YAML format exactly:
seoTitle: "<seo title>"
seoDescription: "<seo description>"
seoKeywords: [ "<keyword1>", "<keyword2>", ... ]
excerpt: "<short excerpt>"
tags: [ "<tag1>", "<tag2>", ... ]
"""
        with tqdm(total=1, desc="Generating SEO Metadata") as pbar:
            time.sleep(4)
            response = self._safe_completion(
                model=self.model_config.get('model_name'),
                messages=[{"role": "user", "content": seo_prompt}],
                temperature=0.1
            )
            pbar.update(1)
        seo_metadata_output = response.choices[0].message.content.strip()
        logging.info("SEO metadata generation complete.")
        return seo_metadata_output

    def generate_full_article(self, blog_info):
        """
        Generates the full blog article.
        1. Creates an outline.
        2. Parses the outline to identify all main sections (falls back to
           Opening / Core Section / Conclusion when fewer than 3 are found).
        3. Generates each section in order, feeding earlier sections back as memory.
        4. After the full article body is generated, uses litellm completion to generate SEO metadata.
        5. Assembles the final Markdown article with updated front matter.

        blog_info must contain "title", "category" and "subcatergory" (the misspelt
        key is what callers in this file pass; "subcategory" is accepted as well).
        Optional keys: "description", "pub_date_str", "up_date_str", "cover_image".
        Raises KeyError when a required key is missing.
        Returns the complete article (front matter + body) as a string.
        """
        logging.info("Starting full article generation...")

        # Read required fields first so a bad blog_info fails before any API call is made.
        title = blog_info["title"]
        category = blog_info["category"]
        subcategory = blog_info["subcategory"] if "subcategory" in blog_info else blog_info["subcatergory"]
        # Use the title as a fallback description if none is provided.
        description = blog_info.get("description", title)

        # Memory is per article: start clean so a reused generator does not feed the
        # previous article's text into this one's prompts.
        self.memory = ""

        # STEP 1: Generate Outline
        outline_text = self.generate_outline(title, description)

        # STEP 2: Parse sections from the outline
        section_titles = self._parse_outline_sections(outline_text)

        # Ensure at least 3 sections: Opening, a body, Conclusion
        if len(section_titles) < 3:
            section_titles = [
                "Opening",
                "Core Section",
                "Conclusion"
            ]

        # STEP 3: Generate each section, in order, referencing memory
        body_parts = []
        total_sections = len(section_titles)
        for idx, section_title in enumerate(section_titles):
            section_text = self.generate_section(
                title=title,
                description=description,
                section_title=section_title,
                total_sections=total_sections,
                section_index=idx
            )
            body_parts.append(f"\n{section_text}\n")
            # Update memory with newly generated text
            self.memory += f"\n{section_text}"
        final_body = "".join(body_parts)

        # STEP 4: Generate SEO metadata using the full article body.
        seo_metadata = self.generate_seo_metadata(title, final_body, blog_info)

        # STEP 5: Create final article with front matter.
        front_matter = self._build_front_matter(title, category, subcategory, blog_info, seo_metadata)

        final_article = front_matter + final_body.strip() + "\n"
        logging.info("Full article generation complete.")
        return final_article

    def save_article(self, final_article, file_title):
        """
        Saves the final article as Markdown in the 'generated_articles' folder.

        The file name is derived from file_title (see _slugify_title).
        Returns the relative path of the written file.
        """
        logging.info("Saving final article to disk...")
        filename = f"{self._slugify_title(file_title)}.md"
        os.makedirs("generated_articles", exist_ok=True)
        filepath = os.path.join("generated_articles", filename)

        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(final_article)

        logging.info(f"Article saved at {filepath}")
        return filepath

# -------------------- Main Execution with Multi-Threading --------------------

def process_article(idx, row, model_config):
    """
    Processes a single row from the CSV:
      1. Creates a new generator instance.
      2. Generates the full article.
      3. Saves the article to disk.
      4. Returns the row index and updated status.

    row is expected to provide "topic", "category" and "subcatergory" (the
    column name is misspelt in the CSV and kept as-is), plus optional
    "pub_date_str" / "up_date_str".

    Never raises: any failure, including a missing column, is logged and
    returned as (idx, "ERROR: <message>"). On success returns (idx, "TRUE").
    """
    # Label for log/error messages that works even when the "topic" column is missing.
    title_label = row.get("topic", f"row {idx}")
    try:
        # Built inside the try so a missing column marks this row as failed instead of
        # propagating out of the worker thread.
        blog_info = {
            "title": row["topic"],
            "category": row["category"],
            "subcatergory": row["subcatergory"],
        }
        # Empty CSV cells arrive as NaN; leave such keys out so the generator applies
        # its own date defaults rather than writing "nan" into the front matter.
        for date_key in ("pub_date_str", "up_date_str"):
            date_value = row.get(date_key)
            if date_value is not None and pd.notnull(date_value) and str(date_value).strip():
                blog_info[date_key] = date_value

        logging.info(f"=== Generating Article for: {blog_info['title']} ===")
        generator = FlexibleBlogArticleGenerator(model_config)
        article_markdown = generator.generate_full_article(blog_info)
        logging.info(f"=== Saving Article for: {blog_info['title']} ===")
        saved_path = generator.save_article(article_markdown, blog_info["title"])
        print(f"\nDone! Article '{blog_info['title']}' has been generated and saved at:\n{saved_path}\n")
        return idx, "TRUE"
    except Exception as e:
        logging.error(f"An error occurred while generating the article for {title_label}.", exc_info=True)
        print(f"ERROR for {title_label}: {e}")
        return idx, f"ERROR: {e}"

def main():
    """
    Batch entry point: reads Blog_Gen.csv, generates an article for every row whose
    'status' cell is empty (up to 3 rows concurrently), and writes each row's
    resulting status back to the CSV as it completes.
    """
    # Set required API keys (update as needed).
    # setdefault keeps a real key that is already exported in the environment instead
    # of overwriting it with this placeholder.
    os.environ.setdefault("FIREWORKS_AI_API_KEY", "YOURAPIKEYHERE")

    # Configure model parameters
    model_config = {
        'model_name': 'fireworks_ai/accounts/fireworks/models/deepseek-v3',  # Blog generation model
        'rate_limit_delay': (5, 6)
    }

    csv_file = "Blog_Gen.csv"  # Change the path if needed.
    try:
        df = pd.read_csv(csv_file)
    except Exception as e:
        logging.error("Error reading CSV file.", exc_info=True)
        print(f"ERROR reading CSV file: {e}")
        return

    # An all-empty status column is read as float (NaN) and an all-TRUE/FALSE one as
    # bool; use object dtype so string statuses can be stored without a dtype clash.
    if 'status' not in df.columns:
        df['status'] = None
    df['status'] = df['status'].astype(object)

    # Map each future to its row index so a crashed worker can still be attributed.
    future_to_idx = {}
    with ThreadPoolExecutor(max_workers=3) as executor:
        # Submit a task for each row that does not have a status set.
        for idx, row in df.iterrows():
            if pd.notnull(row['status']) and str(row['status']).strip() != "":
                logging.info(f"Skipping article for '{row['topic']}' because status is set to '{row['status']}'")
                continue
            future_to_idx[executor.submit(process_article, idx, row, model_config)] = idx

        # Process the results as they complete
        for future in as_completed(future_to_idx):
            try:
                idx, new_status = future.result()
            except Exception as e:
                # One failing worker must not abort the batch or lose the other results.
                idx = future_to_idx[future]
                logging.error(f"Worker for row {idx} raised an unexpected error.", exc_info=True)
                new_status = f"ERROR: {e}"
            df.at[idx, 'status'] = new_status
            # Persist after each processed article so progress survives an interruption.
            df.to_csv(csv_file, index=False)

    # Save the final updated CSV
    df.to_csv(csv_file, index=False)
    logging.info("All articles processed and CSV updated.")

# Run the batch generation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()