In [1]:
!pip install openai

Defaulting to user installation because normal site-packages is not writeable


In [53]:
from openai import OpenAI
import os
import json
import re
import time
# The API key is read from the environment so the secret never lands in the
# notebook source or its saved outputs. Set OPENAI_API_KEY before running.
openai_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
ASSISTANT_ID = "asst_kTfFkImWj1AkkkCea9AXFJZA"  # Assistant used for the tagging/analysis step
POST_WRITER_ASSISTANT_ID = "asst_jwJfa6mtMpdBSJovmeJYVSFy"  # Assistant used to write post variations
MARKDOWN_FILE_PATH  = "user_posts_summary.md"  # File containing scraped posts

In [50]:
def upload_file(file_path):
    """Upload a local file for assistant use and return its file id.

    Args:
        file_path: Path of the file to upload; it is opened in binary mode.

    Returns:
        The ``id`` attribute of the object returned by ``files.create``.
    """
    with open(file_path, "rb") as handle:
        uploaded = openai_client.files.create(file=handle, purpose="assistants")
    # The handle is only needed for the duration of the upload call.
    return uploaded.id

In [51]:
def process_assistant(assistant_id, thread_id, file_ids, content):
    """Post a user message to a thread, run an assistant on it, and return the reply.

    Args:
        assistant_id: ID of the assistant to run.
        thread_id: ID of the thread that receives the message and the run.
        file_ids: Iterable of uploaded file IDs; each one is attached to the
            message with the ``file_search`` tool. May be empty.
        content: Text of the user message.

    Returns:
        The text returned by ``fetch_response`` for the run, or ``None`` when
        any step fails (the error is printed instead of being raised).
    """
    try:
        # Attach files to message
        openai_client.beta.threads.messages.create(
            thread_id=thread_id,
            role="user",
            content=content,
            attachments=[
                {
                    "file_id": file_id,
                    "tools": [{"type": "file_search"}],
                }
                for file_id in file_ids
            ],
        )

        # Create and poll run
        run = openai_client.beta.threads.runs.create_and_poll(
            assistant_id=assistant_id,
            thread_id=thread_id,
        )

        if run.last_error:
            raise Exception(run.last_error.message)

        # Polling can also stop on a non-successful state that carries no
        # last_error (e.g. expired, cancelled, incomplete, requires_action).
        # Treat anything other than "completed" as a failure instead of
        # reading a missing or partial reply.
        if run.status != "completed":
            details = getattr(run, "incomplete_details", None)
            reason = getattr(details, "reason", None)
            suffix = f" ({reason})" if reason else ""
            raise Exception(f"Run ended with status '{run.status}'{suffix}")

        return fetch_response(thread_id, run.id)

    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide.
        print(f"❌ Error running assistant: {e}")
        return None


# -----------------------
# Fetch assistant response
# -----------------------
def fetch_response(thread_id, run_id):
    """Return the text of the first message listed for a run.

    Each annotation marker found in the text is replaced by a numeric
    placeholder ``[0]``, ``[1]``, ... in annotation order.

    Args:
        thread_id: ID of the thread the run belongs to.
        run_id: ID of the run whose messages are listed.

    Returns:
        The message text with annotation markers replaced.

    Raises:
        ValueError: If the listing is empty or the first message carries no
            text content.
    """
    messages = openai_client.beta.threads.messages.list(
        thread_id=thread_id, run_id=run_id
    )

    # Check the `data` list itself: an empty page object may still be truthy,
    # which would otherwise surface as an IndexError below.
    if not messages or not messages.data:
        raise ValueError("No messages found.")

    latest_message = messages.data[0]

    # Use the first block that actually carries text; a message may start
    # with a non-text block that has no `.text` attribute.
    content_block = next(
        (
            block.text
            for block in (latest_message.content or [])
            if getattr(block, "text", None) is not None
        ),
        None,
    )
    if content_block is None:
        raise ValueError("No text content found in the latest message.")

    response_text = content_block.value

    # Only the placeholders are needed. Cited file names were previously
    # looked up with one API call per annotation and then discarded, so that
    # lookup is gone.
    for i, annotation in enumerate(content_block.annotations or []):
        response_text = response_text.replace(annotation.text, f"[{i}]")

    return response_text


# -----------------------
# Save Markdown Output
# -----------------------
def save_response_markdown(text, filename="assistant_response.md"):
    """Write ``text`` to ``filename`` as UTF-8 and print a confirmation line.

    Args:
        text: String to write; an existing file is overwritten.
        filename: Destination path. Defaults to ``assistant_response.md``.
    """
    with open(filename, "w", encoding="utf-8") as out_file:
        out_file.write(text)

    print(f"📝 Saved response to {filename}")


def extract_json_block(text):
    """Parse a JSON value out of an assistant reply.

    The whole text is tried first. If it is not valid JSON (for example
    because the array is wrapped in a Markdown code fence or surrounded by
    prose), every ``[`` that is followed by ``{`` is tried as the start of an
    embedded array, and the first one that decodes is returned.

    Args:
        text: Reply text to parse.

    Returns:
        The decoded value of the whole text, or the first embedded array of
        objects that decodes, or ``[]`` when nothing can be decoded.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass

    decoder = json.JSONDecoder()
    for opener in re.finditer(r'\[\s*{', text):
        try:
            # raw_decode stops at the bracket that really closes the array, so
            # nested "}]" sequences inside an item do not truncate the match
            # and trailing text (a closing fence, prose) is ignored.
            parsed, _ = decoder.raw_decode(text[opener.start():])
        except json.JSONDecodeError:
            # Not a real array start; keep looking at later candidates.
            continue
        return parsed
    return []

In [52]:
if __name__ == "__main__":
    # Driver: upload the scraped posts, have the tagging assistant label each
    # post, then ask the writer assistant for variations of every tagged post.
    print("🚀 Running LinkedIn Content Assistant...")

    # Instructions sent to the tagging assistant together with the uploaded file.
    tagging_prompt = (
        "The uploaded file is a **Markdown file containing multiple LinkedIn posts**.\n"
        "Each post is structured using Markdown headers and follows this format:\n\n"
        "## Post {number}\n"
        "**Author:** ...\n"
        "**Reposted by:** ...\n"
        "**Timestamp:** ...\n"
        "**Reactions:** ...\n"
        "**Comments:** ...\n"
        "**Reposts:** ...\n"
        "**Media Type:** ...\n"
        "**Has Image:** ...\n"
        "**Has Article:** ...\n"
        "**Post URL:** ...\n"
        "**Content:**\n"
        "{body text of the post}\n\n"
        "---\n\n"

        "### Your task:\n"
        "1. Detect and split the file into individual posts using `## Post` headers.\n"
        "2. For **each post**, analyze the `**Content:**` section.\n"
        "3. Assign tags in these four categories **based only on the post content**:\n"
        "- **Content Type**: e.g., Educational Insight, Case Study, Thought Leadership, etc.\n"
        "- **Tone / Audience Reaction**: e.g., Empowering, Reflective, Optimistic, etc.\n"
        "- **Structure**: e.g., Narrative, Instructional, Listicle, etc.\n"
        "- **Intent / Objective**: e.g., Personal Branding, Lead Generation, Community Building, etc.\n\n"

        "### Output format:\n"
        "Return a **JSON array**, where each item looks like this:\n\n"
        "```json\n"
        "[\n"
        "  {\n"
        "    \"post_id\": \"Post 1\",\n"
        "    \"text\": \"(The main post content here)\",\n"
        "    \"tags\": {\n"
        "      \"content_type\": [\"...\"],\n"
        "      \"tone\": [\"...\"],\n"
        "      \"structure\": [\"...\"],\n"
        "      \"intent\": [\"...\"]\n"
        "    }\n"
        "  },\n"
        "  ...\n"
        "]\n"
        "```\n\n"
        "⚠️ Do not summarize or group the posts.\n"
        "Each post must be processed and tagged **independently**.\n"
        "Only return the raw JSON array. No additional text or explanations."
    )

    # Upload file
    # NOTE(review): the uploaded file is not deleted afterwards; confirm
    # whether it should be removed once tagging is done.
    print("📂 Uploading the scrapped posts...")
    file_id = upload_file(MARKDOWN_FILE_PATH)

    # Create thread
    thread = openai_client.beta.threads.create()

    try:
        # Send prompt to assistant
        print("💬 Asking assistant for analysis + post generation...")
        response = process_assistant(
            assistant_id=ASSISTANT_ID,
            thread_id=thread.id,
            file_ids=[file_id],
            content=tagging_prompt,
        )

        # Save and parse response
        tagged_posts = []
        extraction_failed = False
        if response:
            save_response_markdown(response)
            print("🔎 Preview of tagging assistant response:\n", response[:300])
            tagged_posts = extract_json_block(response)
            if not tagged_posts:
                print("❌ Failed to extract JSON array from response. Exiting.")
                # Skip generation instead of calling exit(): in a notebook
                # that call asks the kernel to shut down, and cleanup of the
                # thread below must still run.
                extraction_failed = True

        if not extraction_failed:
            # Step 3: Generate new posts from tags
            print("\n✍️ Generating post variations...\n")
            for post in tagged_posts:
                # The model's JSON is untrusted: ignore items that are not objects.
                if not isinstance(post, dict):
                    continue

                content = post.get("text", "")
                tags = post.get("tags", {})

                if not isinstance(content, str) or not content.strip():
                    continue

                writer_prompt = f"""
You are a skilled LinkedIn post creator. Using the content and tags below, create variations of the post.

Content:
{content}

Tags:
{json.dumps(tags, indent=2)}

Each paragraph must contain 12-15 words (max 20). Add a blank line between paragraphs.

End with a call-to-action and 5-10 relevant hashtags.

At the end, if the user provides feedback (e.g., "more playful", "shorter", "less emoji"), show a 🎯 Modified Version based on that feedback.
"""

                # One throwaway thread per post, removed again once the
                # variation has been produced so threads do not pile up.
                writer_thread = openai_client.beta.threads.create()
                try:
                    post_response = process_assistant(
                        assistant_id=POST_WRITER_ASSISTANT_ID,
                        thread_id=writer_thread.id,
                        file_ids=[],
                        content=writer_prompt,
                    )
                finally:
                    openai_client.beta.threads.delete(writer_thread.id)

                # A missing post_id should not abort the remaining posts.
                post_id = post.get("post_id", "unknown post")
                print(f"📬 Post Variations for {post_id}:\n{post_response}\n")
    finally:
        # Always remove the tagging thread, even when a step above raised.
        openai_client.beta.threads.delete(thread.id)


🚀 Running LinkedIn Content Assistant...
📂 Uploading the scrapped posts...
💬 Asking assistant for analysis + post generation...
📝 Saved response to assistant_response.md
🔎 Preview of tagging assistant response:
 ```json
[
    {
        "post_id": "Post 1",
        "text": "VCs, CPG investors, and consumer brand founders—this event is for you!\n\n🏌️‍♀️LvlUp Ventures East Hampton Invitational\nA Mini-Golf Showdown + High-Growth CPG Networking Experience\n📅 June 5 | East Hampton | Part of Hamptons Tech Week\n\

✍️ Generating post variations...

📬 Post Variations for Post 1:
🏌️ Calling all VCs, CPG investors, and brand founders! Don't miss this:

LvlUp Ventures East Hampton Invitational:
📅 June 5 | East Hampton | Hamptons Tech Week

An exclusive event mixing mini-golf, CPG networking, and more:
⛳ Mini-golf tourney | 🤝 Founders-investor reception
🍪 Gourmet bites + drinks | 🎁 Curated gift bags

Exciting additions:
🎉 joins as our headline sponsor.
🍰 Treats from a sweet specialist.
🌿 Better-for