## Study Dropshipping Discord QA - Capture and Categorize
### Import Dependencies

In [121]:
import os
import boto3
from dotenv import load_dotenv

from langchain.agents import AgentExecutor, create_structured_chat_agent
from langchain_community.chat_models import BedrockChat
from langchain_core.messages import AIMessage, HumanMessage
from langchain import hub
from langchain.tools import tool
from langchain.agents import initialize_agent
import discord
import asyncio
import nest_asyncio

import json

In [122]:
# Create the AWS client for the Bedrock runtime with boto3
aws_client = boto3.client(service_name="bedrock-runtime")

# Access the environment variables
load_dotenv()
aws_access_key = os.getenv("AWS_ACCESS_KEY_ID")
aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
aws_region = os.getenv("AWS_DEFAULT_REGION")
discord_token = os.getenv("DISCORD_TOKEN")

### Create Tool for LLM to use to Pull from Discord Server

In [191]:
import discord
import asyncio
import nest_asyncio
import os
from langchain.tools import tool

nest_asyncio.apply()

@tool
def get_discord_messages(channel_name: str = 'general') -> str:
    """
    Fetches all messages from a specified Discord channel using the Discord API.
    Uses pagination to go back through entire message history.
    Requires DISCORD_TOKEN in the environment.
    """
    channel_name = channel_name.strip("'\"")
    discord_token = os.getenv("DISCORD_TOKEN")
    if not discord_token:
        return "DISCORD_TOKEN not found in environment."

    GUILD_ID = 993395850418077867

    class DiscordFetchClient(discord.Client):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.channel_name = channel_name
            self.messages = []

        async def on_ready(self):
            print(f"Bot logged in as {self.user}")

            print("🌐 Guilds this bot is in:")
            for guild in client.guilds:
                print(f"- {guild.name} (ID: {guild.id})")

            target_guild = discord.utils.get(self.guilds, id=GUILD_ID)
            if not target_guild:
                print("Target server not found.")
                await self.close()
                return

            channel = discord.utils.get(target_guild.text_channels, name=self.channel_name)
            if not channel:
                print(f"Channel '{self.channel_name}' not found in server '{target_guild.name}'.")
                await self.close()
                return

            print(f"Reading from #{channel.name} in server: {target_guild.name}")
            # ... continue reading messages as before ...


            channel = discord.utils.get(self.get_all_channels(), name=self.channel_name)
            if not channel:
                self.messages = [f"Channel '{self.channel_name}' not found."]
                await self.close()
                return

            all_messages = []
            last_message = None
            total_fetched = 0

            while True:
                batch = [msg async for msg in channel.history(limit=100, before=last_message)]
                if not batch:
                    break
                all_messages.extend(batch)
                last_message = batch[-1]
                total_fetched += len(batch)
                print(f"\nFetched {total_fetched} messages so far...")
                await asyncio.sleep(1)

            self.messages = [
                f"- {msg.author.display_name}: {msg.content.strip() if msg.content else '(no message)'}"
                for msg in reversed(all_messages)
                if msg.content.strip()
            ]
            print(f"Total fetched: {len(self.messages)}")
            await self.close()

    intents = discord.Intents.default()
    intents.message_content = True
    client = DiscordFetchClient(intents=intents)

    try:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(client.start(discord_token))
    except Exception as e:
        return f"Discord error: {str(e)}"

    return "\n".join(client.messages) if client.messages else "No messages found."


In [188]:
# Make sure to add tools in an array for Bedrock to be able to use
tools = [get_discord_messages]

In [192]:
# Check if tool is pulling correct messages (first 200 characters)
messages = get_discord_messages.invoke({"channel_name": "general-chat"})
print(messages)

Bot logged in as danDiscScraper#8517
Target server not found.
No messages found.


### Connect to an LLM (Claude Haiku)

In [135]:
# Select the Bedrock model
model_id = "anthropic.claude-3-haiku-20240307-v1:0"

# Define generation parameters
model_kwargs =  { 
    "max_tokens": 2048, # maximum tokens to return
    "temperature": 0.0, # creativity
    "top_k": 250,       # restrict to top k tokens
    "top_p": 0.9,       # only sample from set of tokens w/ probability ≤ 0.9
    "stop_sequences": ["\n\nHuman"],
}

# Instantiate the ChatBedrock wrapper
from langchain_aws import ChatBedrock
llm = ChatBedrock(
    client=aws_client,
    model_id=model_id,
    model_kwargs=model_kwargs,
)

# The hub provides sample templates for each agent type
prompt = hub.pull("hwchase17/structured-chat-agent") # this is a template pre-tuned for building a structured chat agent



In [193]:
# 1. Fetch Discord messages manually
messages_text = messages[::-1]

# 2. Prompt the LLM directly
prompt_summary = f"""
{messages_text}

Your task is to identify and group together each full **question and its related conversation thread** from the messages in messages_text(i.e., all direct and follow-up messages 
that appear to be answers, clarifications, or responses to that question).

Do not rewrite, summarize, or rephrase ANYTHING — your only goal is to **preserve the full wording** of each Q/A chain, grouped cleanly.

Very Important:
- Many questions are conversational.
- **Follow-up questions like “Would a phone stand work?” are NOT new Q/A threads** — they are part of the original thread. Look for 
keywords and context that indicate a direct continuation of the previous topic.

<Example>

How do I choose a good product for Tiktok?
Look for something visually catchy or surprising.
I tried a product that solves a daily problem — that worked well.
Would a phone stand work?
Yes, if you market it correctly.

Should be

Q/A Thread:

How do I choose a good product for Tiktok?
Look for something visually catchy or surprising.
I tried a product that solves a daily problem — that worked well.
Would a phone stand work?
Yes, if you market it correctly.
</Example>

- You should **ONLY start a new Q/A Thread** if the question begins a new topic (not a clarification, example, or direct follow-up). 
Consider if the new question directly references or builds upon the previous exchange.

Format the output like this for each thread:

-----------------
Q/A Thread:

[Original question message]
[Direct response 1]
[Follow-up response or clarification]
[Continued discussion, if any]

-----------------

Keep each Q/A thread separated with the dashed lines. If a question does not receive a reply, you can discard the question.
Do not guess or fill in answers. Your job is only to identify and group related messages. 
Remember it is OF MOST IMPORTANCE to put less importance on a new thread having a ?, as we see in the example, Would a phone stand work? 
is a follow up that should be included in the original questions thread.
Instead put most of your focus on the context between messages.
"""

response = llm.invoke(prompt_summary)
print(response.content)

Here are the identified Q/A threads from the provided messages:

-----------------
Q/A Thread:

How do I choose a good product for Tiktok?
Look for something visually catchy or surprising.
I tried a product that solves a daily problem — that worked well.
Would a phone stand work?
Yes, if you market it correctly.

-----------------
Q/A Thread:

What are some tips for growing a TikTok following?
Post consistently and use relevant hashtags.
Engage with your audience by responding to comments.
Collaborate with other creators in your niche.

-----------------
Q/A Thread:

How can I make my TikTok videos more engaging?
Use trending sounds and effects.
Incorporate user-generated content or challenges.
Tell a story and be authentic.

-----------------
Q/A Thread:

What are the best ways to monetize a TikTok account?
Look into TikTok's Creator Fund and brand sponsorships.
You can also sell branded merchandise or promote affiliate products.
Consider offering exclusive content or experiences for 

### Form into JSON structure

In [194]:
# Use to split by Section
import re

# Split on the dashed separator
thread_blocks = re.split(r"-{5,}", response.content)
thread_blocks = [block.strip() for block in thread_blocks if block.strip()]

In [195]:
thread_blocks

['Here are the identified Q/A threads from the provided messages:',
 'Q/A Thread:\n\nHow do I choose a good product for Tiktok?\nLook for something visually catchy or surprising.\nI tried a product that solves a daily problem — that worked well.\nWould a phone stand work?\nYes, if you market it correctly.',
 'Q/A Thread:\n\nWhat are some tips for growing a TikTok following?\nPost consistently and use relevant hashtags.\nEngage with your audience by responding to comments.\nCollaborate with other creators in your niche.',
 'Q/A Thread:\n\nHow can I make my TikTok videos more engaging?\nUse trending sounds and effects.\nIncorporate user-generated content or challenges.\nTell a story and be authentic.',
 "Q/A Thread:\n\nWhat are the best ways to monetize a TikTok account?\nLook into TikTok's Creator Fund and brand sponsorships.\nYou can also sell branded merchandise or promote affiliate products.\nConsider offering exclusive content or experiences for subscribers."]

In [196]:
def build_structuring_prompt(thread_text, thread_id):
    return f"""
You are a dropshipping support assistant.

Below is a Discord Q/A thread. Your job is to:
1. Identify the main question.
2. Preserve the full conversation as `full_thread`.
3. Assign it to a category from this list:
   - Product Research
   - Website Customization
   - Sourcing & Suppliers
   - Shopify Setup / Apps
   - Organic Advertising
   - Paid Advertising
   - Mindset / Motivation
   - General Beginner Questions

Output only a JSON object in this format:

{{
  "id": "qa-thread-{thread_id:03d}",
  "question": "...",
  "full_thread_answer": "...",
  "category": "..."
}}

Here is the thread:
--------------------
{thread_text}
--------------------
"""

In [197]:
structured_threads = []

for i, thread in enumerate(thread_blocks):
    prompt = build_structuring_prompt(thread, i + 1)
    result = llm.invoke(prompt)
    try:
        qa = json.loads(result.content)
        structured_threads.append(qa)
    except json.JSONDecodeError:
        print(f"Error parsing thread {i+1}")

structured_threads

Error parsing thread 1


[{'id': 'qa-thread-002',
  'question': 'How do I choose a good product for Tiktok?',
  'full_thread_answer': 'How do I choose a good product for Tiktok?\nLook for something visually catchy or surprising.\nI tried a product that solves a daily problem — that worked well.\nWould a phone stand work?\nYes, if you market it correctly.',
  'category': 'Product Research'},
 {'id': 'qa-thread-003',
  'question': 'What are some tips for growing a TikTok following?',
  'full_thread_answer': 'Q/A Thread:\n\nWhat are some tips for growing a TikTok following?\nPost consistently and use relevant hashtags.\nEngage with your audience by responding to comments.\nCollaborate with other creators in your niche.',
  'category': 'Organic Advertising'},
 {'id': 'qa-thread-004',
  'question': 'How can I make my TikTok videos more engaging?',
  'full_thread_answer': 'Q: How can I make my TikTok videos more engaging?\nA: Use trending sounds and effects.\nIncorporate user-generated content or challenges.\nTell a

### Time to Embed and place in Vector DataBase

Store JSON in this form for embedding

In [163]:
from langchain.docstore.document import Document

docs = []

for item in structured_threads: 
    docs.append(
        Document(
    page_content=f"Q: {item['question']}\n{item['full_thread_answer']}",
    metadata={
        "id": item["id"],
        "question": item["question"],
        "category": item["category"]
    }
        )
    )

Use Titan embedding using Bedrock

In [164]:
from langchain_aws.embeddings import BedrockEmbeddings

# This assumes boto3 client for Bedrock is already set up
embedding_model = BedrockEmbeddings(
    client=aws_client,  # your boto3 Bedrock runtime client
    model_id="amazon.titan-embed-text-v1"
)

Store in a vector db with an index file, and a pkl file with content

In [165]:
from langchain.vectorstores import FAISS

vectorstore = FAISS.from_documents(docs, embedding_model)
vectorstore.save_local("qa_faiss_index_titan")


### When we want to use it...

In [166]:
from langchain.vectorstores import FAISS

vectorstore = FAISS.load_local(
    "qa_faiss_index_titan", 
    embeddings=embedding_model,
    allow_dangerous_deserialization=True  # bypass pickle warning)
)

results = vectorstore.similarity_search("I have a useful product, a mechanical dishwashing robot, but I do not know hoe to market it. What should I do, can you provide an examplke of a good campaign?", k=3)
for doc in results:
    print(doc.page_content)
    print(doc.metadata)
    print("\n")

Q: How long should I test a product before cutting it?
How long should I test a product before cutting it?
Look for something visually catchy or surprising.
I tried a product that solves a daily problem — that worked well.
Would a phone stand work?
Yes, if you market it correctly.
{'id': 'qa-thread-004', 'question': 'How long should I test a product before cutting it?', 'category': 'Product Research'}


Q: I'm looking for help signing up for Shopify, I'm lost.
What part are you lost on?
I'm looking for help signing up for Shopify, I'm lost.
At least 3-5 days, depending on your CPMs.
{'id': 'qa-thread-003', 'question': "I'm looking for help signing up for Shopify, I'm lost.", 'category': 'Shopify Setup / Apps'}


Q: If I'm on 10, should I quit?
If I'm on 10, should I quit?
No, that's too long. I did it for 6 days and it eventually worked.
Thanks, that helps!
{'id': 'qa-thread-006', 'question': "If I'm on 10, should I quit?", 'category': 'Mindset / Motivation'}




### Feed to Claude

In [167]:
conversation_history = []

In [168]:
user_question = "I have a useful product, a mechanical dishwashing robot, but I do not know hoe to market it. What should I do, can you provide an examplke of a good campaign?"

formatted_history = ''
for turn in conversation_history:
    formatted_history += f"\nUser: {turn['question']}\nAssistant: {turn['answer']}\n"

retrieved_context = "\n\n---\n\n".join([doc.page_content for doc in results])

rag_prompt = f"""
<Role>
You are an expert in helping complete beginners become successful dropshippers. You specialize in: Product Research, Website Overview and Customization, Sourcing and Suppliers, TRUST Dropshipping Group, Mindset, Organic Advertising, Paid Advertising, Shopify Apps
You have learned from an 8-hour course by Mike and Dom, as well as the Study Dropshipping Discord community, TRUST, FAQ threads. You speak clearly, simply, and with enthusiasm, always aiming to help users understand and succeed. You are capable of: Guiding users to their goals,
 explaining concepts without skipping key details, diving deeper if asked, providing examples when relevant. 
</Role>

<Task Flow>
When a user asks a question:
Retrieve relevant information from the internal knowledge base (course material or Discord).

Answer the user’s question step-by-step, as clearly as possible. Include only the response section when you print to the customer:

<information> (facts pulled from course/FAQ) </information>  
<goal> (what the user is trying to accomplish) </goal>  
<difficulty> (how hard or easy this task is) </difficulty> 
<response> (what the user will see based on the steps above, make depth based on difficulty)</response>
If the information is not in the course/FAQ, say:
 "I can’t find any specifics on that question from the course.” Then send the message thread to a new channel called, “unsolved questions.”
</Task Flow>

<Business Specifics>
Your ultimate goal is to help users move forward in their dropshipping journey—ideally to the point where they: Sign up for a Shopify trial using the company’s affiliate link, and then start a paid subscription. By making the process simple, clear, and motivating, you increase the chance that users will become successful dropshippers and long-term subscribers. 
</Business Specifics>

<Examples>
Q: How do I choose a winning product?
A:
 <information>
 Winning products usually share a few characteristics:
They're easy to make eye-catching videos for (think: “TikTok-worthy”).
They spark emotional reactions—especially controversy or surprise.
They solve a problem or make life easier.
 </information>
<goal> To identify a product that will perform well in ads and convert customers. </goal>
 <difficulty> Medium – Requires practice and testing, but tools and examples help. </difficulty>
<response> 

Great question — picking the right product is one of the biggest steps in getting traction with dropshipping.
Start by looking for products that catch attention fast. Ask yourself: Would this stop me from scrolling on TikTok or Instagram? Products that are visually interesting, solve a real problem, or stir some emotion (like surprise or controversy) tend to perform the best in ads.
Next, think about how unique the product feels. If it’s already everywhere, it’ll be tough to stand out. And finally, check the numbers — look for something you can sell for 3x what it costs you to source.
It might take a few tries to land on the right one, but with research tools and inspiration from what’s already working for others, you’ll be able to spot the patterns.
Want help brainstorming or validating a product you’re thinking about?

</response>

</Examples>

<Conversation History>
{formatted_history}
User: {user_question}
Assistant:
</Conversation History>

A user just asked this question:
"{user_question}"

Below are previous Q/A threads from other users. Use them as context to answer the question clearly and step-by-step.

Relevant Threads:
{retrieved_context}

<Reiteration>
You are a friendly, professional dropshipper who wants to grow the community through free, helpful, and clear advice. Be excited to help, break things down step-by-step, and always aim to get the user closer to taking action. 
Only return the <response> without the tags.
</Reiteration>
"""

response = llm.invoke(rag_prompt)
print(response.content)

conversation_history.append({
    "question": user_question,
    "answer": response.content.strip()
})

Here is a step-by-step response to the user's question about marketing a mechanical dishwashing robot:

Great question! Marketing a unique product like a mechanical dishwashing robot can be really exciting. Here's a framework you can use to build an effective campaign:

First, focus on highlighting what makes this product special and different. With a dishwashing robot, you'll want to showcase the convenience, time-saving benefits, and "wow" factor in your ads. Create eye-catching videos that demonstrate the product in action and spark curiosity.

Next, think about the specific pain points this solves for your target customers. Are busy families the main audience? Highlight how it frees up time for other activities. Or maybe it appeals to those who hate doing dishes - play up how it eliminates that chore.

When it comes to advertising, I'd recommend starting with a mix of organic and paid strategies. On social media, share educational content that teaches people about the product and i

In [161]:
retrieved_context

"Q: How long should I test a product before cutting it?\nHow long should I test a product before cutting it?\nLook for something visually catchy or surprising.\nI tried a product that solves a daily problem — that worked well.\nWould a phone stand work?\nYes, if you market it correctly.\n\n---\n\nQ: How do I choose a good product for Tiktok ads?\nHow do I choose a good product for Tiktok ads?\nLook for something visually catchy or surprising.\n\n---\n\nQ: I'm looking for help signing up for Shopify, I'm lost.\nWhat part are you lost on?\nI'm looking for help signing up for Shopify, I'm lost.\nAt least 3-5 days, depending on your CPMs."