In [1]:
import os 
from langchain_community.tools.tavily_search import TavilySearchResults
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
from langchain_anthropic import ChatAnthropic
# Surface missing API keys up front instead of looking them up and discarding
# the result. ANTHROPIC_API_KEY is listed because this cell imports
# ChatAnthropic, although no visible cell uses it.
# NOTE(review): ChatGroq / TavilySearchResults presumably read their keys from
# the environment themselves - confirm against the library docs.
_missing_api_keys = [
    key_name
    for key_name in ("GROQ_API_KEY", "TAVILY_API_KEY", "ANTHROPIC_API_KEY")
    if not os.getenv(key_name)
]
if _missing_api_keys:
    print("Warning: missing environment variables:", ", ".join(_missing_api_keys))

# Groq-hosted chat model shared by the grading, solvability and planning steps.
groq_llm = ChatGroq(model="Llama3-8b-8192")

# Tavily web-search tool instance.
search = TavilySearchResults()

In [2]:
# Smoke test: send a single prompt to the Groq model and print the reply text.
nigesh=groq_llm.invoke("Was up doc")
print(nigesh.content)

Not much, just hanging out and waiting for some witty banter. What's new with you?


In [3]:
from langchain_community.document_loaders import RedditPostsLoader


# Load posts using 'subreddit' mode.
# Reddit API credentials are read from the environment (Reddit_Client_ID /
# Reddit_Client_Secret); os.getenv returns None when a variable is unset.
loader2 = RedditPostsLoader(
    client_id=os.getenv("Reddit_Client_ID"),
    client_secret=os.getenv("Reddit_Client_Secret"),
    user_agent="extractor by u/Successful_Tooth3728",
    categories=["hot" , "new"],  # List of categories to load posts from
    mode="subreddit",
    # In subreddit mode these entries are subreddit names (the loaded documents
    # carry e.g. post_subreddit 'r/taxpros').
    search_queries=[
        "taxpros",
        "tax",
        "finance",
        
    ],  
    # NOTE(review): appears to be applied per subreddit and per category
    # (3 x 2 x 10 = 60 scores are printed further down) - confirm in the loader docs.
    number_posts=10, 
)

In [4]:
# Run the loader and keep the resulting documents for the cells below.
documents2 = loader2.load()
# Preview at most the first 25 loaded documents as the cell output.
documents2[:25]

[Document(metadata={'post_subreddit': 'r/taxpros', 'post_category': 'hot', 'post_title': 'Reminder: Questions about preparing your taxes belong in /r/tax.', 'post_score': 251, 'post_id': 'bxv2v5', 'post_url': 'https://www.reddit.com/r/taxpros/comments/bxv2v5/reminder_questions_about_preparing_your_taxes/', 'post_author': Redditor(name='ChimpWithACar')}, page_content="Tax prep questions will be removed without notice. This is a forum to SERVE tax professionals, not a captive audience to be served BY tax professionals.\n\n**Please use /r/tax for tax preparation questions.**\n\n.\n\n**Protip**: If you haven't already, please **update your flair** according to sub rules to reflect your professional status. Iffy posts are less likely to be removed if they're from a tax pro."),
 Document(metadata={'post_subreddit': 'r/taxpros', 'post_category': 'hot', 'post_title': 'Welcome to Tax Season. Some reminders!', 'post_score': 59, 'post_id': '1anssy7', 'post_url': 'https://www.reddit.com/r/taxpros/

In [5]:
# Print title, subreddit, body and URL of every loaded post, each followed by
# a separator line.
for doc in documents2:
    meta = doc.metadata
    print("Title : ", meta['post_title'])
    print("Subreddit : ", meta['post_subreddit'])
    print("Page Content:", doc.page_content)
    print("URL : ", meta['post_url'])
    print("**=========================================================================================**")





Title :  Reminder: Questions about preparing your taxes belong in /r/tax.
Subreddit :  r/taxpros
Page Content: Tax prep questions will be removed without notice. This is a forum to SERVE tax professionals, not a captive audience to be served BY tax professionals.

**Please use /r/tax for tax preparation questions.**

.

**Protip**: If you haven't already, please **update your flair** according to sub rules to reflect your professional status. Iffy posts are less likely to be removed if they're from a tax pro.
URL :  https://www.reddit.com/r/taxpros/comments/bxv2v5/reminder_questions_about_preparing_your_taxes/
Title :  Welcome to Tax Season. Some reminders!
Subreddit :  r/taxpros
Page Content: Hello! Even though there is a nationwide shortage of accountants, interest in this sub is at an all-time high. If you're new here, some reminders:

**1) This sub is for those in the tax preparation profession only.**  
This doesn't mean you have to have a CPA or EA, or be the direct tax preparer.

In [6]:
import praw
import re

# Reddit client used by post_comment_extractor.
# SECURITY: credentials are read from the environment (.env) rather than being
# stored in the notebook. Any password that was ever committed in this file
# must be treated as leaked and rotated.
# NOTE(review): when Reddit_Password is unset, praw is expected to fall back
# to read-only (application-only) access, which is enough for reading
# comments - confirm against the praw authentication docs.
reddit = praw.Reddit(
    user_agent="Nomic Key by u/Successful_Tooth3728",
    client_id=os.getenv("Reddit_Client_ID"),
    client_secret=os.getenv("Reddit_Client_Secret"),
    username=os.getenv("Reddit_Username", "Successful_Tooth3728"),
    password=os.getenv("Reddit_Password"),
)

def is_valid_reddit_url(url):
    """Return True when ``url`` looks like a link to a Reddit post.

    Accepted shape:
    ``http(s)://[www.]reddit.com/r/<subreddit>/comments/<post id>/...``
    The slash after the post id is required by the pattern.

    Args:
        url: Candidate URL. Anything that is not a ``str`` (for example
            ``None`` coming from missing metadata) is reported as invalid
            instead of raising ``TypeError``.

    Returns:
        bool: ``True`` if the URL matches the post pattern, ``False`` otherwise.
    """
    if not isinstance(url, str):
        return False
    reddit_pattern = r'^https?://(?:www\.)?reddit\.com/r/[^/]+/comments/[^/]+/.*'
    return re.match(reddit_pattern, url) is not None

def post_comment_extractor(purl: str):
    """Fetch the comment bodies of a Reddit post.

    Args:
        purl: URL of the post. It must pass ``is_valid_reddit_url``.

    Returns:
        A list with the ``body`` of each comment yielded by iterating
        ``post.comments``, or the string ``"none"`` when the URL is invalid,
        the post has no comments, or praw raises a ``PRAWException``.
    """
    if not is_valid_reddit_url(purl):
        return "none"

    try:
        post = reddit.submission(url=purl)
        # Larger threads contain MoreComments placeholders, which have no
        # ``body`` attribute and would raise an uncaught AttributeError below.
        # limit=0 removes the placeholders rather than expanding them.
        post.comments.replace_more(limit=0)
        comments = [comment.body for comment in post.comments]
        return comments if comments else "none"
    except praw.exceptions.PRAWException:
        return "none"


In [7]:
url="https://www.reddit.com/r/taxpros/comments/1eocu8b/hurricane_debby_federal_filing_and_payment_date/"
post_comment_extractor(url)

['3rd straight year, ugh',
 'Guess I have a bit of leeway now if my clients are lazy again with Hillsborough/Tampa.',
 'Same here in South Carolina.  Fuck this has gotten old.']

In [8]:
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
import json

# Structured-output schema for the relevance grader; it is bound to the LLM
# with with_structured_output and also given to a PydanticOutputParser.
# NOTE(review): the class docstring and the Field descriptions are presumably
# sent to the model as part of the schema, so rewording them may change model
# behaviour - confirm before editing.
class GradeDocument(BaseModel):
    """Used to Grade the relevance of the post with the instructions"""
    # Requested answer is 'Yes' or 'No'; the field is a plain str, so the type
    # itself does not restrict the value.
    Post_relevance: str = Field(description="Check if the reddit post is discussing a pain point or problem related to finance or tax, Give a one word answer either 'Yes' OR 'No' nothing else is accepted")
    # Requested answer is one of the three listed statuses; also a plain str.
    Pain_point_status: str = Field(description="Determine the status of the pain point based on the post and comments. Give one of these answers: 'Valid yet not solved', 'Valid yet solved', 'Inconclusive'")

# Parser for GradeDocument; in the visible cells it is used only for its
# get_format_instructions() text, which is inserted into the grading prompt.
relevant_docs_parser = PydanticOutputParser(pydantic_object=GradeDocument)

# Groq model wrapped with GradeDocument as its structured-output schema.
structure_grade_llm = groq_llm.with_structured_output(GradeDocument)

def document_grade_retrieval(retrieved_reddit_post: str, retrieved_reddit_comments: str):
    """Grade a Reddit post and its comments for finance/tax pain points.

    Builds a system + human prompt, sends it to ``structure_grade_llm`` and
    prints and returns the two graded fields.

    Args:
        retrieved_reddit_post: Text of the post.
        retrieved_reddit_comments: Comments of the post. The value is
            interpolated into the prompt as-is; the pipeline cell passes the
            list returned by ``post_comment_extractor``.

    Returns:
        tuple: ``(Post_relevance, Pain_point_status)`` taken from the model
        response, ``('Unknown', 'Unknown')`` when the response is neither a
        dict nor a ``GradeDocument``, or ``(None, None)`` when any exception
        is raised (the error is printed, not re-raised).
    """
    try:
        grader_system_text = """You are an AI assistant tasked with analyzing Reddit posts and comments from finance and investing categories. Your job is to determine if the content discusses a pain point or problem related to finance or tax, and then analyze the community's response to this issue.

Consider the following when making your assessment:
1. Does the post describe a challenge, difficulty, or frustration in finance or tax-related tasks?
2. Is the user seeking advice or solutions for a finance or tax problem?
3. Does the content express dissatisfaction with current finance or tax processes?

Then, analyze the comments to determine:
- Do other users agree with or relate to the problem?
- Are users complaining about the same or similar issues?
- Has anyone proposed a solution to the problem?
- If solutions are proposed, do they seem feasible and well-received by others?

Provide two outputs:
1. Post_relevance: Answer "Yes" if the post discusses a finance or tax-related pain point, or "No" if it does not.
2. Pain_point_status: Provide one of the following based on your analysis of both the post and comments:
   - "Valid yet not solved" if the pain point is acknowledged but no satisfactory solutions are proposed.
   - "Valid yet solved" if the pain point is acknowledged and feasible solutions are proposed and well-received.
   - "Inconclusive" if there's disagreement or not enough information to determine community consensus.
   - A brief custom description if none of the above categories fit.

Only provide the required outputs without any additional explanation.
        """

        grader_human_text = """
        Reddit Post:
        {reddit_post}

        \n

        Reddit Comments:
        {reddit_comments}
        
        \n 
        
        Formatted_Output:
        {format_instructions}
        """

        # Assemble the two-message chat prompt and fill in its placeholders.
        grading_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(grader_system_text),
            HumanMessagePromptTemplate.from_template(grader_human_text),
        ])
        grading_messages = grading_prompt.format_messages(
            reddit_post=retrieved_reddit_post,
            reddit_comments=retrieved_reddit_comments,
            format_instructions=relevant_docs_parser.get_format_instructions(),
        )

        verdict = structure_grade_llm.invoke(grading_messages)

        # The structured call may hand back a plain dict or a GradeDocument.
        if isinstance(verdict, dict):
            relevance = verdict.get('Post_relevance', 'Unknown')
            status = verdict.get('Pain_point_status', 'Unknown')
        elif isinstance(verdict, GradeDocument):
            relevance = verdict.Post_relevance
            status = verdict.Pain_point_status
        else:
            print(f"Unexpected result type: {type(verdict)}")
            return 'Unknown', 'Unknown'

        print("Post_relevance:", relevance)
        print("Pain_point_status:", status)
        return relevance, status
    except Exception as err:
        print(f"Error in grading Document: {str(err)}")
        return None, None



In [9]:
# Structured-output schema for the solvability check; it is bound to the LLM
# with with_structured_output and also given to a PydanticOutputParser.
# NOTE(review): the class docstring and the Field description are presumably
# sent to the model as part of the schema, so rewording them may change model
# behaviour - confirm before editing.
class ProblemSolvable(BaseModel):
    """Used to determine if the problem in the post can be solved using AI and automation technologies"""
    # Requested answer is 'Yes' or 'No'; the field is a plain str, so the type
    # itself does not restrict the value.
    Solvable: str = Field(description="Check if the finance or tax problem in the reddit post can be solved or significantly improved using AI and automation technologies. Give a one word answer either 'Yes' OR 'No'.")

# Parser for ProblemSolvable; in the visible cells it is used only for its
# get_format_instructions() text, which is inserted into the solvability prompt.
solvable_docs_parser = PydanticOutputParser(pydantic_object=ProblemSolvable)

# Groq model wrapped with ProblemSolvable as its structured-output schema.
structure_solvable_llm = groq_llm.with_structured_output(ProblemSolvable)

def document_solvable_retrieval(retrieved_reddit_post: str, retrieved_reddit_comments: str):
    """Ask the LLM whether a post's pain point can be solved with AI/automation.

    Builds a system + human prompt, sends it to ``structure_solvable_llm`` and
    prints and returns the ``Solvable`` field of the response.

    Args:
        retrieved_reddit_post: Text of the post.
        retrieved_reddit_comments: Comments of the post. The value is
            interpolated into the prompt as-is; the pipeline cell passes the
            list returned by ``post_comment_extractor``.

    Returns:
        The ``Solvable`` value from the model response, ``'Unknown'`` when the
        response is neither a dict nor a ``ProblemSolvable``, or ``None`` when
        any exception is raised (the error is printed, not re-raised).
    """
    try:

        solvable_post_system_prompt = """
        You are an AI assistant specializing in identifying finance and tax-related problems that can be solved using advanced AI technologies and automation. Your task is to analyze Reddit posts and comments that have been identified as discussing finance or tax pain points.

        Consider if the problem can be addressed using the following technologies:
        1. AI Agents: Large language models (LLMs) that can perform tasks, make decisions, and interact with users or other systems.
        2. Large Language Models (LLMs): Advanced AI models trained on vast amounts of text data, capable of understanding and generating human-like text.
        3. Automation platforms like Make.com: Tools for creating complex, multi-step workflows connecting various apps and services.
        4. Machine Learning: Techniques for predictive analytics, pattern recognition, or data-driven insights.

        When assessing the pain point, consider:
        - Can AI agents powered by LLMs understand and address the core issue?
        - Could automation workflows in Make.com streamline or solve the problem?
        - Is the pain point solvable through a combination of AI and automation technologies?
        - Could these AI and automation technologies further improve the solution if machine learning is currently being used?

        Provide a binary grade:
        - "Yes" if the pain point can potentially be solved or significantly improved using the described AI and automation technologies.
        - "No" if the pain point is not suitable for solution by these technologies.

        Only provide the binary response without any explanation.
        """

        # Prompt label typo fixed: "Redit Comments" -> "Reddit Comments".
        solvable_post = """

        Reddit Post: 
        {reddit_post}

        \n 

        Reddit Comments:
        {reddit_comments}

        \n 

        Formatted_Output:
        {format_instructions}

        """

        docs_solvable_prompt_template = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(solvable_post_system_prompt),
            HumanMessagePromptTemplate.from_template(solvable_post)
        ])

        formatted_docs_solvable = docs_solvable_prompt_template.format_messages(
            reddit_post=retrieved_reddit_post,
            reddit_comments=retrieved_reddit_comments,
            format_instructions=solvable_docs_parser.get_format_instructions()
        )

        solvable_docs_response = structure_solvable_llm.invoke(formatted_docs_solvable)

        # The structured call may hand back a plain dict or a ProblemSolvable.
        if isinstance(solvable_docs_response, dict):
            document_solvable = solvable_docs_response.get('Solvable', 'Unknown')
        elif isinstance(solvable_docs_response, ProblemSolvable):
            document_solvable = solvable_docs_response.Solvable
        else:
            print(f"Unexpected result type: {type(solvable_docs_response)}")
            return 'Unknown'

        print("Solvable:", document_solvable)
        return document_solvable
    except Exception as e:
        # This path means the check itself failed (network, parsing, ...); it
        # is not a verdict about the problem, so the message says so.
        print(f"Error in checking solvability: {str(e)}")
        return None
    
    
    

In [10]:
import datetime
import json

def generate_solution_plan(retrieved_reddit_post: str, retrieved_reddit_comments: str):
    """Ask the LLM for an AI/automation solution plan for a post's pain point.

    Args:
        retrieved_reddit_post: Text of the post.
        retrieved_reddit_comments: Comments of the post; interpolated into the
            prompt as-is.

    Returns:
        Whatever ``groq_llm.invoke`` returns for the formatted prompt (the
        pipeline cell reads its ``.content``), or ``None`` when any exception
        is raised (the error is printed, not re-raised).
    """
    try:
        planner_system_text = """
        You are an AI assistant specializing in identifying finance and tax-related problems that can be solved using advanced AI technologies and automation. Your task is to analyze Reddit posts and comments that have been identified as discussing finance or tax pain points and generate a solution plan for the finance and tax problems being disscused using the following technologies:

        1. AI Agents: Large language models (LLMs) that can perform tasks, make decisions, and interact with users or other systems.
        2. Large Language Models (LLMs): Advanced AI models trained on vast amounts of text data, capable of understanding and generating human-like text.
        3. Automation platforms like Make.com: Tools for creating complex, multi-step workflows connecting various apps and services.
        4. Machine Learning: Techniques for predictive analytics, pattern recognition, or data-driven insights.


        When generating the solution plan, consider:
        - How can the pain point be addressed using the mentioned technologies?
        - What is the timeline for implementing the solution plan?
        - What is the sequence of steps to achieve the solution?

        Output Format!!! :

        1- Solution Plan: A Detailed explanation of how the pain point can be addressed using the mentioned technologies.
        2- Timeline: An estimated timeline for implementing the solution plan.
        3- Solution Map: A sequence of steps to achieve the solution.
        """

        planner_human_text = """

        Reddit Post:
        {reddit_post}

        \n 

        Reddit Comments:
        {reddit_comments}

        """

        # Two-message chat prompt: fixed instructions plus the post/comments.
        planner_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(planner_system_text),
            HumanMessagePromptTemplate.from_template(planner_human_text),
        ])
        planner_messages = planner_prompt.format_messages(
            reddit_post=retrieved_reddit_post,
            reddit_comments=retrieved_reddit_comments,
        )

        return groq_llm.invoke(planner_messages)
    except Exception as err:
        print(f"Error in generating solution plan: {str(err)}")
        return None

def write_to_readme(post_content, url, solution_plans):
    """Append one solution-plan entry to ``README_<today>.md`` in the working directory.

    The entry is assembled in memory and written with a single call, so a
    formatting failure cannot leave a half-written entry in the file. The file
    is opened as UTF-8 because Reddit text routinely contains characters that
    a platform-default encoding may not be able to represent.

    Args:
        post_content: Text of the Reddit post.
        url: URL of the post.
        solution_plans: Plan to record. A ``dict`` is rendered as indented
            JSON; anything else is rendered with ``str()``.

    Returns:
        None. Any exception is printed and swallowed (best effort).
    """
    today = datetime.date.today()
    file_name = f"README_{today}.md"

    try:
        if isinstance(solution_plans, dict):
            plan_text = json.dumps(solution_plans, indent=2)
        else:
            plan_text = str(solution_plans)

        entry = "".join([
            f"## Solution Plan for {today}\n\n",
            f"### Post Content:\n{post_content}\n\n",
            f"### URL:\n{url}\n\n",
            "### Solution Plan:\n",
            plan_text,
            "\n\n" + "-" * 100 + "\n\n",
        ])

        # Append mode: several posts processed on the same day share one file.
        with open(file_name, "a", encoding="utf-8") as f:
            f.write(entry)
    except Exception as e:
        print(f"Error in writing to README file: {str(e)}")



In [11]:
# Print the score of every loaded post (the other fields are unpacked but not
# used in this cell).
for doc in documents2:
    title = doc.metadata['post_title']
    category = doc.metadata['post_subreddit']
    reddit_post_content = doc.page_content
    url = doc.metadata['post_url']
    dash_line = "-" * 20
    post_score = doc.metadata['post_score']

    print(post_score)

251
59
8
1
7
4
1
8
3
23
7
1
1
3
8
3
8
22
5
4
20
33
2
19
3
1
0
1
2
3
1
2
0
0
1
0
1
0
0
0
1
324
43
925
38
1771
31
201
6
79
328
45
4
932
40
1770
34
5
202
6


In [12]:
# Posts must score strictly above this value to be analysed.
MIN_POST_SCORE = 10


def _as_label(answer):
    """Normalise a model answer for comparison; None becomes an empty string."""
    return str(answer or "").strip().lower()


# Pipeline: score filter -> comments -> relevance grade -> solvability -> plan -> README.
# Every stage below can fail softly (the helpers return None on error), so each
# result is checked before it is used instead of calling .lower()/.content on None.
for doc in documents2:
    title = doc.metadata['post_title']
    category = doc.metadata['post_subreddit']
    reddit_post_content = doc.page_content
    url = doc.metadata['post_url']
    post_score = doc.metadata['post_score']

    if post_score <= MIN_POST_SCORE or reddit_post_content is None:
        print("Post score is too low")
        continue

    reddit_comments = post_comment_extractor(url)
    if reddit_comments == "none":
        print("No valid comments found for this post")
        continue

    document_grading_check, pain_point_status = document_grade_retrieval(reddit_post_content, reddit_comments)
    is_relevant = _as_label(document_grading_check) == "yes"
    is_unsolved = _as_label(pain_point_status) == "valid yet not solved"
    if not (is_relevant and is_unsolved):
        # Covers irrelevant posts, other statuses and a failed grading call.
        print("Post is not a relevant, unsolved pain point")
        continue

    solvable_problem = document_solvable_retrieval(reddit_post_content, reddit_comments)
    if _as_label(solvable_problem) != "yes":
        print("Problem not solvable")
        continue

    solution_plan = generate_solution_plan(reddit_post_content, reddit_comments)
    if solution_plan is None:
        print("No solution plan generated for this post")
        continue

    solution_plan_extracted = "SOLUTION IS :" + "\n\n" + solution_plan.content
    write_to_readme(reddit_post_content, url, solution_plan_extracted)

No valid comments found for this post
Post_relevance: Yes
Pain_point_status: Valid yet not solved
Solvable: Yes
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post_relevance: Yes
Pain_point_status: Valid yet not solved
Solvable: Yes
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post_relevance: Yes
Pain_point_status: Valid yet not solved
Solvable: Yes
Post score is too low
Post score is too low
Post_relevance: Yes
Pain_point_status: Inconclusive
Problem not solvable
Post_relevance: Yes
Pain_point_status: Valid yet not solved
Solvable: Yes
Post score is too low
Post_relevance: Yes
Pain_point_status: Valid yet not solved
Solvable: Yes
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is too low
Post score is