In [1]:
import os
import re
import random
import json
import uuid
import praw
import pathlib
import textwrap
import numpy as np
from IPython.display import display, Markdown, Latex
from pprint import pprint
import ast

from crewai import Agent, Task, Crew, Process
from crewai_tools import ScrapeWebsiteTool, SerperDevTool, WebsiteSearchTool, DirectoryReadTool, FileReadTool
from crewai_tools import BaseTool, MDXSearchTool
from pydantic import BaseModel

In [3]:
# Location of the scraped Reddit dump (posts and comments) to analyse.
file_path = '/home/ubuntu/agents_ai/reddit_info_with_comments_postid_07242024b.json'

# Decode explicitly as UTF-8: without an `encoding` argument, open() falls back
# to the locale's preferred encoding, which can mis-decode or fail on
# non-ASCII text depending on the machine.
with open(file_path, 'r', encoding='utf-8') as f:
    reddit_info = json.load(f)

In [4]:
# Walk the two-level nested mapping and keep only the entries whose 'Type'
# is 'Post', in encounter order.
post_entries = (
    entry
    for groups in reddit_info.values()
    for entries in groups.values()
    for entry in entries
    if entry['Type'] == 'Post'
)

# Key each post by a 1-based running index.
posts_list = dict(enumerate(post_entries, start=1))
i = len(posts_list)  # running counter ends at the number of posts found
print(f'***Posts found : {len(posts_list)}***')

***Posts found : 864***


In [5]:
# Product description that is passed to the crew as the `product` input.
casa_ai_descr = (
    "Casa AI is the next generation tool for transforming interior spaces. "
    "Designed for homeowners, interior designers, "
    "and real estate professionals, our app uses advanced AI to reimagine "
    "any room based on your style and functional needs."
)

In [6]:
from langchain.llms import Ollama

In [8]:
# Llama 3.1 8B model accessed through LangChain's Ollama wrapper.
llm = Ollama(model="llama3.1:8b")

In [9]:
# Agent that picks and ranks the Reddit posts most relevant for marketing the
# product. `{n}` and `{product}` are placeholders filled from the kickoff inputs.
post_ranker = Agent(
    role="Post Ranker",
    goal="Rank the top {n} posts that can be targeted for marketing of {product}",
    # Adjacent string literals are joined with no separator, so every fragment
    # must carry its own trailing space; otherwise words run together
    # (e.g. "shouldidentify", "digitalmarketing").
    backstory="You will be given json data of reddit posts. You should "
              "identify the posts that will be relevant for the digital "
              "marketing of the {product}. Your work is the basis for the topic identifier "
              "who will identify the topic of the top rated posts and comments.",
    allow_delegation=False,
    verbose=True,
    llm=llm
)

In [10]:
# Example of the output shape shown to the model via the `{format}` placeholder.
# Every id is fully quoted so the example is a well-formed Python list of strings.
ranked_post_format = "['1bzrhax','15tcfrx','14ueo0i'...]"

In [11]:
# Number of posts the agent is asked to rank (passed to the crew as `n`).
k = 10

# Numbered instructions for the ranking task, one entry per step; the
# placeholders in braces are filled from the kickoff inputs.
_ranking_steps = [
    "1. Prioritize the noteworthy posts from input    that will be relevant for marketing of {product}.",
    "2. Rank the top {n} posts.",
    "3. For output, include only the Rank calculated and 'Post_id' from json input.",
    "4. Input data to be used is {input_data}",
]

ranking_task = Task(
    description="\n".join(_ranking_steps),
    expected_output="post_ids of top ranked posts in a python list format of {format}",
    agent=post_ranker,
)

In [12]:
# Single-agent crew: the post ranker executing its one ranking task,
# with verbosity level 2.
crew = Crew(
    tasks=[ranking_task],
    agents=[post_ranker],
    verbose=2,
)

In [13]:
# Values for the {product}, {n}, {input_data} and {format} placeholders used
# in the agent and task prompts.
kickoff_inputs = {
    "product": casa_ai_descr,
    "n": k,
    "input_data": posts_list,
    "format": ranked_post_format,
}
result = crew.kickoff(inputs=kickoff_inputs)

[1m[95m [2024-08-09 10:02:42][DEBUG]: == Working Agent: Post Ranker[00m
[1m[95m [2024-08-09 10:02:42][INFO]: == Starting Task: 1. Prioritize the noteworthy posts from input    that will be relevant for marketing of Casa AI is the next generation tool for transforming interior spaces. Designed for homeowners, interior designers, and real estate professionals, our app uses advanced AI to reimagine any room based on your style and functional needs..
2. Rank the top 10 posts.
3. For output, include only the Rank calculated and 'Post_id' from json input.


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3m[0m[32;1m[1;3mThought: I will use the correct approach this time.
Action: I will carefully read the problem and understand what is being asked. Then, I will identify the relevant information in the given text, which is a list of Reddit posts with their details. Finally, I will extract the post_ids of the top-ranked posts according to their scores.

Final Answer:
[ '14u

In [15]:
# Display the raw text of the crew's final answer as the cell output.
result.raw

"[ '14u2b4o', '15tcfrx', '14ueo0i', '1bzrhax' ]\n\nNote: The post_ids are in a python list format as required by the problem."

In [None]:
# Llama 3.1 70B model accessed through LangChain's Ollama wrapper.
llm_llama31_70b = Ollama(model="llama3.1:70b")

In [None]:
# Post-ranking agent backed by the 70B model. `{n}` and `{product}` are
# placeholders filled from the kickoff inputs.
post_ranker_70b = Agent(
    role="Post Ranker",
    goal="Rank the top {n} posts that can be targeted for marketing of {product}",
    # Adjacent string literals are joined with no separator, so every fragment
    # must carry its own trailing space; otherwise words run together
    # (e.g. "shouldidentify", "digitalmarketing").
    backstory="You will be given json data of reddit posts. You should "
              "identify the posts that will be relevant for the digital "
              "marketing of the {product}. Your work is the basis for the topic identifier "
              "who will identify the topic of the top rated posts and comments.",
    allow_delegation=False,
    verbose=True,
    # Use the 70B model defined for this variant, not the 8B `llm`.
    llm=llm_llama31_70b
)

In [None]:
# Number of posts the agent is asked to rank (passed to the crew as `n`).
k = 10

# Numbered instructions for the ranking task, one entry per step; the
# placeholders in braces are filled from the kickoff inputs.
_ranking_steps_70b = [
    "1. Prioritize the noteworthy posts from input    that will be relevant for marketing of {product}.",
    "2. Rank the top {n} posts.",
    "3. For output, include only the Rank calculated and 'Post_id' from json input.",
    "4. Input data to be used is {input_data}",
]

ranking_task_70b = Task(
    description="\n".join(_ranking_steps_70b),
    expected_output="post_ids of top ranked posts in a python list format of {format}",
    agent=post_ranker_70b,
)

In [None]:
# Single-agent crew for the 70B variant: its post ranker executing its one
# ranking task, with verbosity level 2.
crew_70b = Crew(
    tasks=[ranking_task_70b],
    agents=[post_ranker_70b],
    verbose=2,
)

In [None]:
# Run the 70B crew. Calling `crew.kickoff` here would re-run the 8B crew and
# store its answer under the 70B name.
result_70b = crew_70b.kickoff(
    inputs={
        "product": casa_ai_descr,
        "n": k,
        "input_data": posts_list,
        "format": ranked_post_format,
    }
)