In [1]:
import os
import re
import yaml
import json
import ast
import praw
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI

from textwrap import dedent
from reddit_helper import *    

from langchain.llms import OpenAI, Ollama
from langchain_openai import ChatOpenAI

from pydantic import BaseModel

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [2]:
api_file_path = 'api_keys.json'
with open(api_file_path, 'r') as file:
    api_keys = json.load(file)        
openai_gpt35 = ChatOpenAI(model_name="gpt-3.5-turbo", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])
openai_gpt4 = ChatOpenAI(model_name="gpt-4", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])        
llama31_8b = Ollama(model='llama3.1:8b',)
gemma2_9b = Ollama(model='gemma2:9b',)

In [3]:
cfg_file_path = 'casaai_config.yaml'
with open(cfg_file_path, 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)
product_long = cfg.get('product_long_description', '')      
product_short = cfg.get('product_short_description', '')

In [4]:
def re_change(input_string):

    pattern = r"'comment_id':\s*'([^']*)',\s*'relevance_score':\s*(\d+)"

    # Use re.findall to extract the values
    matches = re.findall(pattern, input_string)

    if matches:
        comment_id, relevance_score = matches[0]
    else:
        comment_id = "na"
        relevance_score = 0

    pattern = r"\s*'justification':\s*'''(.*?)'''"  
    matches = re.findall(pattern, input_string)
    if matches:
        justification= matches[0]
    else:
        justification = "na" 
    
    return comment_id, relevance_score, justification

In [5]:
backstory = "You are a content analyst with expertise in analyzing web content and \
             extracting relevant information. You are responsible for ensuring that \
             content is relevant, high-quality, and aligned with the marketing of \
             {product_short}. "
content_analysis_agent = Agent(
                            role="Content Analyst",
                            goal="Analyze web content and extract relevant information",
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=gemma2_9b,
                            )

In [6]:
backstory = "You evaluates content by analyzing user interactions, such as likes, \
            shares, comments, and views. You should also consider factors such as \
            user behavior/sentiment."
goal = "Accurately assess the impact and effectiveness of content based on user interactions"
engagement_analysis_agent = Agent(
                            role="Engagement Analyst",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=llama31_8b,
                            )

In [7]:
backstory = "You evaluates content by analyzing its alignment with {product_short}. \
            You should also consider factors such as keyword density, context \
            accuracy, and user intent. You identifies content that effectively \
            meets audience expectations, flags irrelevant material, and \
            provides insights to enhance content targeting."
goal = "Ensure that content is highly pertinent and aligned with the intended topics and audience needs"
relevance_analysis_agent = Agent(
                            role="Relevance Analyst",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=llama31_8b,
                        )

In [15]:
backstory = "Responsible for synthesizing and evaluating the combined outputs from the \
                Content Analysis, Engagement Analysis, and Relevance Analysis agents. You \
                ensures all aspects of content—quality, engagement, and relevance—are \
                harmonized and aligns with the marketing of {product_short}."
goal = "Ensure content is relevant, engaging, and strategically aligned to marketing of product"
content_review_agent = Agent(
                            role="Content Review Supervisor",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=gemma2_9b,
                        )

In [16]:
class ScoreOutput(BaseModel):
    comment_id: str
    score: float
    justification: str

In [17]:
tip_text = "If you do your BEST WORK, I'll give you a $10,000 commission!"

In [18]:
output_format = "[{'comment_id': 'k31npnk', 'relevance_score': 10, 'justification': 'Perfect fit for marketing CasaAI's product'}, \
                    {'comment_id': 'm23npns', 'relevance_score': 5, 'justification': '''Talking about gardening'''},....\
                   ]"

In [19]:
descr = "{comment_id} is the id of the comment that you need to provide score and justification. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. Analyze this \
and determine relevance of {comment_id} based on identified keywords and phrases w.r.to the marketing of {product_long}. Then \
provide a score out of 10 for {comment_id} with justification. {tip_text}"

expected_out = "{comment_id}, its content_fitment score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 
        
content_analysis_task = Task(
                            description=descr,
                            expected_output=expected_out,
                            output_json=ScoreOutput,
                            agent=content_analysis_agent,
                            )

In [20]:
descr = "{comment_id} is the id of the comment that you need to provide score and justification. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. Evaluate \
the level of user interaction in {comment_id} by analyzing the provided content. This includes analyzing metrics such as \
likes, shares, comments, and views.Then provide an engagement score out of 10 for {comment_id} with justification. {tip_text}"

expected_out = "{comment_id}, its engagement score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 

engagement_analysis_task =  Task(
                                description=descr,
                                expected_output=expected_out,
                                output_json=ScoreOutput,
                                agent=engagement_analysis_agent,
                                )

In [21]:
descr = "{comment_id} is the id of the comment that you need to provide score and justification. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. Assess \
how well the {comment_id} aligns with {product_long} by analyzing the provided content. The goal is to assign a \
relevance score out of 10 that reflects the content’s pertinence to its intended audience and its  \
alignment with the product that is marketed. {tip_text}"        

expected_out = "{comment_id}, its relevance score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 
        
relevance_analysis_task = Task(
                                description=descr,
                                expected_output=expected_out,
                                output_json=ScoreOutput,    
                                agent=relevance_analysis_agent,
                                )

In [22]:
descr = "{comment_id} is the id of the comment that you need to provide a final score and justification. You will get \
outputs from Content Analysis, Engagement Analysis, and Relevance Analysis agents for {comment_id}. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. You will \
review all these content and provide a final score for {comment_id} based on the relevance to the marketing of \
{product_long}. {tip_text}"        
       
expected_out = "{comment_id}, its relevance score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 
        
final_scoring_task = Task(
            description=descr,
            expected_output=expected_out,
            output_json=ScoreOutput, 
            agent=content_review_agent,
        )  

In [23]:
content_analysis_crew = Crew(
    agents=[content_analysis_agent,],
    tasks=[content_analysis_task, ],
    verbose=True,
)

engagement_analysis_crew = Crew(
    agents=[engagement_analysis_agent,],
    tasks=[engagement_analysis_task,],
    verbose=True,
)

relevance_analysis_crew = Crew(
    agents=[relevance_analysis_agent,],
    tasks=[relevance_analysis_task,],
    verbose=True,
)

content_review_crew = Crew(
    agents=[content_review_agent,],
    tasks=[final_scoring_task,],
    verbose=True,
)

response_creation_crew = Crew(
    agents=[content_analysis_agent, engagement_analysis_agent, relevance_analysis_agent, content_review_agent,],
    tasks=[content_analysis_task, engagement_analysis_task, relevance_analysis_task, final_scoring_task,],
    verbose=True,
)

# response_creation_mgr_crew = Crew(
#     agents=[content_analysis_agent, engagement_analysis_agent, relevance_analysis_agent,],
#     tasks=[content_analysis_task, engagement_analysis_task, relevance_analysis_task,],
#     verbose=True,
#     manager_llm=gemma2_9b,
#     process=Process.hierarchical,
# )



In [93]:
# response_creation_crew = Crew(
#     agents=[content_analysis_agent, engagement_analysis_agent, relevance_analysis_agent,],
#     tasks=[content_analysis_task, engagement_analysis_task, relevance_analysis_task,],
#     verbose=True,
#     manager_llm=gemma2_9b,
#     process=Process.hierarchical,
# )

In [24]:
reddit_posts, reddit_post_ids = fetch_reddit_test()
condensed_reddit_data, unique_post_ids, unique_comment_ids = condense_data(reddit_posts, reddit_post_ids)

post_cnt: 0 + 10 = 10
comm_cnt: 0 + 560 = 560
cond_cnt: 570 = 10 + 560
Cross_ck: 10 = 10


In [25]:
comment_dict = {}
tot_data = 0
for item in condensed_reddit_data:
    for data in item:
        tot_data += 1
        c_id = data['comment_id']
        p_id = data['parent_id']
        text = data['text']
        comment_dict[c_id] = {'comment_id': c_id, 'parent_id': p_id, 'text': text}
print(len(comment_dict), '=', tot_data)

570 = 570


In [26]:
reddit_posts.keys()

dict_keys(['InteriorDesign'])

In [35]:
print(reddit_posts['InteriorDesign'].keys())
print(len(reddit_posts['InteriorDesign']['Room Style Transformation']))
print(len(condensed_reddit_data))
# condensed_reddit_data_ltd = condensed_reddit_data[-2:]
# print(len(condensed_reddit_data_ltd), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data_ltd[1]))

dict_keys(['Room Style Transformation'])
10
10


In [28]:
def get_data_details(comm_id):
    done = 0
    data_details = []
    while done==0:
        if comm_id in comment_dict:
            details = comment_dict[comm_id]
            data_details.append(details)
            comm_id = details['parent_id']
        else:
            done = 1
    return data_details

In [43]:
lst1 = []
lst2 = [1, 2,3]
lst3= [ 4, 5]
lst1.append(lst2)
lst3.append(6)
lst1.append(lst3)
lst1

[[1, 2, 3], [4, 5, 6]]

In [44]:
score_result_lst = []
for idx, reddit_data_item in enumerate(condensed_reddit_data):
    comment_lst = []
    for idx2, comment_data in enumerate(reddit_data_item):        
        comm_id = comment_data['comment_id']        
        print(f'STARTING {idx} - {idx2} - {comm_id}')
        data_details = get_data_details(comm_id)      
        input_dict = {"comment_id": comm_id,
                      "input_data": data_details,
                      "product_long": product_long,
                      "product_short": product_short,
                      "tip_text":tip_text,
                      "output_format":output_format}
        scoring_result = response_creation_crew.kickoff(inputs=input_dict)
        json_out = ast.literal_eval(scoring_result.json)
        comment_lst.append(json_out)
    score_result_lst.append(comment_lst)

STARTING 0 - 0 - kgesxn
[1m[95m [2024-08-19 07:51:02][DEBUG]: == Working Agent: Content Analyst[00m
[1m[95m [2024-08-19 07:51:02][INFO]: == Starting Task: kgesxn is the id of the comment that you need to provide score and justification. You will also be provided data belonging to this comment and also its parent comments. You will get these data in [{'comment_id': 'kgesxn', 'parent_id': None, 'text': 'Our newly renovated living room! This room has seen a huge transformation! If you’d like to see more then follow our Instagram @houseroundthebend'}]. Analyze this and determine relevance of kgesxn based on identified keywords and phrases w.r.to the marketing of CasaAI - next generation tool for transforming interior spaces is designed for homeowners, interior designers, and real estate professionals. This app uses advanced AI to reimagine any room based on style and functional needs of user.. Then provide a score out of 10 for kgesxn with justification. If you do your BEST WORK, I'll

ValueError: malformed node or string on line 1: <ast.Name object at 0x7ff8bc1e59f0>

In [49]:
scoring_result.raw

'kq75xk, 1/10 - Comment discusses irrelevant subreddit recommendations. Justification: Low engagement due to lack of relevance to AI-powered tools like CasaAI.'

In [45]:
len(score_result_lst), len(condensed_reddit_data), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data[-1])

(5, 10, 2, 35)

In [47]:
len(score_result_lst[0])

170

In [39]:
score_result_lst[-1]

{'comment_id': 'gtwa7hf',
 'score': 2.0,
 'justification': 'Comment focuses on Airbnb spending habits, not home design or AI transformation.'}