In [1]:
import os
import re
import yaml
import json
import ast
import praw
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [2]:
from textwrap import dedent
from reddit_helper import *    

from langchain.llms import OpenAI, Ollama
from langchain_openai import ChatOpenAI

from pydantic import BaseModel

In [42]:
# Load API credentials from a local JSON file; its 'openai' entry is the key
# handed to both ChatOpenAI clients below.
api_file_path = 'api_keys.json'
with open(api_file_path, 'r') as file:
    api_keys = json.load(file)        
# OpenAI chat models, both configured with temperature 0.7.
openai_gpt35 = ChatOpenAI(model_name="gpt-3.5-turbo", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])
openai_gpt4 = ChatOpenAI(model_name="gpt-4", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])        
# Ollama-backed models; these two are the ones assigned to the agents in the
# visible cells of this notebook.
llama31_8b = Ollama(model='llama3.1:8b',)
gemma2_9b = Ollama(model='gemma2:9b',)

In [4]:
# Read the product configuration from a YAML file.
cfg_file_path = 'casaai_config.yaml'
with open(cfg_file_path, 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)
# Product descriptions used for the {product_long} / {product_short} prompt
# placeholders; a missing key falls back to an empty string.
product_long = cfg.get('product_long_description', '')      
product_short = cfg.get('product_short_description', '')

In [5]:
def re_change(input_string):
    """Extract comment id, relevance score and justification from one output item.

    The input is expected to be a fragment of a Python-style dict literal such as
    ``{'comment_id': 'abc', 'relevance_score': 7, 'justification': '''text'''``.

    Args:
        input_string: Text of a single scored item produced by the model.

    Returns:
        A ``(comment_id, relevance_score, justification)`` tuple.
        ``comment_id`` and ``relevance_score`` are the captured strings when the
        id/score pair is found, otherwise ``"na"`` and the integer ``0``
        (callers are expected to wrap the score in ``int()``).
        ``justification`` is the captured text, or ``"na"`` when none is found.
    """
    id_score_match = re.search(
        r"'comment_id':\s*'([^']*)',\s*'relevance_score':\s*(\d+)",
        input_string,
    )
    if id_score_match:
        comment_id, relevance_score = id_score_match.groups()
    else:
        comment_id = "na"
        relevance_score = 0

    # Preferred form: justification wrapped in triple single quotes. DOTALL lets
    # a justification that spans several lines be captured as well.
    triple_quoted = re.search(
        r"'justification':\s*'''(.*?)'''", input_string, re.DOTALL
    )
    if triple_quoted:
        return comment_id, relevance_score, triple_quoted.group(1)

    # Fallback: models frequently ignore the triple-quote instruction and emit a
    # plain single-quoted value. The closing quote is the first one followed only
    # by whitespace and then "}" or the end of the text, so apostrophes inside
    # the justification (e.g. "CasaAI's") do not cut it short.
    single_quoted = re.search(
        r"'justification':\s*'(.*?)'\s*(?:\}|\Z)", input_string, re.DOTALL
    )
    justification = single_quoted.group(1) if single_quoted else "na"
    return comment_id, relevance_score, justification

In [138]:
# Backstory prompt for the content analyst. {product_short} is a template
# placeholder (presumably filled by crewAI from the kickoff inputs — confirm).
# NOTE(review): the backslash continuations sit inside the string literal, so
# the leading spaces of each continued line become part of the prompt text.
backstory = "You are a content analyst with expertise in analyzing web content and \
             extracting relevant information. You are responsible for ensuring that \
             content is relevant, high-quality, and aligned with the marketing of \
             {product_short}. "
# Agent that scores content; it runs on the gemma2_9b model and may not delegate.
content_analysis_agent = Agent(
                            role="Content Analyst",
                            goal="Analyze web content and extract relevant information",
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=gemma2_9b,
                            )

In [7]:
# Backstory and goal prompts for the engagement analyst.
# NOTE(review): the backslash continuations sit inside the string literal, so
# the leading spaces of each continued line become part of the prompt text.
backstory = "You evaluates content by analyzing user interactions, such as likes, \
            shares, comments, and views. You should also consider factors such as \
            user behavior/sentiment."
goal = "Accurately assess the impact and effectiveness of content based on user interactions"
# Agent that rates user engagement; it runs on the llama31_8b model and may not delegate.
engagement_analysis_agent = Agent(
                            role="Engagement Analyst",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=llama31_8b,
                            )

In [8]:
# Backstory and goal prompts for the relevance analyst. {product_short} is a
# template placeholder (presumably filled by crewAI from the kickoff inputs — confirm).
# NOTE(review): the backslash continuations sit inside the string literal, so
# the leading spaces of each continued line become part of the prompt text.
backstory = "You evaluates content by analyzing its alignment with {product_short}. \
            You should also consider factors such as keyword density, context \
            accuracy, and user intent. You identifies content that effectively \
            meets audience expectations, flags irrelevant material, and \
            provides insights to enhance content targeting."
goal = "Ensure that content is highly pertinent and aligned with the intended topics and audience needs"
# Agent that rates topical relevance; it runs on the llama31_8b model and may not delegate.
relevance_analysis_agent = Agent(
                            role="Relevance Analyst",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=llama31_8b,
                        )

In [9]:
# Backstory and goal prompts for the reviewing supervisor. {product_short} is a
# template placeholder (presumably filled by crewAI from the kickoff inputs — confirm).
# NOTE(review): the backslash continuations sit inside the string literal, so
# the leading spaces of each continued line become part of the prompt text.
backstory = "Responsible for synthesizing and evaluating the combined outputs from the \
                Content Analysis, Engagement Analysis, and Relevance Analysis agents. You \
                ensures all aspects of content—quality, engagement, and relevance—are \
                harmonized and aligns with the marketing of {product_short}."
goal = "Ensure content is relevant, engaging, and strategically aligned to marketing of product"
# Agent that produces the final review; it runs on the gemma2_9b model and may not delegate.
content_review_agent = Agent(
                            role="Content Review Supervisor",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=True,
                            llm=gemma2_9b,
                        )

In [132]:
# Schema for one scoring result; passed as output_json to content_analysis_task.
class ScoreOutput(BaseModel):
    comment_id: str  # id of the scored comment
    score: float  # numeric score; the task prompt asks for a score out of 10
    justification: str  # short rationale for the score

In [133]:
# Incentive sentence inserted into the task descriptions through the {tip_text} placeholder.
tip_text = "If you do your BEST WORK, I'll give you a $10,000 commission!"

In [130]:
# Example of the list-of-dicts layout the models are asked to reply with; it is
# passed to the crews as the {output_format} input.
# NOTE(review): the first example wraps its justification in single quotes (and
# contains an apostrophe) while the second uses triple quotes, so the two
# examples are not consistent with each other.
output_format = "[{'comment_id': 'k31npnk', 'relevance_score': 10, 'justification': 'Perfect fit for marketing CasaAI's product'}, \
                    {'comment_id': 'm23npns', 'relevance_score': 5, 'justification': '''Talking about gardening'''},....\
                   ]"

In [139]:
# Prompt for scoring ONE comment. Placeholders: {comment_id}, {input_data},
# {product_long} and {tip_text}; all four are supplied in the inputs dict of the
# per-comment scoring loop.
descr = "{comment_id} is the id of the comment that you need to provide score and justification. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. Analyze this \
and determine relevance of {comment_id} based on identified keywords and phrases w.r.to the marketing of {product_long}. Then \
provide a score out of 10 for {comment_id} with justification. {tip_text}"

# Previous list-style expected output, kept for reference:
# expected_out = "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting \
#                 in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its \
#                 associated comments." 

# Expected output for the single-comment variant of the task.
expected_out = "{comment_id}, its relevance score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 
        
# output_json=ScoreOutput requests a JSON result shaped like ScoreOutput; the
# scoring loop reads it back through scoring_result.json.
content_analysis_task = Task(
                            description=descr,
                            expected_output=expected_out,
                            output_json=ScoreOutput,
                            agent=content_analysis_agent,
                            )

In [86]:
# Prompt for the engagement analysis task. Placeholders: {input_data} and
# {tip_text}. The first continued line ends with a space before the backslash
# so that "includes" and "analyzing" are not fused into a single word (a
# backslash-newline inside a string literal joins the lines with nothing in
# between).
descr = "Evaluating the level of user interaction with the provided content from {input_data}. This includes \
analyzing metrics such as likes, shares, comments, and views to calculate an overall engagement score out of 10. \
For example, if there are 118 comments in input \
then there needs to scores & justification for each of the 118 comments. {tip_text}"

# Expected output: the list layout given by {output_format}.
expected_out = "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting \
                in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its \
                associated comments." 

# Task executed by the engagement analyst agent.
engagement_analysis_task =  Task(
                                description=descr,
                                expected_output=expected_out,
                                agent=engagement_analysis_agent,
                                )

In [87]:
# Prompt for the relevance analysis task. Placeholders: {input_data},
# {product_long} and {tip_text}.
descr = "Assess how well the content in {input_data} aligns with {product_long}. The goal is to assign a \
relevance score out of 10 that reflects the content’s pertinence to its intended audience and its  \
alignment with the product that is marketed. For example, if there are 118 comments in input \
then there needs to scores & justification for each of the 118 comments. {tip_text}"        

# Expected output: the list layout given by {output_format}.
expected_out = "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting \
                in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its \
                associated comments."
        
# Task executed by the relevance analyst agent.
relevance_analysis_task = Task(
                                description=descr,
                                expected_output=expected_out,
                                agent=relevance_analysis_agent,
                                )

In [88]:
# Prompt for the final review task. Placeholders: {input_data}, {product_long}
# and {tip_text}.
# NOTE(review): the prompt refers to outputs of the other three agents, but the
# crew built for this task in the visible cells contains only this one task —
# confirm how those outputs are meant to reach it.
descr = "You will get outputs from Content Analysis, Engagement Analysis, and Relevance Analysis agents. \
You will also get content from {input_data}. You will review the outputs from these agents and the content \
and provide a final score for each post and comment based on the relevance to the marketing of \
{product_long}. For example, if there are 118 comments in input \
then there needs to scores & justification for each of the 118 comments. {tip_text}"        
       
# Expected output: the list layout given by {output_format}.
expected_out = "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting \
                in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its \
                associated comments." 
        
# Task executed by the content review supervisor agent.
final_scoring_task = Task(
            description=descr,
            expected_output=expected_out,
            agent=content_review_agent,
        )  

In [89]:
# backstory = "You will be given a python string. You should convert it to a JSON format."
# goal = "Convert the string which is a list of python dictionaries to json format"
# json_convert_agent = Agent(
#                             role="Json converted",
#                             goal=goal,
#                             backstory=backstory,
#                             allow_delegation=False,
#                             verbose=True,
#                             llm=llama31_8b,
#                         )

In [90]:
# descr = "Input is a python string of format - {output_format}. This is a string of python list that holds python \
# dictionaries whose keys are comment_id, relevance score, and justification. You need to convert this to a json \
# format. Your input is {input_data_2}. {tip_text}."     
# expected_out = "Json that has comment_id, relevance score, and justification as keys"         
# json_convert_task = Task(
#             description=descr,
#             expected_output=expected_out,
#             output_json=ScoreOutput,
#             agent=json_convert_agent,
#         )  

In [140]:
# One crew per analysis stage; each crew wraps exactly one agent and one task.

# Scores content relevance for marketing.
content_analysis_crew = Crew(agents=[content_analysis_agent], tasks=[content_analysis_task], verbose=True)

# Scores user engagement.
engagement_analysis_crew = Crew(agents=[engagement_analysis_agent], tasks=[engagement_analysis_task], verbose=True)

# Scores alignment with the product.
relevance_analysis_crew = Crew(agents=[relevance_analysis_agent], tasks=[relevance_analysis_task], verbose=True)

# Produces the final reviewed score.
content_review_crew = Crew(agents=[content_review_agent], tasks=[final_scoring_task], verbose=True)




In [92]:
# json_creation_crew = Crew(
#     agents=[json_convert_agent,],
#     tasks=[json_convert_task,],
#     verbose=True,
# )

In [93]:
# response_creation_crew = Crew(
#     agents=[content_analysis_agent, engagement_analysis_agent, relevance_analysis_agent,],
#     tasks=[content_analysis_task, engagement_analysis_task, relevance_analysis_task,],
#     verbose=True,
#     manager_llm=gemma2_9b,
#     process=Process.hierarchical,
# )

In [94]:
# Fetch the Reddit test data and condense it. Both helpers are presumably
# provided by `from reddit_helper import *` — confirm.
reddit_posts, reddit_post_ids = fetch_reddit_test()
condensed_reddit_data, unique_post_ids, unique_comment_ids = condense_data(reddit_posts, reddit_post_ids)

subreddit_name : InteriorDesign
keyword : Room Style Transformation
post_cnt: 0 + 10 = 10
comm_cnt: 0 + 560 = 560
cond_cnt: 570 = 10 + 560
Cross_ck: 10 = 10


In [95]:
reddit_posts.keys()

dict_keys(['InteriorDesign'])

In [96]:
# Inspect the fetched structure: keywords under the subreddit, number of posts
# for the keyword, and number of condensed items.
print(reddit_posts['InteriorDesign'].keys())
print(len(reddit_posts['InteriorDesign']['Room Style Transformation']))
print(len(condensed_reddit_data))
# Work on only the first two condensed items for the rest of the notebook.
condensed_reddit_data_ltd = condensed_reddit_data[:2]
print(len(condensed_reddit_data_ltd), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data_ltd[1]))

dict_keys(['Room Style Transformation'])
10
10
2 170 109


In [123]:
# Build a lookup from comment id to its id / parent id / text, counting every
# entry visited so the printed line shows whether any ids collided.
comment_dict = {}
tot_data = 0
for thread in condensed_reddit_data_ltd:
    for entry in thread:
        tot_data += 1
        comment_dict[entry['comment_id']] = {
            'comment_id': entry['comment_id'],
            'parent_id': entry['parent_id'],
            'text': entry['text'],
        }
print(len(comment_dict), '=', tot_data)

279 = 279


In [124]:
def get_data_details(comm_id, comments=None):
    """Collect a comment and its chain of ancestors.

    Starting at ``comm_id``, follows each entry's ``'parent_id'`` until an id is
    reached that is not present in the lookup (for example ``None`` for a
    top-level post).

    Args:
        comm_id: Id of the comment to start from.
        comments: Optional mapping of comment id to a dict holding at least a
            ``'parent_id'`` key. Defaults to the notebook-level ``comment_dict``.

    Returns:
        A list of entry dicts ordered from the starting comment up to its
        oldest known ancestor; empty when ``comm_id`` is not in the lookup.
    """
    if comments is None:
        comments = comment_dict

    data_details = []
    visited = set()
    # The `visited` guard stops the walk if the parent links ever form a cycle
    # (e.g. an entry listing itself as its parent), which would otherwise loop
    # forever while the result list keeps growing.
    while comm_id in comments and comm_id not in visited:
        visited.add(comm_id)
        details = comments[comm_id]
        data_details.append(details)
        comm_id = details['parent_id']
    return data_details

In [None]:
# Score every comment individually: one crew kickoff per comment, with the
# comment and its parent chain as context. Results are collected as dicts.
score_result_lst = []
for reddit_data_item in condensed_reddit_data_ltd:
    for comment_data in reddit_data_item:
        comm_id = comment_data['comment_id']
        # Context for the prompt: the comment itself followed by its ancestors.
        data_details = get_data_details(comm_id)
        input_dict = {"comment_id": comm_id,
                      "input_data": data_details,
                      "product_long": product_long,
                      "product_short": product_short,
                      "tip_text": tip_text,
                      "output_format": output_format}
        scoring_result = content_analysis_crew.kickoff(inputs=input_dict)
        # scoring_result.json is JSON text, so decode it with the JSON parser;
        # ast.literal_eval only understands Python literals and rejects valid
        # JSON such as true / false / null.
        json_out = json.loads(scoring_result.json)
        score_result_lst.append(json_out)

[1m[95m [2024-08-14 14:27:35][DEBUG]: == Working Agent: Content Analyst[00m
[1m[95m [2024-08-14 14:27:35][INFO]: == Starting Task: kgesxn is the id of the comment that you need to provide score and justification. You will also be provided data belonging to this comment and also its parent comments. You will get these data in [{'comment_id': 'kgesxn', 'parent_id': None, 'text': 'Our newly renovated living room! This room has seen a huge transformation! If you’d like to see more then follow our Instagram @houseroundthebend'}]. Analyze this and determine relevance of kgesxn based on identified keywords and phrases w.r.to the marketing of CasaAI - next generation tool for transforming interior spaces is designed for homeowners, interior designers, and real estate professionals. This app uses advanced AI to reimagine any room based on style and functional needs of user.. Then provide a score out of 10 for kgesxn with justification. If you do your BEST WORK, I'll give you a $10,000 comm

In [145]:
# Decode the most recent crew result. The payload is JSON text, so use the JSON
# parser rather than ast.literal_eval (which rejects true / false / null).
json_out = json.loads(scoring_result.json)
type(json_out)

dict

In [98]:
# Batch-mode experiment: send a whole condensed item to the crew in one call and
# recover per-comment scores from the raw text with re_change.
# NOTE(review): input_dict has no "comment_id" entry, while the description of
# content_analysis_task uses a {comment_id} placeholder — confirm this cell
# still runs against the current task definition.
# NOTE(review): this cell rebinds score_result_lst, replacing the results of
# the per-comment scoring loop.
score_result_lst = []
for idx, data in enumerate(condensed_reddit_data_ltd): 
    print(idx, 'STARTED', len(data))
    input_dict = {"input_data": data,
                  "product_long": product_long,
                  "product_short": product_short,
                  "tip_text":tip_text,
                  "output_format":output_format}    
    scoring_result = content_analysis_crew.kickoff(inputs=input_dict)
    txt = scoring_result.raw
    # Split the raw reply into per-comment chunks at every "}," boundary.
    lst = txt.split("},")
    print(f'len(lst): {len(lst)}')
    item_lst = []
    print(idx, 'WRITING TO DICT')
    for item in lst:
        item_dict = {}
        # re_change falls back to "na" / 0 / "na" when a chunk cannot be parsed.
        comment_id, score, justification = re_change(item)
        item_dict['comment_id'] = str(comment_id)
        item_dict['score'] = int(score)
        item_dict['justification'] = str(justification)        
        item_lst.append(item_dict)
    print(f'len(item_lst) : {len(item_lst)}')
    print(idx, 'APPENDING')
    score_result_lst.append(item_lst)    
    # Stops after the first item, so only condensed_reddit_data_ltd[0] is processed.
    break

0 STARTED 170
[1m[95m [2024-08-14 12:19:00][DEBUG]: == Working Agent: Content Analyst[00m
[1m[95m [2024-08-14 12:19:00][INFO]: == Starting Task: Analyze posts and associated comments from [{'comment_id': 'kgesxn', 'parent_id': None, 'text': 'Our newly renovated living room! This room has seen a huge transformation! If you’d like to see more then follow our Instagram @houseroundthebend', 'author': 'skunkyblowburn', 'score': 1211}, {'comment_id': 'ggfdsvg', 'parent_id': 'kgesxn', 'text': 'It’s certainly bold', 'author': 'whoisNO', 'score': 157}, {'comment_id': 'ggfh2zn', 'parent_id': 'kgesxn', 'text': 'Gorgeous room - so interesting ! Is there a story behind the theatre seats? Are the wall paper and upholstery the same ? Such a non boring room thanks for sharing it !!', 'author': 'sd1212', 'score': 43}, {'comment_id': 'ggepn0p', 'parent_id': 'kgesxn', 'text': 'Massive respect for the style, not my cup of tea, but I love how well put together it looks.', 'author': 'Stevie19Y', 'score

In [99]:
scoring_result.json

'{"comment_id": "kgesxn", "score": 8.0}'

In [80]:
item_lst

[{'comment_id': 'kgesxn', 'score': 8, 'justification': 'na'},
 {'comment_id': 'ggfdsvg', 'score': 5, 'justification': 'na'},
 {'comment_id': 'ggfh2zn', 'score': 6, 'justification': 'na'},
 {'comment_id': 'ggepn0p', 'score': 7, 'justification': 'na'},
 {'comment_id': 'ggfd5lx', 'score': 2, 'justification': 'na'},
 {'comment_id': 'ggewh8g', 'score': 6, 'justification': 'na'},
 {'comment_id': 'ggffmc4', 'score': 3, 'justification': 'na'},
 {'comment_id': 'ggfqz4a', 'score': 5, 'justification': 'na'},
 {'comment_id': 'ggf874w', 'score': 4, 'justification': 'na'},
 {'comment_id': 'ggfb33x', 'score': 2, 'justification': 'na'},
 {'comment_id': 'ggg0jqv', 'score': 7, 'justification': 'na'},
 {'comment_id': 'ggflytk', 'score': 5, 'justification': 'na'},
 {'comment_id': 'ggejb04', 'score': 3, 'justification': 'na'},
 {'comment_id': 'ggfs2w1', 'score': 4, 'justification': 'na'},
 {'comment_id': 'ggfk0u3', 'score': 3, 'justification': 'na'},
 {'comment_id': 'ggfcojx', 'score': 2, 'justification': 

In [63]:
len(score_result_lst), len(score_result_lst[0])#, len(score_result_lst[1])

(1, 1)

In [81]:
len(condensed_reddit_data_ltd), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data_ltd[1])

(2, 170, 109)

In [64]:
score_result_lst[0]

[{'comment_id': 'kgesxn', 'score': 9, 'justification': 'na'}]

In [39]:
score_result_lst[1]

[{'comment_id': 'g2cjyqf',
  'score': 6,
  'justification': 'Mentioned CasaAI but not relevant to marketing'},
 {'comment_id': 'g2cjyda',
  'score': 1,
  'justification': 'Not related to CasaAI or interior design'},
 {'comment_id': 'g2dlhzg',
  'score': 1,
  'justification': 'Thank you message not relevant to marketing'},
 {'comment_id': 'g2bn34c',
  'score': 8,
  'justification': 'Discussed interior design and CasaAI'},
 {'comment_id': 'g2b5s7r',
  'score': 9,
  'justification': 'Validated the need for interior design and mentioned CasaAI'},
 {'comment_id': 'g2cdn2l',
  'score': 1,
  'justification': 'Asking about the meaning of "wall" not relevant to marketing'},
 {'comment_id': 'g2bm6mb',
  'score': 5,
  'justification': 'Mentioned balance but not directly related to CasaAI'},
 {'comment_id': 'g2bkdbq',
  'score': 3,
  'justification': 'Asking about the color of the wall not relevant to marketing'},
 {'comment_id': 'g2dp6nv',
  'score': 1,
  'justification': 'Me too message not rele

In [40]:
scoring_result.raw

'[\n {\'comment_id\': \'g2cjyqf\', \'relevance_score\': 6, \'justification\': \'\'\'Mentioned CasaAI but not relevant to marketing\'\'\'}, \n {\'comment_id\': \'g2cjyda\', \'relevance_score\': 1, \'justification\': \'\'\'Not related to CasaAI or interior design\'\'\'}, \n {\'comment_id\': \'g2dlhzg\', \'relevance_score\': 1, \'justification\': \'\'\'Thank you message not relevant to marketing\'\'\'}, \n {\'comment_id\': \'g2bn34c\', \'relevance_score\': 8, \'justification\': \'\'\'Discussed interior design and CasaAI\'\'\'}, \n {\'comment_id\': \'g2b5s7r\', \'relevance_score\': 9, \'justification\': \'\'\'Validated the need for interior design and mentioned CasaAI\'\'\'}, \n {\'comment_id\': \'g2cdn2l\', \'relevance_score\': 1, \'justification\': \'\'\'Asking about the meaning of "wall" not relevant to marketing\'\'\'}, \n {\'comment_id\': \'g2bm6mb\', \'relevance_score\': 5, \'justification\': \'\'\'Mentioned balance but not directly related to CasaAI\'\'\'}, \n {\'comment_id\': \'g2b