In [260]:
import os
import re
import yaml
import json
import ast
import praw
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI

In [4]:
from textwrap import dedent
from reddit_helper import *    

from langchain.llms import OpenAI, Ollama
from langchain_openai import ChatOpenAI

from pydantic import BaseModel

In [5]:
# Read provider credentials from a JSON file instead of hardcoding them here.
api_file_path = 'api_keys.json'
with open(api_file_path, 'r') as file:
    api_keys = json.load(file)

# OpenAI chat models; both use the same key and sampling temperature.
openai_gpt35 = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.7,
    openai_api_key=api_keys['openai'],
)
openai_gpt4 = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.7,
    openai_api_key=api_keys['openai'],
)

# Models reached through the Ollama wrapper; these are the ones the agents use.
llama31_8b = Ollama(model='llama3.1:8b')
gemma2_9b = Ollama(model='gemma2:9b')

In [6]:
# Load the product configuration; safe_load is used, so only plain YAML types are built.
cfg_file_path = 'casaai_config.yaml'
with open(cfg_file_path, 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)

# Product descriptions that fill the {product_long}/{product_short} prompt
# placeholders. A missing key falls back to an empty string.
product_long, product_short = (
    cfg.get(key, '')
    for key in ('product_long_description', 'product_short_description')
)

In [266]:
def re_change(input_string):
    """Extract ``(comment_id, relevance_score, justification)`` from one scored item.

    ``input_string`` is a fragment of LLM output that is expected to look like
    ``{'comment_id': 'abc', 'relevance_score': 7, 'justification': '''...'''}``.
    Because the text is model-generated, the parser is deliberately tolerant:

    * keys and the id may be wrapped in single or double quotes (Python-repr
      or JSON style);
    * the justification may be wrapped in ``'''``, ``\"\"\"``, ``"`` or ``'``
      and may span several lines.

    Parameters
    ----------
    input_string : str
        Text of a single item. Any non-string value (e.g. ``None``) is treated
        as empty text.

    Returns
    -------
    tuple[str, int, str]
        ``comment_id`` (``"na"`` when not found), ``relevance_score`` as an
        ``int`` (``0`` when not found) and ``justification`` (``"na"`` when
        not found). Only the first occurrence of each field is used.
    """
    # Treat non-string input as "nothing to parse" rather than raising.
    text = input_string if isinstance(input_string, str) else ""

    quote = "['\"]"

    # --- comment_id and relevance_score (must appear in this order) ---
    id_score_pattern = (
        quote + "comment_id" + quote + r"\s*:\s*"
        + quote + "([^'\"]*)" + quote + r"\s*,\s*"
        + quote + "relevance_score" + quote + r"\s*:\s*(\d+)"
    )
    id_score_match = re.search(id_score_pattern, text)
    if id_score_match:
        comment_id = id_score_match.group(1)
        # Always hand back an int so both branches return the same type.
        relevance_score = int(id_score_match.group(2))
    else:
        comment_id = "na"
        relevance_score = 0

    # --- justification ---
    # Tried strictest first: the triple-quoted forms are what the prompt asks
    # for; the single/double-quoted forms are fallbacks for models that ignore
    # that instruction.
    key = quote + "justification" + quote + r"\s*:\s*"
    justification_patterns = (
        key + "'''(.*?)'''",
        key + '"""(.*?)"""',
        key + r'"((?:[^"\\]|\\.)*)"',
        # Single-quoted text may contain apostrophes ("CasaAI's"), so the
        # closing quote must be followed by ',', '}', ']' or end of text.
        key + r"'(.*?)'\s*(?=[,}\]]|$)",
    )
    justification = "na"
    for pattern in justification_patterns:
        found = re.search(pattern, text, re.DOTALL)
        if found:
            justification = found.group(1)
            break

    return comment_id, relevance_score, justification

In [155]:
# The backstory is built from adjacent string literals. A backslash
# continuation inside a single literal keeps the next source line's leading
# spaces, which put long runs of blanks into the prompt.
# `{product_short}` is intentionally left as a placeholder — presumably filled
# from the `inputs` dict given to Crew.kickoff; confirm against the installed
# crewai version.
backstory = (
    "You are a content analyst with expertise in analyzing web content and "
    "extracting relevant information. You are responsible for ensuring that "
    "content is relevant, high-quality, and aligned with the marketing of "
    "{product_short}. "
)
content_analysis_agent = Agent(
    role="Content Analyst",
    goal="Analyze web content and extract relevant information",
    backstory=backstory,
    allow_delegation=False,
    verbose=True,
    llm=llama31_8b,
)

In [156]:
# Prompt text is assembled from adjacent literals so that source indentation
# does not end up inside the string (which a backslash continuation within a
# literal would cause). Subject–verb agreement is also corrected.
backstory = (
    "You evaluate content by analyzing user interactions, such as likes, "
    "shares, comments, and views. You should also consider factors such as "
    "user behavior/sentiment."
)
goal = "Accurately assess the impact and effectiveness of content based on user interactions"
engagement_analysis_agent = Agent(
    role="Engagement Analyst",
    goal=goal,
    backstory=backstory,
    allow_delegation=False,
    verbose=True,
    llm=llama31_8b,
)


In [157]:
# Prompt text is assembled from adjacent literals so that source indentation
# does not end up inside the string (which a backslash continuation within a
# literal would cause). Subject–verb agreement is also corrected.
# `{product_short}` stays a placeholder — presumably filled from the `inputs`
# dict given to Crew.kickoff; confirm against the installed crewai version.
backstory = (
    "You evaluate content by analyzing its alignment with {product_short}. "
    "You should also consider factors such as keyword density, context "
    "accuracy, and user intent. You identify content that effectively "
    "meets audience expectations, flag irrelevant material, and "
    "provide insights to enhance content targeting."
)
goal = "Ensure that content is highly pertinent and aligned with the intended topics and audience needs"
relevance_analysis_agent = Agent(
    role="Relevance Analyst",
    goal=goal,
    backstory=backstory,
    allow_delegation=False,
    verbose=True,
    llm=llama31_8b,
)

In [158]:
# Supervisor agent: reviews the three analysts' outputs. It is the only agent
# in this notebook that runs on gemma2_9b.
# Prompt text is assembled from adjacent literals so that source indentation
# does not end up inside the string (which a backslash continuation within a
# literal would cause). Subject–verb agreement is also corrected.
# `{product_short}` stays a placeholder — presumably filled from the `inputs`
# dict given to Crew.kickoff; confirm against the installed crewai version.
backstory = (
    "Responsible for synthesizing and evaluating the combined outputs from the "
    "Content Analysis, Engagement Analysis, and Relevance Analysis agents. You "
    "ensure all aspects of content—quality, engagement, and relevance—are "
    "harmonized and align with the marketing of {product_short}."
)
goal = "Ensure content is relevant, engaging, and strategically aligned to marketing of product"
content_review_agent = Agent(
    role="Content Review Supervisor",
    goal=goal,
    backstory=backstory,
    allow_delegation=False,
    verbose=True,
    llm=gemma2_9b,
)

In [159]:
# Pydantic schema for one scored item. In the visible notebook it is referenced
# only by the commented-out JSON-conversion task (`output_json=ScoreOutput`).
# Field names mirror the keys of the dicts built by the scoring loop
# ('comment_id', 'score', 'justification').
# NOTE(review): the scoring loop stores `score` as int while this declares
# float — confirm which is intended before re-enabling the JSON task.
class ScoreOutput(BaseModel):
    comment_id: str
    score: float
    justification: str

In [160]:
# Incentive sentence appended to every task description through the
# {tip_text} placeholder.
tip_text = "If you do your BEST WORK, I'll give you a $10,000 commission!"

In [224]:
# Example of the list-of-dicts text the agents are asked to produce. The
# justification is wrapped in triple quotes so that apostrophes inside it
# (e.g. "CasaAI's") do not end the value early when re_change() parses it.
# NOTE(review): the backslash continuations keep the following lines' leading
# spaces inside the string.
# NOTE(review): the example ids 'k31npnk' and 'm23npns' show up verbatim in
# the parsed results displayed for score_result_lst[1], which suggests the
# model copies the example instead of the real ids — consider obviously fake
# ids here; confirm.
output_format_1 = "[{'comment_id': 'k31npnk', 'relevance_score': 10, 'justification': '''Perfect fit for marketing CasaAI's product'''}, \
                  {'comment_id': 'm23npns', 'relevance_score': 5, 'justification': '''Talking about gardening'''},....\
                 ]"

In [281]:
# Task prompt for the Content Analyst. The {input_data}, {product_long},
# {tip_text} and {output_format} placeholders are left for later
# substitution — presumably from the `inputs` dict passed to Crew.kickoff.
# Strings are built from adjacent literals so that source indentation is not
# embedded in the prompt (a backslash continuation inside a literal keeps the
# next line's leading spaces).
descr = (
    "Analyze posts and associated comments from {input_data} to determine their relevance based on "
    "identified keywords and phrases w.r.to the marketing of {product_long}. Then provide a score "
    "out of 10 for each post and comment and provide a justification for each score. "
    "For example, if there are 118 comments in input "
    "then there needs to be a score & justification for each of the 118 comments. {tip_text}"
)

expected_out = (
    "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting "
    "in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its "
    "associated comments."
)

content_analysis_task = Task(
    description=descr,
    expected_output=expected_out,
    agent=content_analysis_agent,
)

In [282]:
# Task prompt for the Engagement Analyst. Placeholders ({input_data},
# {tip_text}, {output_format}) are left for later substitution — presumably
# from the `inputs` dict passed to Crew.kickoff.
# Built from adjacent literals: the previous backslash-continued literal
# joined "includes" and "analyzing" without a space ("includesanalyzing") and
# embedded source indentation in expected_out.
descr = (
    "Evaluating the level of user interaction with the provided content from {input_data}. This includes "
    "analyzing metrics such as likes, shares, comments, and views to calculate an overall engagement score out of 10. "
    "For example, if there are 118 comments in input "
    "then there needs to be a score & justification for each of the 118 comments. {tip_text}"
)

# The wording keeps "relevance score" because the example format and the
# downstream parser both use the 'relevance_score' key.
expected_out = (
    "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting "
    "in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its "
    "associated comments."
)

engagement_analysis_task = Task(
    description=descr,
    expected_output=expected_out,
    agent=engagement_analysis_agent,
)

In [283]:
# Task prompt for the Relevance Analyst. Placeholders ({input_data},
# {product_long}, {tip_text}, {output_format}) are left for later
# substitution — presumably from the `inputs` dict passed to Crew.kickoff.
# Strings are built from adjacent literals so that source indentation and
# stray double spaces are not embedded in the prompt.
descr = (
    "Assess how well the content in {input_data} aligns with {product_long}. The goal is to assign a "
    "relevance score out of 10 that reflects the content’s pertinence to its intended audience and its "
    "alignment with the product that is marketed. For example, if there are 118 comments in input "
    "then there needs to be a score & justification for each of the 118 comments. {tip_text}"
)

expected_out = (
    "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting "
    "in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its "
    "associated comments."
)

relevance_analysis_task = Task(
    description=descr,
    expected_output=expected_out,
    agent=relevance_analysis_agent,
)

In [284]:
# Task prompt for the Content Review Supervisor, which produces the final
# score per post/comment. Placeholders ({input_data}, {product_long},
# {tip_text}, {output_format}) are left for later substitution — presumably
# from the `inputs` dict passed to Crew.kickoff.
# Strings are built from adjacent literals so that source indentation is not
# embedded in the prompt.
descr = (
    "You will get outputs from Content Analysis, Engagement Analysis, and Relevance Analysis agents. "
    "You will also get content from {input_data}. You will review the outputs from these agents and the content "
    "and provide a final score for each post and comment based on the relevance to the marketing of "
    "{product_long}. For example, if there are 118 comments in input "
    "then there needs to be a score & justification for each of the 118 comments. {tip_text}"
)

expected_out = (
    "{output_format} that has comment_id, relevance score, and a brief justification (less than 15 words) starting "
    "in triple quotes and ending in triple quotes explaining the rationale behind the score for each post and its "
    "associated comments."
)

final_scoring_task = Task(
    description=descr,
    expected_output=expected_out,
    agent=content_review_agent,
)

In [203]:
# backstory = "You will be given a python string. You should convert it to a JSON format."
# goal = "Convert the string which is a list of python dictionaries to json format"
# json_convert_agent = Agent(
#                             role="Json converted",
#                             goal=goal,
#                             backstory=backstory,
#                             allow_delegation=False,
#                             verbose=True,
#                             llm=llama31_8b,
#                         )

In [181]:
# descr = "Input is a python string of format - {output_format}. This is a string of python list that holds python \
# dictionaries whose keys are comment_id, relevance score, and justification. You need to convert this to a json \
# format. Your input is {input_data_2}. {tip_text}."     
# expected_out = "Json that has comment_id, relevance score, and justification as keys"         
# json_convert_task = Task(
#             description=descr,
#             expected_output=expected_out,
#             output_json=ScoreOutput,
#             agent=json_convert_agent,
#         )  

In [285]:
# Assemble the scoring crew: three analyst agents followed by the review
# supervisor, with tasks listed in the matching order. No `process` argument
# is passed, so crewai's default process applies.
response_creation_crew = Crew(
    agents=[
        content_analysis_agent,
        engagement_analysis_agent,
        relevance_analysis_agent,
        content_review_agent,
    ],
    tasks=[
        content_analysis_task,
        engagement_analysis_task,
        relevance_analysis_task,
        final_scoring_task,
    ],
    verbose=True,
)



In [218]:
# json_creation_crew = Crew(
#     agents=[json_convert_agent,],
#     tasks=[json_convert_task,],
#     verbose=True,
# )

In [152]:
# response_creation_crew = Crew(
#     agents=[content_analysis_agent, engagement_analysis_agent, relevance_analysis_agent,],
#     tasks=[content_analysis_task, engagement_analysis_task, relevance_analysis_task,],
#     verbose=True,
#     manager_llm=gemma2_9b,
#     process=Process.hierarchical,
# )

In [87]:
# Fetch the Reddit data and condense it for scoring. Neither helper is defined
# in this notebook — presumably both come from `from reddit_helper import *`.
# Judging by the later cells, `reddit_posts` is keyed by subreddit name and
# `condensed_reddit_data` is a list with one entry per post, each entry being
# a list of post/comment dicts.
# NOTE(review): fetch_reddit() presumably calls the Reddit API on every run —
# confirm, and consider caching the result so Restart & Run All stays cheap.
reddit_posts, reddit_post_ids = fetch_reddit()
condensed_reddit_data, unique_post_ids, unique_comment_ids = condense_data(reddit_posts, reddit_post_ids)

post_cnt: 24 + 688 = 712
comm_cnt: 0 + 16440 = 16440
cond_cnt: 17128 = 688 + 16440
Cross_ck: 688 = 688


In [95]:
# Top-level keys of the fetched data; the displayed values are subreddit names.
reddit_posts.keys()

dict_keys(['HomeImprovement', 'homeimprovementideas', 'homeimprovement2', 'Sprint', 'InteriorDesignHacks', 'homedecoratingCJ', 'HomeImprovementsitcom', 'AmateurInteriorDesign', 'vr_design', 'homedecorideas', 'DecorHomeIdeas', 'HomeDecorGalore', 'tycoon', 'interiordesignideas', 'InteriorDesignAdvice', 'HomeServices', 'HotwifeXXXCaptions', 'HomeDecorSolution', 'ModernEuroDesign', 'aweism', 'DesignJobs', 'HomeImprovementGuides', 'HomeImprovementSales', 'DesignMyRoom', 'Home', 'InteriorDesignMasters', 'HomeDecorating', 'hireavirtualassistant', 'HomeDecoratingUK', 'HomeImprovement2LTime', 'vr_ar_ux_design', 'Lululemen', 'homedecordeals', 'ScandinavianInterior', 'interiordecorating', 'rupaulsdragrace', 'xxxcaptions', 'HomeDecoratingTips', 'Projectmakeover', 'InteriorDesign', 'HomeImprovementUK', 'realtime_rendering', 'InteriorDesignCanada', 'OnlinePersonalTrainer', 'HomeDecorInspiration', 'VRGaming', 'vfx'])

In [286]:
# Spot-check one subreddit: its keys are the search phrases used for fetching.
interior_design_posts = reddit_posts['InteriorDesign']
print(interior_design_posts.keys())
print(len(interior_design_posts['Room Style Transformation']))

# Total number of condensed posts available for scoring.
print(len(condensed_reddit_data))

# Keep only the first two posts so the crew run stays short while iterating.
condensed_reddit_data_ltd = condensed_reddit_data[:2]
print(len(condensed_reddit_data_ltd))

dict_keys(['Interior Design Automation', 'Virtual Room Redesign', 'AI Home Decor Suggestions', 'Furniture Arrangement AI', 'Room Style Transformation'])
10
688
2


In [183]:
# input_dict = {"input_data": condensed_reddit_data_ltd,
#               "product_long": product_long,
#               "product_short": product_short,
#               "tip_text":tip_text,
#               "output_format":output_format}

In [287]:
# Run the scoring crew once per condensed post and parse the crew's raw text
# output into a list of {'comment_id', 'score', 'justification'} dicts.
# `score_result_lst` ends up with one such list per input post.
#
# The format example is defined in this notebook as `output_format_1`. The
# bare name `output_format` that was used here is not assigned in any visible
# cell, so running the notebook on a fresh kernel raised NameError.
score_result_lst = []
for idx, data in enumerate(condensed_reddit_data_ltd):
    print(idx, 'STARTED')
    # Keys correspond to the {placeholders} in the agent and task prompt strings.
    input_dict = {"input_data": data,
                  "product_long": product_long,
                  "product_short": product_short,
                  "tip_text": tip_text,
                  "output_format": output_format_1}
    scoring_result = response_creation_crew.kickoff(inputs=input_dict)
    txt = scoring_result.raw
    # Cut the text between list items; each fragment is parsed on its own.
    lst = txt.split("},")
    item_lst = []
    print(idx, 'WRITING TO DICT')
    for item in lst:
        comment_id, score, justification = re_change(item)
        # A fragment with no recognisable fields yields the ('na', 0, 'na')
        # placeholder from re_change and is still recorded.
        item_dict = {
            'comment_id': str(comment_id),
            'score': int(score),
            'justification': str(justification),
        }
        item_lst.append(item_dict)
    print(idx, 'APPENDING')
    score_result_lst.append(item_lst)

0 STARTED
[1m[95m [2024-08-13 16:23:38][DEBUG]: == Working Agent: Content Analyst[00m
[1m[95m [2024-08-13 16:23:38][INFO]: == Starting Task: Analyze posts and associated comments from [{'comment_id': 'ou1u90', 'parent_id': None, 'text': 'Is it worth it to get an interior designer?', 'author': 'Treatie915', 'score': 163}, {'comment_id': 'h6zwnlg', 'parent_id': 'ou1u90', 'text': "I married one!  \n\nBe sure to know the difference between interior decorator and interior designer. They are two very different skill sets, although many decorators call themselves designers, but don't actually do the design part. \n\nA designer is often closer to an engineer. They can do space planning, tell you where power outlets and switches should be in order to be up to code. They can tell you the best lighting fixture placements and types, etc. \n\nMy Wife (the interior designer) says they create/design a functional space based on the needs of the people in the space. \n\nBut if you want to dress up

In [288]:
# Number of posts scored, then the number of parsed items for each of the two posts.
len(score_result_lst), len(score_result_lst[0]), len(score_result_lst[1])

(2, 1, 10)

In [289]:
# Input-side counterpart: number of posts, then the number of post/comment
# entries supplied for each of the two posts.
# The recorded outputs differ — (2, 1, 10) parsed vs (2, 118, 1) supplied — so
# the parsed rows do not line up one-to-one with the input entries.
len(condensed_reddit_data_ltd), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data_ltd[1])

(2, 118, 1)

In [290]:
# Parsed rows for the first post. A single {'comment_id': 'na', 'score': 0,
# 'justification': 'na'} row is re_change()'s fallback, i.e. nothing in the
# crew's raw output matched the expected item format.
score_result_lst[0]

[{'comment_id': 'na', 'score': 0, 'justification': 'na'}]

In [291]:
# Parsed rows for the second post.
score_result_lst[1]

[{'comment_id': '16mf4n0',
  'score': 8,
  'justification': "Asking for interior design help which aligns with CasaAI's product features and benefits"},
 {'comment_id': 'k31npnk',
  'score': 2,
  'justification': "Talking about gardening which is unrelated to CasaAI's product features and benefits"},
 {'comment_id': 'm23npns', 'score': 3, 'justification': 'na'},
 {'comment_id': 'poiu78',
  'score': 6,
  'justification': "Talking about home decor which is somewhat related to CasaAI's product features and benefits'}] , {'comment_id': 'jklp68', 'relevance_score': 9, 'justification': "},
 {'comment_id': 'nm2xza58',
  'score': 8,
  'justification': "Talking about using CasaAI's product for a home makeover project"},
 {'comment_id': 'cvbn64',
  'score': 5,
  'justification': "Talking about using CasaAI's product for a home staging project but lacks specific details'}] , {'comment_id': 'poiuyb52', 'relevance_score': 8, 'justification': "},
 {'comment_id': 'asdfg46',
  'score': 9,
  'justifica