In [1]:
import os
import re
import yaml
import json
import ast
import praw
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI

from textwrap import dedent
from reddit_helper import *    

from langchain.llms import OpenAI, Ollama
from langchain_openai import ChatOpenAI

from pydantic import BaseModel

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [2]:
# Read API credentials from a local JSON file so that no key is written in the notebook.
api_file_path = 'api_keys.json'
with open(api_file_path, 'r') as file:
    api_keys = json.load(file)

# OpenAI chat models: same key and temperature, different model names.
openai_gpt35 = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0.7,
    openai_api_key=api_keys['openai'],
)
openai_gpt4 = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.7,
    openai_api_key=api_keys['openai'],
)

# Models reached through the Ollama wrapper; only the model tag is configured.
llama31_8b = Ollama(model='llama3.1:8b')
gemma2_9b = Ollama(model='gemma2:9b')

In [3]:
# The product descriptions used in the prompts are read from a YAML config file.
cfg_file_path = 'casaai_config.yaml'
with open(cfg_file_path, 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)

# A key that is absent from the config falls back to an empty string.
product_long, product_short = (
    cfg.get(key, '')
    for key in ('product_long_description', 'product_short_description')
)

In [28]:
# Persona prompt for the scoring agent.
# NOTE: the string is built with backslash line continuations inside the
# literal, so the leading indentation of every continued line is part of the
# prompt text. {product_short} is left as a literal placeholder here
# (presumably filled in by CrewAI from the kickoff inputs — confirm).
backstory = "You are a content analyst with expertise in analyzing web content and \
             extracting relevant information. \
             - You ensures that content is relevant, high-quality, and aligned with the marketing of \
             {product_short}. \
             - You evaluate content by analyzing user interactions, such as likes, \
             shares, comments, and views. \
             - You should consider factors such as user behavior/sentiment. \
             - Also consider factors such as keyword density, context accuracy, and user intent. \
             - You identifies content that effectively meets audience expectations, flags irrelevant material, and \
            provides insights to enhance content targeting."
# Goal statement for the same agent; also a single continued string literal.
goal = "Analyze web content and extract relevant information, Accurately assess the impact and effectiveness of \
        content based on user interactions, Ensure that content is highly pertinent and aligned with the intended \
        topics and audience needs"
# The only active agent in this notebook: no delegation, quiet logging,
# backed by the llama3.1:8b Ollama model.
content_analysis_agent = Agent(
                            role="Content Analyst",
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=False,
                            llm=llama31_8b,
                            )

In [21]:
# backstory = "Responsible for synthesizing and evaluating the outputs from the content Analysis agents. You ensures \
#              all aspects of content—quality, engagement, and relevance—are harmonized and aligns with the marketing of {product_short}."
# goal = "Ensure content is relevant, engaging, and strategically aligned to marketing of product"
# content_score_review_agent = Agent(
#                             role="Content Score Reviewer",
#                             goal=goal,
#                             backstory=backstory,
#                             allow_delegation=True,
#                             verbose=False,
#                             llm=gemma2_9b,
#                         )

In [29]:
# Structured result expected from the scoring task (passed to Task as output_json).
class ScoreOutput(BaseModel):
    comment_id: str     # id of the comment that was scored
    score: float        # the task prompt asks for a score out of 10
    justification: str  # short rationale; the prompt asks for fewer than 15 words

In [30]:
# Incentive sentence supplied to the task description through the {tip_text} placeholder.
tip_text = "If you do your BEST WORK, I'll give you a $10,000 commission!"

In [31]:
# Task prompt for scoring a single comment. The {comment_id}, {input_data},
# {product_long} and {tip_text} placeholders are supplied per comment through
# the kickoff inputs. Every continued line ends with " \" so that consecutive
# sentences and bullets stay separated by a space once the literal is joined.
descr = "{comment_id} is the id of the comment that you need to provide score and justification. You will also be \
provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. You should: \
- Analyze this and determine relevance of {comment_id} based on identified keywords and phrases w.r.to the marketing of {product_long}. \
- Evaluate the level of user interaction in {comment_id} by analyzing the provided content. This includes analyzing metrics such as \
likes, shares, comments, and views. \
- Assess how well the {comment_id} aligns with product by analyzing the provided content. \
- Finally assign a score out of 10 that reflects the content's pertinence to its intended audience and its  \
alignment with the product that is marketed. {tip_text}"

# Description of the expected answer; the structured form is given by ScoreOutput.
expected_out = "{comment_id}, its score and a brief justification (less than 15 words) \
                explaining the rationale behind the score" 

content_analysis_task = Task(
                            description=descr,
                            expected_output=expected_out,
                            output_json=ScoreOutput,
                            agent=content_analysis_agent,
                            )

In [27]:
# descr = "{comment_id} is the id of the comment that you need to review the score and justification provided by content analysis agent. \
# You will get outputs from Content Analysis agents for {comment_id}. You will also be \
# provided data belonging to this comment and also its parent comments. You will get these data in {input_data}. You will \
# review all these content based on the relevance to the marketing of {product_long}. Your review will be shared back to content \
# analysis agent for improvement. {tip_text}"        
       
# expected_out = "{comment_id}, its relevance score and a brief justification (less than 15 words) \
#                 explaining the rationale behind the score" 
        
# content_score_review_task = Task(
#             description=descr,
#             expected_output=expected_out,
#             output_json=ScoreOutput, 
#             agent=content_score_review_agent,
#         )  

In [32]:
# One agent, one task: each kickoff of this crew scores a single comment.
crew_agents = [content_analysis_agent]
crew_tasks = [content_analysis_task]
response_creation_crew = Crew(agents=crew_agents, tasks=crew_tasks, verbose=False)



In [12]:
# Fetch the test set of Reddit posts, then condense it into the per-post lists
# of records that the cells below iterate over.
# NOTE(review): both helpers come from the wildcard `reddit_helper` import; their
# exact return schema is not visible in this notebook — confirm in that module.
reddit_posts, reddit_post_ids = fetch_reddit_test()
condensed_reddit_data, unique_post_ids, unique_comment_ids = condense_data(reddit_posts, reddit_post_ids)

post_cnt: 0 + 10 = 10
comm_cnt: 0 + 560 = 560
cond_cnt: 570 = 10 + 560
Cross_ck: 10 = 10


In [13]:
# Flatten every post's records into one lookup keyed by comment id, so that a
# comment's ancestors can later be found by following parent_id.
comment_dict = {}
tot_data = 0
for idx1, item in enumerate(condensed_reddit_data):
    # Reset per post. Without this, an empty post would report the previous
    # post's last index, or raise NameError if the very first post is empty.
    idx2 = -1
    for idx2, data in enumerate(item):
        tot_data += 1
        c_id = data['comment_id']
        p_id = data['parent_id']
        text = data['text']
        comment_dict[c_id] = {'comment_id': c_id, 'parent_id': p_id, 'text': text}
    # Prints the post index and the index of its last record (-1 when empty).
    print(idx1, ':', idx2)
# The two numbers differ if any comment id occurred more than once.
print(len(comment_dict), '=', tot_data)

0 : 169
1 : 108
2 : 31
3 : 57
4 : 0
5 : 33
6 : 67
7 : 60
8 : 1
9 : 34
570 = 570


In [14]:
# Top-level keys of the fetched data (presumably subreddit names — confirm in reddit_helper).
reddit_posts.keys()

dict_keys(['InteriorDesign'])

In [15]:
# Inspect the nesting of the fetched data: keys under 'InteriorDesign', the
# number of entries under 'Room Style Transformation', and the number of
# condensed per-post lists.
print(reddit_posts['InteriorDesign'].keys())
print(len(reddit_posts['InteriorDesign']['Room Style Transformation']))
print(len(condensed_reddit_data))
# Left disabled: restricts the run to the last two posts for quick experiments.
# condensed_reddit_data_ltd = condensed_reddit_data[-2:]
# print(len(condensed_reddit_data_ltd), len(condensed_reddit_data_ltd[0]), len(condensed_reddit_data_ltd[1]))

dict_keys(['Room Style Transformation'])
10
10


In [16]:
def get_data_details(comm_id, lookup=None):
    """Collect a comment's record followed by the records of its ancestors.

    Starting at ``comm_id``, follows each record's ``'parent_id'`` for as long
    as the id is present in the lookup table.

    Args:
        comm_id: Id of the comment to start from.
        lookup: Optional mapping of id -> record dict, where each record has a
            ``'parent_id'`` entry. Defaults to the notebook-level
            ``comment_dict``.

    Returns:
        List of record dicts ordered from the starting comment up to the
        oldest known ancestor. Empty if ``comm_id`` is not in the lookup.
    """
    if lookup is None:
        lookup = comment_dict
    data_details = []
    visited = set()
    # The visited set stops the walk if parent links ever form a cycle
    # (e.g. a record that names itself as its parent), which would otherwise
    # loop forever.
    while comm_id in lookup and comm_id not in visited:
        visited.add(comm_id)
        details = lookup[comm_id]
        data_details.append(details)
        comm_id = details['parent_id']
    return data_details

In [33]:
def _parse_score_json(raw_json):
    """Turn the crew's JSON output string into a score dict.

    Tries strict JSON first, then falls back to a Python-literal parse for
    output written with single quotes.

    Raises:
        ValueError: if the text cannot be parsed, or the parsed value is not
            a dict containing a 'score' entry.
    """
    try:
        parsed = json.loads(raw_json)
    except (TypeError, ValueError):
        try:
            parsed = ast.literal_eval(raw_json)
        except (TypeError, SyntaxError) as exc:
            raise ValueError(f'unparseable score output: {exc}') from exc
    if not isinstance(parsed, dict) or 'score' not in parsed:
        raise ValueError(f'score output is not a score record: {parsed!r}')
    return parsed


# Score every comment of every post with the crew. The result mirrors the
# nesting of condensed_reddit_data: one list per post, one dict per comment.
score_result_lst = []
for idx, reddit_data_item in enumerate(condensed_reddit_data):
    comment_lst = []
    for idx2, comment_data in enumerate(reddit_data_item):
        comm_id = comment_data['comment_id']
        print(f'STARTING {idx} - {idx2} - {comm_id}')
        # Context for the model: the comment itself plus its known ancestors.
        data_details = get_data_details(comm_id)
        input_dict = {"comment_id": comm_id,
                      "input_data": data_details,
                      "product_long": product_long,
                      "product_short": product_short,
                      "tip_text": tip_text}
        scoring_result = response_creation_crew.kickoff(inputs=input_dict)
        try:
            json_out = _parse_score_json(scoring_result.json)
        except Exception as exc:
            # `except Exception` (not a bare except) so that interrupting the
            # long run with Ctrl-C / kernel interrupt still works.
            print(f'Exception in json for {comm_id}: {exc!r}')
            # Placeholder uses the same keys as ScoreOutput so downstream
            # cells can treat every record uniformly.
            json_out = {'comment_id': comm_id, 'score': 0.0, 'justification': 'NA'}
        comment_lst.append(json_out)
    score_result_lst.append(comment_lst)

STARTING 0 - 0 - kgesxn
[93m Error parsing JSON: Expecting value: line 1 column 1 (char 0). Attempting to handle partial JSON.[00m
STARTING 0 - 1 - ggfdsvg
[93m Error parsing JSON: Expecting value: line 1 column 1 (char 0). Attempting to handle partial JSON.[00m
STARTING 0 - 2 - ggfh2zn
[93m Error parsing JSON: Expecting value: line 1 column 1 (char 0). Attempting to handle partial JSON.[00m
STARTING 0 - 3 - ggepn0p
[93m Error parsing JSON: Expecting value: line 1 column 1 (char 0). Attempting to handle partial JSON.[00m
[93m Pydantic validation error: 1 validation error for ScoreOutput
  Invalid JSON: key must be a string at line 1 column 2 [type=json_invalid, input_value="{'comment_id': 'ggepn0p'...am @houseroundthebend'}", input_type=str]
    For further information visit https://errors.pydantic.dev/2.8/v/json_invalid. The JSON structure doesn't match the expected model. Attempting alternative conversion method.[00m
STARTING 0 - 4 - ggfd5lx
[93m Error parsing JSON: Expect

In [35]:
# Number of scored posts and number of scored records in the first post.
len(score_result_lst), len(score_result_lst[0])

(10, 170)

In [37]:
# Peek at the first four score records of the last post.
score_result_lst[-1][:4]

[{'comment_id': 'mmsi20',
  'score': 7.0,
  'justification': "Relevant content with some design insight, but lacks direct alignment with CasaAI's transformative capabilities."},
 {'comment_id': 'gttgbsf',
  'score': 6.0,
  'justification': 'Somewhat relevant to interior design, but unrelated to CasaAI marketing goals.'},
 {'comment_id': 'gttana8',
  'score': 8.0,
  'justification': "Relevant content, but tone and language not fully aligned with CasaAI's tone."},
 {'comment_id': 'gtt853k',
  'score': 6.0,
  'justification': "The link provided in gtt853k is relevant to interior design and home transformation, but lacks specific mention of CasaAI's advanced AI capabilities, which are the core selling points of the product."}]

In [38]:
# Persist the scores so the expensive LLM run does not have to be repeated.
score_output_path = 'score_result_0819.json'
with open(score_output_path, 'w') as json_file:
    json.dump(score_result_lst, json_file, indent=4)  # indent=4 is optional but makes the file more readable
# Report the file that was actually written (the old message named output.json).
print(f"Data saved to {score_output_path}")

Data saved to output.json


In [40]:
# Round-trip check: read the saved scores back and report the same two sizes
# that were shown for the in-memory list.
with open('score_result_0819.json', 'r') as json_file:
    score_result_loaded_back = json.load(json_file)

print(f"Loaded data: {len(score_result_loaded_back)} {len(score_result_loaded_back[0])}")

Loaded data: 10 170


In [41]:
# Same slice as shown for score_result_lst, taken from the reloaded data for comparison.
score_result_loaded_back[-1][:4]

[{'comment_id': 'mmsi20',
  'score': 7.0,
  'justification': "Relevant content with some design insight, but lacks direct alignment with CasaAI's transformative capabilities."},
 {'comment_id': 'gttgbsf',
  'score': 6.0,
  'justification': 'Somewhat relevant to interior design, but unrelated to CasaAI marketing goals.'},
 {'comment_id': 'gttana8',
  'score': 8.0,
  'justification': "Relevant content, but tone and language not fully aligned with CasaAI's tone."},
 {'comment_id': 'gtt853k',
  'score': 6.0,
  'justification': "The link provided in gtt853k is relevant to interior design and home transformation, but lacks specific mention of CasaAI's advanced AI capabilities, which are the core selling points of the product."}]

In [34]:
# For each post, keep only the comments whose score is at least the threshold (`thresh`, set in the next cell)

In [64]:
# Keep, per post, only the score records that reach the threshold, and report
# how many records were kept versus dropped overall.
thresh = 9
score_result_lst_gt_thres = []
tot = 0
incl = 0
for post_lst in score_result_lst:
    comment_lst = [record for record in post_lst if record['score'] >= thresh]
    score_result_lst_gt_thres.append(comment_lst)
    tot += len(post_lst)
    incl += len(comment_lst)
# Every record is either kept or dropped, so the remainder is the excluded count.
excl = tot - incl
print(tot, '=', incl, '+', excl)

570 = 5 + 565


In [65]:
# Number of posts, how many records of post index 3 passed the threshold, and the last of them.
len(score_result_lst_gt_thres), len(score_result_lst_gt_thres[3]), score_result_lst_gt_thres[3][-1]

(10,
 1,
 {'comment_id': 'fxy5ec1',
  'score': 9.0,
  'justification': "Comment expresses appreciation for CasaAI's transformative capabilities, aligning with marketing goals."})

In [66]:
# Look up the stored text of a comment that received a 9.0.
# NOTE(review): the stored text is a generic thank-you with no product mention,
# while the model's justification claims appreciation for CasaAI — this looks
# like a false positive worth checking before trusting the scores.
comment_dict['fxy5ec1']

{'comment_id': 'fxy5ec1',
 'parent_id': 'fxy4kp5',
 'text': 'Thanks! Much appreciated.'}

In [67]:
# Comment ids whose raw text should be printed for manual inspection.
check_lst = ['fxy5ec1']

In [68]:
# Print the raw text of every condensed record whose id is listed in check_lst.
matched_texts = (
    record['text']
    for post_records in condensed_reddit_data
    for record in post_records
    if record['comment_id'] in check_lst
)
for matched_text in matched_texts:
    print(matched_text)

Thanks! Much appreciated.


In [None]:
# Then select the top 20% of comments or the top 3, whichever is larger, to craft responses for

In [None]:
# Build response for these comments With Manager LLM + only 1 response writer + meta reviewer