In [1]:
import os
import re
import yaml
import json
import ast
import praw
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI

from textwrap import dedent
from reddit_helper import *    

from langchain.llms import OpenAI, Ollama
from langchain_openai import ChatOpenAI

from pydantic import BaseModel
from datetime import datetime, timedelta

* 'allow_population_by_field_name' has been renamed to 'populate_by_name'
* 'smart_union' has been removed


In [2]:
api_file_path = 'api_keys.json'
with open(api_file_path, 'r') as file:
    api_keys = json.load(file)        
openai_gpt35 = ChatOpenAI(model_name="gpt-3.5-turbo", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])
openai_gpt4 = ChatOpenAI(model_name="gpt-4", 
                                temperature=0.7,
                                openai_api_key=api_keys['openai'])        
llama31_8b = Ollama(model='llama3.1:8b',)
gemma2_9b = Ollama(model='gemma2:9b',)

In [76]:
cfg_file_path = 'casaai_config.yaml'
agent_cfg_path = 'agents.yaml'
task_cfg_path = 'tasks.yaml'

with open(cfg_file_path, 'r') as yaml_file:
    cfg = yaml.safe_load(yaml_file)
product_long = cfg.get('product_long_description', '')      
product_short = cfg.get('product_short_description', '')
product_name = cfg.get('product_short_description', '')
product_url = cfg.get('product_url', '')
domain = cfg.get('domain')
broad_keywords = cfg.get('broad_keywords', [])
output_format_1 = cfg.get('output_format_1')

with open(agent_cfg_path, 'r') as yaml_file:
    agent_cfg_data = yaml.safe_load(yaml_file)

with open(task_cfg_path, 'r') as yaml_file:
    task_cfg_data = yaml.safe_load(yaml_file)

In [4]:
tip_text = "If you do your BEST WORK, I'll give you a $100 commission!"

In [5]:
def re_decision_string(input_string):

    pattern = r"'comment_id':\s*'([^']*)',\s*'decision':\s*(\d+)"

    # Use re.findall to extract the values
    matches = re.findall(pattern, input_string)

    if matches:
        comment_id, decision = matches[0]
    else:
        comment_id = "na"
        decision = "discard"

    if "justification" in input_string:
        justification = input_string.split("justification")[-1]
    else:
        justification = "Not available"
    
    return comment_id, decision, justification

In [6]:
def re_score_string(input_string):

    pattern = r"'comment_id':\s*'([^']*)',\s*'score':\s*(\d+)"

    # Use re.findall to extract the values
    matches = re.findall(pattern, input_string)

    if matches:
        comment_id, score = matches[0]
    else:
        comment_id = "na"
        score = 0

    if "justification" in input_string:
        justification = input_string.split("justification")[-1]
    else:
        justification = "Not available"
    
    return comment_id, score, justification

In [71]:
def re_response_string(input_string):

    if "responz" in input_string:
        response = input_string.split("responz")[-1]
    else:
        response = "Not available"
    
    return response

In [7]:
def get_calendar_date(unix_timestamp):
    date_obj = datetime.utcfromtimestamp(unix_timestamp)
    readable_date = date_obj.strftime('%Y-%m-%d %H:%M:%S')
    return readable_date

In [24]:
def prep_decision_output(decision_lst, old_comments):
    decision_analysis_lst = []
    decision_bad_list = []
    tot_comments = 0
    bad_cnt = 0
    decision_relevant_json_list = []
    decision_discarded_json_list = []
    for item in decision_lst:  
        sep = "^"
        for comment in item:
            tot_comments += 1
            try:
                comm_key = list(comment.keys())[0]
                comm_id  = comment[comm_key].strip()
                if comm_id in comment_dict_list:                 
                    parent_id = comment_dict[comm_id]["parent_id"]
                    if parent_id is None:
                        parent_id = 'NA'
                    decision = comment['decision']
                    justify  = comment["justification"]
                    comment_text = comment_dict[comm_id]["text"]
                    text_str = sep + comm_id + sep + parent_id + sep + decision + sep + justify + sep + comment_text + sep
                    if comment['decision'] == 'relevant':
                        decision_relevant_json_list.append(comment)
                    else:
                        decision_discarded_json_list.append(comment)
                    decision_analysis_lst.append(text_str)            
                else: 
                    print(f'Comment ID not found : {comm_id}')
                    bad_cnt += 1
                    decision_bad_list.append(comm_id)
            except:
                print(f'comment key error : {comment}')
                bad_cnt += 1
    print(f'relevant : {len(decision_relevant_json_list)} discarded : {len(decision_discarded_json_list)} bad_cnt: {bad_cnt} total: {tot_comments}')
    
    old_comment_list = []
    for comment in old_comments:  
        sep = "^"
        comm_id  = comment["comment_id"]
        parent_id = comment["parent_id"]
        if parent_id is None:
            parent_id = 'NA'        
        comment_text  = comment["text"]
        created_date = get_calendar_date(comment['created_utc'])
        age = str(comment["age"])
        text_str = sep + comm_id + sep + parent_id + sep + age + sep + created_date + sep + comment_text + sep
        old_comment_list.append(text_str)      
    print(f'Old comments already filtered out : {len(old_comments)}')
    
    output = (decision_relevant_json_list, decision_discarded_json_list, decision_analysis_lst, old_comment_list)
    return output

In [9]:
def prep_score_output(score_result_lst):
    analysis_lst = []
    bad_list = []
    tot_comments = 0
    bad_cnt = 0
    good_json_list = []
    sep = "^"
    for comment in score_result_lst:
        tot_comments += 1
        try:
            comm_key = list(comment.keys())[0]
            comm_id  = comment[comm_key].strip()
            
            if comm_id in comment_dict_list:  
                parent_id = comment_dict[comm_id]["parent_id"]
                if parent_id is None:
                    parent_id = 'NA'
                score = comment['score']
                age = comment_dict[comm_id]["age"]
                created = comment_dict[comm_id]["created"]
                justify  = comment["justification"]
                comment_text = comment_dict[comm_id]["text"]
                text_str = sep + comm_id + sep + parent_id + sep + str(score) + sep + str(age) + sep + created + sep + justify \
                            + sep + comment_text + sep
                analysis_lst.append(text_str) 
                good_json_list.append(comm_id)
            else: 
                print(f'Comment ID not found : {comm_id}')
                bad_cnt += 1
                bad_list.append(comm_id)
        except:
            print(f'comment key error : {comment}')
            bad_cnt += 1
    print(f'relevant : {len(good_json_list)} bad_cnt: {bad_cnt} total: {tot_comments}')
    output = (good_json_list, bad_list, analysis_lst)
    return output

In [10]:
backstory = agent_cfg_data['content_filter_analyst']['backstory'] 
goal = agent_cfg_data['content_filter_analyst']['goal']
role = agent_cfg_data['content_filter_analyst']['role'] 
content_filter_analyst = Agent(
                            role=role,
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=False,
                            llm=gemma2_9b,
                            )

In [11]:
class DecisionOutput(BaseModel):
    comment_id: str
    decision: str
    justification: str

In [12]:
description = task_cfg_data['content_filter_task']['description']
expected_out = task_cfg_data['content_filter_task']['expected_out'] 
        
content_filter_task = Task(
                            description=description,
                            expected_output=expected_out,
                            output_json=DecisionOutput,
                            agent=content_filter_analyst,
                            )

In [13]:
response_creation_crew = Crew(
    agents=[content_filter_analyst,],
    tasks=[content_filter_task,],
    verbose=False,
)

In [14]:
reddit_posts, reddit_post_ids = fetch_reddit_test()
condensed_reddit_data, unique_post_ids, unique_comment_ids = condense_data(reddit_posts, reddit_post_ids)

post_cnt: 0 + 20 = 20
comm_cnt: 0 + 696 = 696
cond_cnt: 716 = 20 + 696
Cross_ck: 20 = 20


In [15]:
comment_dict = {}
tot_data = 0
for idx1, item in enumerate(condensed_reddit_data):
    for idx2, data in enumerate(item):
        tot_data += 1
        c_id = data['comment_id']
        p_id = data['parent_id']
        text = data['text']
        created_date = get_calendar_date(data['created_utc'])
        age = data['age']
        comment_dict[c_id] = {'comment_id': c_id, 'parent_id': p_id, 'text': text, 'age': age, 'created': created_date}
    print(idx1, ':', idx2)
comment_dict_list = list(comment_dict.keys())
print(len(comment_dict), '=', tot_data, len(comment_dict_list))

0 : 169
1 : 108
2 : 31
3 : 57
4 : 0
5 : 33
6 : 60
7 : 67
8 : 1
9 : 34
10 : 0
11 : 79
12 : 1
13 : 0
14 : 7
15 : 2
16 : 0
17 : 9
18 : 34
19 : 4
716 = 716 716


In [16]:
def get_data_details(comm_id):
    done = 0
    data_details = []
    while done==0:
        if comm_id in comment_dict:
            details = comment_dict[comm_id]
            data_details.append(details)
            comm_id = details['parent_id']
        else:
            done = 1
    return data_details

In [None]:
decision_lst = []
old_comments = []
age_limit = 3
for idx, reddit_data_item in enumerate(condensed_reddit_data):
    comment_lst = []
    for idx2, comment_data in enumerate(reddit_data_item):
        if comment_data['age'] < age_limit:
            comm_id = comment_data['comment_id']  
            comment_text = comment_data['text']
            print(f'STARTING {idx} - {idx2} - {comm_id}')   
            input_dict = {"comment_id": comm_id,
                          "input_data": comment_text,
                          "product_long": product_long,
                          "product_short": product_short,
                          "domain": domain,
                          "broad_keywords": broad_keywords,
                          "output_format":output_format_1, 
                          "tip_text":tip_text}
            decision_result = response_creation_crew.kickoff(inputs=input_dict)
            try:
                json_out = ast.literal_eval(decision_result.json)
            except:
                print('Exception in json - trying re')
                _, d, j = re_decision_string(decision_result.raw)            
                json_out = {'comm_id': comm_id, 'decision': str(d), 'justification': str(j)}
            comment_lst.append(json_out)
        else:
            old_comments.append(comment_data)
    decision_lst.append(comment_lst)

In [25]:
decision_out = prep_decision_output(decision_lst, old_comments)

Comment ID not found : kq75xk
Comment ID not found : krct1qh
Comment ID not found : unknown
Comment ID not found : kq6gat
Comment ID not found : krcylty
Comment ID not found : 0
Comment ID not found : iyv709
relevant : 75 discarded : 114 bad_cnt: 7 total: 196
Old comments already filtered out : 520


In [26]:
for item in decision_out:
    print(len(item))
content_filter_relevant, content_filter_discard, content_filter_analysis, content_filter_old = decision_out
content_filter_relevant[0]

75
114
189
520


{'comment_id': 'ku4kjo6',
 'decision': 'relevant',
 'justification': 'Comment seeks design inspiration and real-world kitchen usage.'}

In [27]:
with open('decision_result_v6_0902A.txt', "w") as file:
    # Iterate through the list and write each string to the file
    for item in content_filter_analysis:
        file.write(item + "\n")  # Add a newline character after each string
with open('decision_old_v6_0902A.txt', "w") as file:
    # Iterate through the list and write each string to the file
    for item in content_filter_old:
        file.write(item + "\n")  # Add a newline character after each string

print(f"Files has been written")

with open('decision_result_v6_0902A.txt', "r") as file:
    # Read all lines and strip the newline character from each line
    analysis_lst_loaded_back = [line.strip() for line in file.readlines()]
print(f"Files Reloaded")

Files has been written
Files Reloaded


In [28]:
backstory = agent_cfg_data['content_scoring_analyst']['backstory'] 
goal = agent_cfg_data['content_scoring_analyst']['goal']
role = agent_cfg_data['content_scoring_analyst']['role'] 
content_scoring_analyst = Agent(
                            role=role,
                            goal=goal,
                            backstory=backstory,
                            allow_delegation=False,
                            verbose=False,
                            llm=gemma2_9b,
                            )

In [29]:
class ScoreOutput(BaseModel):
    comment_id: str
    score: float
    justification: str

In [30]:
description = task_cfg_data['content_scoring_task']['description']
expected_out = task_cfg_data['content_scoring_task']['expected_out'] 
        
content_scoring_task = Task(
                            description=description,
                            expected_output=expected_out,
                            output_json=ScoreOutput,
                            agent=content_filter_analyst,
                            )

In [31]:
score_creation_crew = Crew(
    agents=[content_scoring_analyst,],
    tasks=[content_scoring_task,],
    verbose=False,
)



In [None]:
score_result_lst = []
for idx, comment_data in enumerate(content_filter_relevant):     
    comm_id = comment_data['comment_id']        
    print(f'STARTING {idx} - {comm_id}')
    data_details = get_data_details(comm_id)      
    input_dict = {"comment_id": comm_id,
                  "input_data": data_details,
                  "product_long": product_long,
                  "product_short": product_short,
                  "domain": domain,
                  "product_name": product_name,
                  "tip_text":tip_text}
    scoring_result = score_creation_crew.kickoff(inputs=input_dict)
    try:
        json_out = ast.literal_eval(scoring_result.json)
    except:
        print('Exception in json - trying re')
        _, s, j = re_score_string(scoring_result.raw)            
        json_out = {'comm_id': comm_id, 'score': float(s), 'justification': str(j)}            
    score_result_lst.append(json_out)

In [34]:
score_result_lst[0], len(score_result_lst)

({'comment_id': 'ku4kjo6',
  'score': 8.2,
  'justification': "Expresses desire for a functional kitchen aligning with CasaAI's capabilities."},
 75)

In [35]:
thresh_list = [7.0, 7.5, 8.0, 8.5, 9.0, 9.5]

for thresh in thresh_list:
    thresh_up = 0
    for item in score_result_lst:
        if item['score'] >= thresh:
            thresh_up += 1
    print(thresh, ':', thresh_up, '/', len(score_result_lst))

7.0 : 42 / 75
7.5 : 35 / 75
8.0 : 27 / 75
8.5 : 18 / 75
9.0 : 5 / 75
9.5 : 2 / 75


In [36]:
score_out = prep_score_output(score_result_lst)
content_score_good, content_score_bad, content_score_analysis = score_out
print(len(content_score_good), len(content_score_analysis))

relevant : 75 bad_cnt: 0 total: 75
75 75


In [37]:
with open('score_result_v6_0902A.txt', "w") as file:
    # Iterate through the list and write each string to the file
    for item in content_score_analysis:
        file.write(item + "\n")  # Add a newline character after each string

print(f"File has been written")

with open('score_result_v6_0902A.txt', "r") as file:
    # Read all lines and strip the newline character from each line
    analysis_lst_loaded_back = [line.strip() for line in file.readlines()]
print(len(analysis_lst_loaded_back))

File has been written
124


In [38]:
# For each post-comment combo select the post-comments whose score >= 7
filter_thresh = 7.0
filtered_comment_ids = []
for item in score_result_lst:
    if item['score'] >= filter_thresh:
        filtered_comment_ids.append(item['comment_id'])
print(f'len(filtered_comment_ids) : {len(filtered_comment_ids)}')

len(filtered_comment_ids) : 42


In [None]:
# Build response for these comments With Manager LLM + only 1 response writer + meta reviewer

In [77]:
backstory = agent_cfg_data['response_writer']['backstory'] 
goal = agent_cfg_data['response_writer']['goal']
role = agent_cfg_data['response_writer']['role'] 
response_writer = Agent(
                        role=role,
                        goal=goal,
                        backstory=backstory,
                        allow_delegation=False,
                        verbose=True,
                        llm=gemma2_9b,
                        )

In [79]:
backstory = agent_cfg_data['response_reviewer']['backstory'] 
goal = agent_cfg_data['response_reviewer']['goal']
role = agent_cfg_data['response_reviewer']['role'] 
response_reviewer = Agent(
                        role=role,
                        goal=goal,
                        backstory=backstory,
                        allow_delegation=True,
                        verbose=True,
                        llm=gemma2_9b,
                        )

In [80]:
class ResponseOutput(BaseModel):
    responz: str

In [81]:
description = task_cfg_data['response_writing_task']['description']
expected_out = task_cfg_data['response_writing_task']['expected_out'] 
        
response_writing_task = Task(
                            description=description,
                            expected_output=expected_out,
                            output_json=ResponseOutput,
                            agent=response_writer,
                            )

In [82]:
description = task_cfg_data['response_review_task']['description']
expected_out = task_cfg_data['response_review_task']['expected_out'] 
        
response_review_task = Task(
                            description=description,
                            expected_output=expected_out,
                            output_json=ResponseOutput,
                            agent=response_reviewer,
                            )

In [83]:
response_creation_crew = Crew(
    agents=[response_writer, response_reviewer],
    tasks=[response_writing_task, response_review_task],
    verbose=True,
)



In [84]:
response_result_lst = []
for idx, comment_id in enumerate(filtered_comment_ids):           
    print(f'STARTING {idx} - {comment_id}')
    input_data = comment_dict[comment_id]
    data_details = get_data_details(comment_id)      
    input_dict = {"comment_id": comment_id,
                  "input_data": input_data,
                  "context": data_details,
                  "product_long": product_long,
                  "product_short": product_short,
                  "domain": domain,
                  "product_name": product_name,
                  "product_url": product_url,
                  "tip_text":tip_text}
    response_result = response_creation_crew.kickoff(inputs=input_dict)
    try:
        json_out = ast.literal_eval(response_result.json)
        json_out['comm_id'] = comment_id
    except:
        print('Exception in json - trying re')
        r = re_response_string(response_result.raw)            
        json_out = {'comm_id': comment_id, 'responz': str(r)}            
    response_result_lst.append(json_out)

STARTING 0 - ku4kjo6
[1m[95m [2024-09-02 09:35:08][DEBUG]: == Working Agent: Response Writer
[00m
[1m[95m [2024-09-02 09:35:08][INFO]: == Starting Task: -ku4kjo6 is the id of the comment that you need to respond  -You will also get the context behind the comment  -Comments you are getting as input are suggestions/advises/follow-up questions -Read the comments from input and understand whether it is a suggestion/advise/follow-up question -Once done, draft response based on the type of comment -Response should sound genuine and in-synch with the overall thread of discussion -Response style must be casual, friendly and must not include any cooked-up facts -Include keywords mentioned in the original comment in response -Be careful not to include matters that were NOT mentioned in the original comment in   response  -Insert marketing text for the CasaAI - next generation AI tool for transforming interior spaces. in between these casual text -Marketing text needs to be less than 30-35 w

In [75]:
len(response_result_lst), response_result_lst[:2]

(42,
 [{'responz': "I totally agree! There's nothing quite like a kitchen that feels lived-in and truly yours. It's all about those little details that make it unique to you. Have you considered using a tool like CasaAI? It's an amazing AI design platform that can help you bring your dream kitchen to life. You can experiment with different layouts, styles, and appliances until you find the perfect fit. Plus, it's really user-friendly, so even if you're not a design expert, you can easily create stunning visuals of your ideal space. Check out their website – they have some incredible examples of kitchens they've helped people design! casai.delvelabs.ai"},
  {'responz': "There are a few ways we can help users find the source of their floor tiles:\n1. **Check for markings on the tiles:** Sometimes, manufacturers include information about origin or product details directly on the tile. \n2. **Contact the installer:** If the flooring was recently installed, reach out to the contractor or fl