In [1]:
import pandas as pd
import numpy as np
import sys

In [2]:
import os
from openai import OpenAI

# SECURITY: the API token must never be committed inside the notebook. It is
# read from the LAS_API_TOKEN environment variable; when that is missing the
# user is prompted for it so the secret is never stored in the file.
# NOTE(review): the token that was previously hardcoded here should be revoked.
if not os.environ.get("LAS_API_TOKEN"):
    from getpass import getpass

    # Keep the value in the process environment because ComposableAgent
    # also reads LAS_API_TOKEN from os.environ.
    os.environ["LAS_API_TOKEN"] = getpass("LAS_API_TOKEN: ")

client = OpenAI(
    api_key=os.environ["LAS_API_TOKEN"],
)

In [23]:
from typing import List
from openai import OpenAI
import logging
import os
import time
import json
import copy 

class ComposableAgent:
    """
    A persistent LLM chat agent.

    The agent keeps the full message history, so a series of iterative
    conversations can take place between the user and the same agent.

    All API calls go through ``_complete``, which retries failed requests with
    exponential backoff and re-raises the last error once ``MAX_RETRIES``
    attempts have failed (instead of retrying forever).
    """

    # Retry policy. Kept as class attributes so it can be tuned per instance
    # or subclass without changing the constructor signature.
    MAX_RETRIES = 10          # total attempts per API call
    RETRY_BASE_DELAY = 0.1    # seconds to wait before the first retry
    RETRY_MAX_DELAY = 30.0    # upper bound for the backoff delay, in seconds

    def __init__(self, prompt_setup: List[str]) -> None:
        """
        Args:
            prompt_setup: A list of prompts used to give the agent its persona
                and any few-shot prompt examples etc. Each prompt is sent as a
                user message and the model's reply is stored in the history.
        """
        self.client = OpenAI(
            api_key=os.environ.get("LAS_API_TOKEN")
        )
        self.model = "gpt-3.5-turbo"

        self.messages = []
        for message in prompt_setup:
            self.messages.append({"role": "user", "content": message})
            # No temperature is passed here, so the API default is used.
            reply = self._complete()
            self.messages.append({"role": "assistant", "content": reply})

        # Number of the next chat() call (1-based); drives the temperature choice.
        self.count = 1

    def _complete(self, temperature=None) -> str:
        """
        Send the current message history to the model and return the reply text.

        Args:
            temperature: Sampling temperature; when None the argument is
                omitted from the request altogether.

        Raises:
            Exception: the last error raised by the client once all attempts
                have failed.
        """
        request = {"model": self.model, "messages": self.messages}
        if temperature is not None:
            # Explicit None check: a temperature of 0 is valid and must be sent.
            request["temperature"] = temperature

        attempts = max(1, self.MAX_RETRIES)
        delay = self.RETRY_BASE_DELAY
        for attempt in range(1, attempts + 1):
            try:
                response = self.client.chat.completions.create(**request)
                return response.choices[0].message.content
            except Exception:
                # `Exception` (not a bare except) so that KeyboardInterrupt and
                # SystemExit still stop the notebook immediately.
                if attempt == attempts:
                    raise
                # If the API throws an error, wait before retrying.
                time.sleep(delay)
                delay = min(delay * 2, self.RETRY_MAX_DELAY)

    def chat(self, message: str) -> str:
        """
        Send one user message and return the assistant's reply.

        Args:
            message: A prompting string for the LLM

        Returns:
            The text content of the model's reply. Both the message and the
            reply are appended to the conversation history.

        Raises:
            Exception: the last client error if every retry failed. In that
                case the history is left exactly as it was before the call.
        """
        if self.count < 2:
            # Use higher temperature for the first summaries to
            # encourage diversity of summaries
            temperature = 1.2
        else:
            # On all subsequent iterations use a low temperature to speed
            # up the convergence
            temperature = 0

        self.messages.append({"role": "user", "content": message})
        try:
            result = self._complete(temperature)
        except Exception:
            # Roll back the unanswered user message so the history keeps
            # alternating user/assistant turns.
            self.messages.pop()
            raise

        self.messages.append({"role": "assistant", "content": result})
        self.count += 1

        return result
    

def combine_reviews(reviews):
    """
    Concatenate reviews into a single numbered text block.

    Each review is rendered as ``Review <n>: <text>`` (numbering starts at 1)
    and consecutive reviews are separated by a ``========`` divider line.

    Args:
        reviews: An iterable of reviews. Every item is converted with ``str``,
            so non-string values (e.g. NaN read from a CSV) do not raise.

    Returns:
        The combined string; an empty string when there are no reviews.
    """
    return "\n\n========\n\n".join(
        "Review " + str(position) + ": " + str(review)
        for position, review in enumerate(reviews, start=1)
    )

def create_initial_prompts(topics, topic_df, prompt_path='../prompts/topic_prompts.txt'):
    """
    Build one persona prompt per topic from the prompt template file.

    The template's ``{topic}`` placeholder is replaced by the topic name and
    ``{description}`` by the first matching ``Description`` in ``topic_df``.

    Args:
        topics: Iterable of topic names.
        topic_df: DataFrame with ``Topic`` and ``Description`` columns.
        prompt_path: Path of the template file (defaults to the original
            hardcoded location).

    Returns:
        A list of prompt strings, in the same order as ``topics``.

    Raises:
        IndexError: if a topic has no row in ``topic_df``.
    """
    # Context manager so the file handle is closed deterministically.
    with open(prompt_path, 'r') as prompt_file:
        template = prompt_file.read()

    prompts = []
    for topic in topics:
        descriptions = topic_df.loc[topic_df['Topic'] == topic, 'Description'].values
        if len(descriptions) == 0:
            # Same exception type as the former `.values[0]`, but with a usable message.
            raise IndexError(f"No description found in topic_df for topic {topic!r}")
        prompts.append(
            template.replace('{topic}', topic).replace('{description}', descriptions[0])
        )

    return prompts


def get_intermediate_summaries(reviews, topics, topic_df):
    """
    Produce one intermediate summary per topic.

    A dedicated agent is created for every topic prompt, and each agent is then
    asked to summarise the same combined review text.

    Returns:
        A list containing a single list of ``{topic: summary}`` dicts, one
        dict per topic, in the order of ``topics``.
    """
    merged_reviews = combine_reviews(reviews)
    opening_message = f"Summarise the following reviews:\n{merged_reviews}"

    # All agents are built before any of them is asked for a summary.
    topic_agents = [
        ComposableAgent([setup_prompt])
        for setup_prompt in create_initial_prompts(topics, topic_df)
    ]

    per_topic_summaries = [
        {topic: worker.chat(opening_message)}
        for topic, worker in zip(topics, topic_agents)
    ]

    return [per_topic_summaries]


def synthesize_final_summary(reviews, intermediate_summaries,
                             prompt_path='../prompts/summary_synthesis_prompts_self_review.txt'):
    """
    Synthesize the final summary from two reviews and the per-topic summaries.

    A fresh agent is primed with the synthesis prompt read from ``prompt_path``
    and is then sent both reviews followed by one ``<topic>: <text>`` entry per
    intermediate summary.

    Args:
        reviews: Sequence whose first two items are the reviews; each is
            converted with ``str`` (consistent with ``combine_reviews``).
        intermediate_summaries: Result of ``get_intermediate_summaries``; only
            its first element (a list of ``{topic: summary}`` dicts) is used.
        prompt_path: Path of the synthesis prompt file (defaults to the
            original hardcoded location).

    Returns:
        The agent's reply to the assembled prompt.
    """
    # Context manager so the file handle is closed deterministically.
    with open(prompt_path, 'r') as prompt_file:
        final_prompt = prompt_file.read()

    summary_prompt = (
        "\n\nReview 1: " + str(reviews[0])
        + "\n\n=======\n\nReview 2: " + str(reviews[1])
        + "\n\n=======\n\nThe conflicts between the two reviews are as follows:\n"
    )

    for topic_summary in intermediate_summaries[0]:
        # Each dict holds a single {topic: summary} pair; use its first entry.
        top, conflict = list(topic_summary.items())[0]
        # Drop the "Summary:" header if the agent's text contains one.
        conflict = conflict.replace('Summary:\n', '')
        summary_prompt += f"\n\n{top}: {conflict}"

    summary_agent = ComposableAgent([final_prompt])
    summary = summary_agent.chat(summary_prompt)

    return summary

In [24]:
# topics = ['quality of service', 'cleanliness of the place', 'value or worth for the money', 'quality of sleep', 
#           'details about the room such as bed, pillows, furnitures, walls, etc.', 'Business service (e.g., internet access)', 'check in and front desk services']

In [25]:
# Topic names handed to the per-topic agents; create_initial_prompts looks each
# one up in the 'Topic' column of topic_df.
topics = [
    'Service',
    'Cleanliness',
    'Value',
    'Sleep Quality',
    'Rooms',
    'Business service (e.g., internet access)',
    'Check in / front desk',
]

# Table of topic definitions, read from the data directory.
topic_df = pd.read_csv('../data/topic_definitions.csv')

In [26]:
# Load the review pairs to summarise; the loop below reads the 'rev_neg' and
# 'rev_pos' columns of every row.
df = pd.read_csv('../data/conflicted_reviews_dataset.csv')

In [27]:
# Quick sanity check of the loaded frame: column names and (rows, columns).
df.columns, df.shape

(Index(['Unnamed: 0', 'rev_neg', 'rev_pos', 'attr_neg', 'attr_pos', 'cls',
        'summary'],
       dtype='object'),
 (12, 7))

In [28]:
# Run the two-stage pipeline for every row of df and collect one final summary
# per row, in row order.
final_summary = []

for i, row in df.iterrows():
    # The two review texts of this row, passed on as [rev_neg, rev_pos].
    reviews = [row['rev_neg'], row['rev_pos']]
    # print(reviews)
    
    # Stage 1: one summary per topic, each produced by its own agent.
    intermediate_summaries = get_intermediate_summaries(reviews, topics, topic_df)
    
    # Stage 2: a separate agent merges the per-topic summaries into one text.
    summary = synthesize_final_summary(reviews, intermediate_summaries)
    
    final_summary.append(summary)
    # print(summary)
    # sys.exit()

In [334]:
# Attach the generated summaries as a new column; this mutates df in place and
# requires final_summary to have exactly one entry per row of df.
df['collaborative_summary'] = final_summary

In [335]:
# Persist the frame, including the new 'collaborative_summary' column.
# NOTE(review): to_csv writes the index by default, which adds another unnamed
# column next to the existing 'Unnamed: 0' — consider index=False; confirm
# that downstream readers do not rely on it.
# NOTE(review): the file name says "news" although the data are reviews — confirm.
df.to_csv('../results/news_summaries.csv')