In [None]:
pip install schedule

In [None]:
pip install openai

In [None]:
pip install langchain

In [None]:
pip install tiktoken

In [None]:
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain import text_splitter 
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate
from langchain.docstore.document import Document


# Instructions telling ChatGPT how to analyze the reviews (the type of analysis). LangChain is used to split large text into chunks, because ChatGPT limits how many tokens can be sent in one request.


def chat_gpt_review_analysis(data):
  """Run the five-section review analysis prompt over ``data`` with gpt-3.5-turbo.

  The text is wrapped in a LangChain ``Document``, split with
  ``RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)`` and fed
  to a ``map_reduce`` summarize chain that uses the same prompt for both the
  map and the combine step.

  Args:
    data: The reviews to analyze. A ``str`` is used as-is; any other value
      (for example the list of JSON strings returned by ``scrape_data``) is
      converted with ``str()`` first, because ``Document.page_content`` must
      be a string.

  Returns:
    The text produced by the chain. It is also printed.
  """
  import os

  # Prefer a key supplied through the environment so the secret does not have
  # to be pasted into the notebook; the placeholder is kept as the fallback.
  api_key = os.environ.get("OPENAI_API_KEY", "API KEY")
  llm = ChatOpenAI(openai_api_key=api_key, model_name="gpt-3.5-turbo")

  # Callers sometimes pass the raw list of scraped pages; normalize to text.
  if not isinstance(data, str):
    data = str(data)
  doc = Document(page_content=data)

  # Split the reviews into chunks small enough for the model's token limit.
  # (Named `splitter` so it does not shadow the imported `text_splitter` module.)
  splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=50)
  texts = splitter.split_documents([doc])

  prompt_template = """

  You are an assistant that provides review insights for small business owners trying to launch new products in new spaces or improve their existing products. Each section should have detailed bullet points with easy to understand explanations. Be very specific, small business owners will make million dollar decisions based on the analysis you provide. Accurate information, that is specific and easy to understand is crucial. The analysis should have 5 sections as shown below:
  Section 1 is named "Pros." In bullet points, you will explain the pros of the product, why people like the product. Answer should be in bullet points. ['Pro Keyword']: The reason why people like the product.Also provide a % number out of 100% next to each answer which should be a keyword. If you add all the % on this section, you can not exceed 100%, the same applies to other sections. Example ['Size'] - 85% - The wagon fits anywhere such as the trunk of my small mini cooper. This means that 85% of positive commentswere about the wagon size being good becuase it could fit anywhere.
  Section 2 is named "Cons." In bullet points, you will explain the cons of the product, why people do not like the rpoduct. Answer should be in bullet points: ['Con Keyword']: The reason why people do not like the product.Also provide a % number out of 100% next to each answer which should be a keyword. Example ['Wheels'] - 20% - Made of plastic, is loud on pavement, difficult to replace, leaves a mark on my hardwood floors. This means that 20% of the negative comments was about the wheel being out of plastic.
  Section 3 is named "Improvements." In bullet points, you will explain the user painpoints of the product, and ways to fix those paint points. In other words, how to make the product better. Answer should be in bullet points: ['Product Improvement Keyword'] -  ways to improve the product. Example, ['Weight'] - It's too heavy for the average person,  consider using synthetic fibers instead of steel.
  Section 4 is named "Where." In bullet points, you will explain the places in which customers use this product. Some examples could be the fridge, the toilet, the bathtub, the front porch, the bed, the dinning table, etc. Answer should be in one word keyword" ['Where Keyword']. Also provide a % number out of 100% next to each answer which should be a keyword. For example: ['Car'] - 70%, this means that 70% of people use it in their cars, ['Bed'] - 30%, this means that 30% of people use it in their bed. It must be relevant to the reviews, no coming up with new things, simply provide analytics. Be specific.
  Section 5 is named "When." In bullet points, you will explain when do customers use this product. Examples include, daytime, nighttime, etc. Also provide a % number out of 100% next to each answer which should be a keyword. For example: ['daytime'] - 60%, this means that 60% of people use it during the day, ['nighttime'] - 40%, this means that 40% of people use it at nightime. It must be relevant to the reviews, no coming up with new things, simply provide analytics. Be specific.

  The reviews to do the analysis on are as follows:

  {text}
  """

  # The same prompt drives both stages: each chunk is analyzed on its own
  # (map), then the per-chunk analyses are merged with the prompt again (combine).
  PROMPT = PromptTemplate(template=prompt_template, input_variables=["text"])
  chain = load_summarize_chain(llm, chain_type="map_reduce", map_prompt=PROMPT, combine_prompt=PROMPT)

  review = chain.run(texts)
  print(review)

  return review

In [None]:
import requests
import json

# function to scrape reviews from the products detail page on walmart

def scrape_data(product_id, max_pages=4):
  """Collect review pages for a product through the BlueCart API.

  Page 1 is always requested; its ``pagination.total_pages`` value decides how
  many further pages exist. Pages past the real total are never requested.

  Args:
    product_id: Item id sent to the API as ``item_id``.
    max_pages: Upper bound on the number of pages fetched (page 1 included).
      Defaults to 4, the same cap the fixed ``range(2, 5)`` loop used to apply.
      Pass ``None`` to fetch every available page.

  Returns:
    A list with one entry per fetched page; each entry is the JSON-encoded
    string of that page's ``reviews`` value.

  Raises:
    requests.HTTPError: If the API answers with an HTTP error status.
    KeyError: If a response lacks the ``pagination`` or ``reviews`` keys.
  """
  import os

  # Prefer a key supplied through the environment so the secret does not have
  # to be pasted into the notebook; the placeholder is kept as the fallback.
  api_key = os.environ.get('BLUECART_API_KEY', 'API KEY')

  def fetch_page(page):
    """Request one page of reviews and return the decoded JSON payload."""
    params = {
      'api_key': api_key,
      'type': 'reviews',
      'item_id': f'{product_id}',
      'page': f'{page}'
      #'sort_by': 'newest_to_oldest'
    }
    response = requests.get('https://api.bluecartapi.com/request', params=params)
    response.raise_for_status()
    payload = response.json()  # decode once, reuse for printing and extraction
    print(json.dumps(payload))
    return payload

  first_page = fetch_page(1)
  total_pages = first_page['pagination']['total_pages']
  last_page = total_pages if max_pages is None else min(total_pages, max_pages)

  data = [json.dumps(first_page['reviews'])]
  for page in range(2, last_page + 1):
    data.append(json.dumps(fetch_page(page)['reviews']))

  return data


In [None]:
#Firebase set up

import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Re-running this cell must not call initialize_app() a second time: once the
# default app exists, firebase_admin raises ValueError. Reuse the existing app
# instead of deleting and recreating it.
cred = credentials.Certificate('/content/firestore email-automation-pt2.json')
try:
    app = firebase_admin.get_app()
except ValueError:
    # No default app yet (fresh kernel) - create it.
    app = firebase_admin.initialize_app(cred)
db = firestore.client()

last_doc = None


In [None]:
import schedule
import time 


# Every 10 minutes, retrieve the product IDs submitted by users that have not been analyzed yet, analyze the oldest submission, wait 10 minutes, then repeat.


def process_documents():
    """Analyze the reviews of the oldest pending ``review_analysis`` document.

    Queries the ``review_analysis`` collection for documents whose
    ``review_analysis_pulse_check`` field equals ``'not_done'``, ordered by
    ``created_time``, and takes the first one. For each of its ``productID1``,
    ``productID2`` and ``productID3`` fields the reviews are scraped with
    ``scrape_data`` and analyzed with ``chat_gpt_review_analysis``.

    Returns:
        list: One analysis per product ID found on the document, in field
        order. Empty when no document is pending.
    """
    # Only the oldest pending document is processed, so fetch just that one.
    query = (
        db.collection('review_analysis')
        .where('review_analysis_pulse_check', '==', 'not_done')
        .order_by('created_time')
        .limit(1)
    )
    pending = list(query.get())
    print(pending)

    if not pending:
        # Nothing to do; indexing pending[0] here would raise IndexError.
        return []

    fields = pending[0].to_dict()

    analyses = []
    for field_name in ('productID1', 'productID2', 'productID3'):
        # A missing or empty product ID is skipped rather than raising
        # KeyError - assumes a document may carry fewer than three products;
        # TODO confirm against the code that writes these documents.
        product_id = fields.get(field_name)
        if not product_id:
            continue
        print(product_id)

        reviews = scrape_data(product_id)
        # scrape_data returns a list of JSON strings; the analysis expects one
        # string, so every product is converted the same way.
        analysis = chat_gpt_review_analysis(str(reviews))
        analyses.append(analysis)

    # NOTE(review): nothing here stores the analyses or changes
    # 'review_analysis_pulse_check', so the same document is selected again on
    # the next call. Confirm where the write-back is meant to happen.
    return analyses
      
# Seconds to wait between runs. The intended cadence is one run every
# 10 minutes; a 10-second sleep re-ran the paid scraping and LLM calls almost
# back to back.
POLL_INTERVAL_SECONDS = 10 * 60

while True:
    process_documents()

    # Wait before checking Firestore for pending documents again.
    time.sleep(POLL_INTERVAL_SECONDS)
