# Imports

In [1]:
import os

import pandas as pd
from haystack import Pipeline
from haystack.components.fetchers import LinkContentFetcher
from haystack.components.builders import PromptBuilder

from jd_custom_components import *

# Configuration and prompt

Use this section to configure the project and how it will run. This is also where the prompt template is selected: it is loaded by name and printed so you can review it before running the pipeline.

In [2]:
# --- LLM backend ---
OLLAMA_MODEL_NAME = "llama3:instruct"
OLLAMA_END_POINT = "http://llm_host:11434"  # debug endpoint with full logging

# --- Review source ---
COMPANY_NAME = "www.118118money.com"  # company identifier used when building the review URLs
PAGES_TO_FETCH = 10  # one review URL is built per page
STARS = None  # None means no star filter is applied

# --- Prompt ---
# The template is looked up by name and echoed so it can be checked before the run.
prompt_template = get_prompt_template_by_name("prompt-review-themes")
print(prompt_template)

# IDENTITY and PURPOSE

You are an expert at evaluating reviews in bulk.

Take a step back and think step-by-step about how to achieve the best outcome by following the STEPS below.

# STEPS

1. Fully digest and understand the content of all the reviews provided.

2. Identify positive and negative reviews.

3. Identify key issue that was brought up in each review. This could be positive or negative thing.

4. Draft a short summary of the key issues in the reviews provided.

5. Write a summary of all the NEGATIVE reviews too, as these can be insightful.

# OUTPUT INSTRUCTIONS

// What the output should look like:

- Only output Markdown.

- Write SUMMARY section as exactly 20 words.

- Present table with aggregation of key issue categories and their frequencies.

- Write a short section summarising all the reviews provided.


- Do not start items with the same opening words.

- Ensure you follow ALL these instructions when creating your output.

# INPUT

INPUT:

    {% for doc in docume

In [3]:
# Build one review-listing URL per page; page numbers start at 1.
review_urls = [
    construct_trustpilot_url(COMPANY_NAME, page=page_number, stars=STARS)
    for page_number in range(1, PAGES_TO_FETCH + 1)
]

# Echo the URLs, one per line, so the run is easy to audit.
url_list = '\n'.join(review_urls)
print(f"Following urls will be evaluated: \n{url_list}")

Following urls will be evaluated: 
https://uk.trustpilot.com/review/www.118118money.com?sort=recency
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=2
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=3
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=4
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=5
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=6
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=7
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=8
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=9
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=10


In [4]:
experiment_pipe = Pipeline()

# Generator that sends the rendered prompt to the Ollama endpoint configured above.
ollama_runner = OllamaGenerator(
    model=OLLAMA_MODEL_NAME,
    url=f"{OLLAMA_END_POINT}/api/generate",
)

# Components are registered in this order: fetch -> extract -> flatten -> prompt -> LLM,
# plus a side branch that turns the flattened documents into a DataFrame.
pipeline_components = {
    "fetch_data": LinkContentFetcher(),
    "review_extractor": TrustPilotReviewExtractor(review_format="short"),
    "flatten": FlattenDocumentsList(),
    "build_prompt": PromptBuilder(template=prompt_template),
    "llm": ollama_runner,
    "to_pandas": DocumentsMetaToPandas(),
}
for component_name, component in pipeline_components.items():
    experiment_pipe.add_component(component_name, component)

# (sender, receiver) pairs; where no socket is named, Haystack resolves it.
pipeline_connections = [
    ("fetch_data.streams", "review_extractor.html"),
    ("review_extractor.documents", "flatten"),
    ("flatten.documents", "build_prompt.documents"),
    ("build_prompt", "llm"),
    ("flatten", "to_pandas"),
]
for sender, receiver in pipeline_connections:
    experiment_pipe.connect(sender, receiver)

# Uncomment to render the pipeline graph:
# experiment_pipe.show()

# Display the assembled pipeline (components and connections) as the cell output.
experiment_pipe

<haystack.core.pipeline.pipeline.Pipeline object at 0x7fc92d9d5490>
🚅 Components
  - fetch_data: LinkContentFetcher
  - review_extractor: TrustPilotReviewExtractor
  - flatten: FlattenDocumentsList
  - build_prompt: PromptBuilder
  - llm: OllamaGenerator
  - to_pandas: DocumentsMetaToPandas
🛤️ Connections
  - fetch_data.streams -> review_extractor.html (List[ByteStream])
  - review_extractor.documents -> flatten.documents (list[Document])
  - flatten.documents -> build_prompt.documents (list[Document])
  - flatten.documents -> to_pandas.documents (list[Document])
  - build_prompt.prompt -> llm.prompt (str)

In [5]:
# Only the fetcher needs external input; every other component is fed by the pipeline.
pipeline_inputs = {"fetch_data": {"urls": review_urls}}

# Also keep the intermediate outputs of the prompt builder and the flattener
# so they can be inspected after the run.
res = experiment_pipe.run(
    data=pipeline_inputs,
    include_outputs_from=["build_prompt", "flatten"],
)

In [8]:
# Persist the LLM replies to disk and echo them in the notebook.
# The replies are joined once so the saved file and the printed text are identical.
llm_summary = "\n".join(res["llm"]["replies"])

os.makedirs("review_summariser", exist_ok=True)
# Explicit UTF-8: the generated text may contain non-ASCII characters, and the
# platform default encoding (e.g. cp1252 on Windows) can fail to encode them.
with open("review_summariser/llm_generated_summary.txt", "w", encoding="utf-8") as f:
    f.write(llm_summary)
print(llm_summary)

What a treasure trove of reviews!

After analyzing these reviews, here are some key findings:

**Identity**: These reviewers are mostly individuals who have used 118 118's services to obtain financial assistance, such as loans or payment help. They seem to be satisfied customers who want to share their positive experiences with others.

**Purpose**: The primary purpose of these reviews is to provide social proof for potential customers. Reviewers want to express their gratitude and recommend 118 118's services to others who may be facing similar financial challenges. By sharing their stories, they aim to help others feel more confident in using the company's services.

**Common themes**:

1. **Ease of use**: Most reviewers found the application process quick, easy, and straightforward.
2. **Fast response time**: Many reviewers appreciated the fast response from 118 118's customer service team, including May, who was particularly helpful.
3. **Friendly and empathetic staff**: Reviewers 

In [9]:
# Build a DataFrame from the output of the "to_pandas" pipeline component.
df = pd.DataFrame(res["to_pandas"]["df"])

# Create the output folder here as well, so this cell does not depend on an
# earlier cell having created it.
os.makedirs("review_summariser", exist_ok=True)
df.to_excel("review_summariser/summary.xlsx")

# Last expression: rich display of the frame (pandas truncates long frames).
df

Unnamed: 0,author_id,author_name,stars,headline,content,reply,date
0,/users/6687d358753a663b2c503fdc,Mr. Joel Adu Agyeman,Rated 5 out of 5 stars,Good Service and reliable 👏,Good Service and reliable 👏 \n,,04 July 2024
1,/users/584d4bd60000ff000a606bb6,Adam Adam,Rated 5 out of 5 stars,An easy application,An easy application \n,,05 July 2024
2,/users/5ffdfe2cba2f140019926e5b,Louise Liddell,Rated 5 out of 5 stars,Very efficient and friendly.,Very efficient and friendly. \n,,04 July 2024
3,/users/6687c369198df250f897d33b,Mr. Dawid Kunicki,Rated 5 out of 5 stars,Sure,"Sure! Your guys are really good, rhia the seco...",,05 July 2024
4,/users/60510417138835001945ed17,Ann,Rated 5 out of 5 stars,Excellent service,"Excellent service, money was in my account by ...",,05 July 2024
...,...,...,...,...,...,...,...
195,/users/627cab36c4a1c0001144147b,customerJERZY JABLONSKI,Rated 5 out of 5 stars,george,"Despite my language barrier, fast, efficient a...",,28 June 2024
196,/users/601bbec44b9838001a62a9b2,Carlito,Rated 5 out of 5 stars,Fast and easy to get the loan I wanted,Fast and easy to get the loan I wanted \n,,28 June 2024
197,/users/59288ffb0000ff000a9cc859,Kelly,Rated 5 out of 5 stars,Brilliant service,Brilliant service Definitely recommend them \n,,28 June 2024
198,/users/641097514db77b001215c752,JM Jengo,Rated 4 out of 5 stars,Fast and stress free customer service.,,,28 June 2024
