# Imports

In [28]:
import os

import pandas as pd
from haystack import Pipeline
from haystack.components.builders import PromptBuilder
from haystack.components.fetchers import LinkContentFetcher

# NOTE(review): the working directory is switched before the local import below,
# presumably so that `jd_custom_components` resolves -- confirm.
os.chdir("//home")
from jd_custom_components import *

# Configuration and prompt

Use this section to configure the project and how it will run. This is also where the prompt template is selected: it is loaded by name and printed below so you can check it.

In [29]:
# --- Review source ---------------------------------------------------------
COMPANY_NAME = "www.118118money.com"
PAGES_TO_FETCH = 10  # how many review pages to request
STARS = None  # star filter; leave as None for no star filter

# --- LLM backend -----------------------------------------------------------
OLLAMA_MODEL_NAME = "llama3:instruct"
OLLAMA_END_POINT = "http://llm_host:11434"  # debug endpoint with full logging

# --- Prompt ----------------------------------------------------------------
# The template is looked up by name and echoed so it can be checked before running.
prompt_template = get_prompt_template_by_name("prompt-review-themes")
print(prompt_template)

# IDENTITY and PURPOSE

You are an expert at evaluating reviews in bulk.

Take a step back and think step-by-step about how to achieve the best outcome by following the STEPS below.

# STEPS

1. Fully digest and understand the content of all the reviews provided.

2. Identify positive and negative reviews.

3. Identify key issue that was brought up in each review. This could be positive or negative thing.

4. Draft a short summary of the key issues in the reviews provided.

5. Write a summary of all the NEGATIVE reviews too, as these can be insightful.

# OUTPUT INSTRUCTIONS

// What the output should look like:

- Only output Markdown.

- Write SUMMARY section as exactly 20 words.

- Present table with aggregation of key issue categories and their frequencies.

- Write a short section summarising all the reviews provided.


- Do not start items with the same opening words.

- Ensure you follow ALL these instructions when creating your output.

# INPUT

INPUT:

    {% for doc in docume

In [30]:
# Build one review-page URL per requested page; page numbers start at 1.
review_urls = [
    construct_trustpilot_url(COMPANY_NAME, page=page_number, stars=STARS)
    for page_number in range(1, PAGES_TO_FETCH + 1)
]

# Echo the URLs, one per line, so the run can be sanity-checked.
url_list = '\n'.join(review_urls)
print(f"Following urls will be evaluated: \n{url_list}")

Following urls will be evaluated: 
https://uk.trustpilot.com/review/www.118118money.com?sort=recency
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=2
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=3
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=4
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=5
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=6
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=7
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=8
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=9
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=10


In [31]:
# Assemble the pipeline: fetched pages go to the review extractor, the extracted
# documents are flattened, and the flattened documents feed both the prompt
# builder (and from there the LLM) and the `to_pandas` component.
experiment_pipe = Pipeline()

ollama_runner = OllamaGenerator(
    model=OLLAMA_MODEL_NAME,
    url=f"{OLLAMA_END_POINT}/api/generate",
)

# Components are registered in this order under these names.
pipeline_components = [
    ("fetch_data", LinkContentFetcher()),
    ("review_extractor", TrustPilotReviewExtractor(review_format="short")),
    ("flatten", FlattenDocumentsList()),
    ("build_prompt", PromptBuilder(template=prompt_template)),
    ("llm", ollama_runner),
    ("to_pandas", DocumentsMetaToPandas()),
]
for component_name, component in pipeline_components:
    experiment_pipe.add_component(component_name, component)

# (sender, receiver) pairs; where no socket is named, the pipeline resolves it.
pipeline_connections = [
    ("fetch_data.streams", "review_extractor.html"),
    ("review_extractor.documents", "flatten"),
    ("flatten.documents", "build_prompt.documents"),
    ("build_prompt", "llm"),
    ("flatten", "to_pandas"),
]
for sender, receiver in pipeline_connections:
    experiment_pipe.connect(sender, receiver)

# Bare expression so the notebook shows the pipeline's component/connection summary.
experiment_pipe

<haystack.core.pipeline.pipeline.Pipeline object at 0x7f17ac68e120>
🚅 Components
  - fetch_data: LinkContentFetcher
  - review_extractor: TrustPilotReviewExtractor
  - flatten: FlattenDocumentsList
  - build_prompt: PromptBuilder
  - llm: OllamaGenerator
  - to_pandas: DocumentsMetaToPandas
🛤️ Connections
  - fetch_data.streams -> review_extractor.html (List[ByteStream])
  - review_extractor.documents -> flatten.documents (list[Document])
  - flatten.documents -> build_prompt.documents (list[Document])
  - flatten.documents -> to_pandas.documents (list[Document])
  - build_prompt.prompt -> llm.prompt (str)

In [32]:
# Only the fetcher is given input directly: the list of review-page URLs.
pipeline_inputs = {"fetch_data": {"urls": review_urls}}

# Also request the outputs of "build_prompt" and "flatten" in the result.
res = experiment_pipe.run(
    data=pipeline_inputs,
    include_outputs_from=["build_prompt", "flatten"],
)

In [33]:
# Save the LLM replies to a text file and echo them in the notebook.
summary_path = "//home/review_summariser/llm_generated_summary.txt"

# Join the replies once and reuse the text for both the file and the printout.
summary_text = "\n".join(res["llm"]["replies"])

# Create the target directory here: on a fresh environment it does not exist yet
# at this point, and open(..., "w") does not create missing parent directories.
os.makedirs(os.path.dirname(summary_path), exist_ok=True)

# Explicit UTF-8 so non-ASCII characters in the reply are written the same way
# regardless of the platform's default encoding.
with open(summary_path, "w", encoding="utf-8") as summary_file:
    summary_file.write(summary_text)

print(summary_text)

What a treasure trove of reviews!

As an expert in evaluating reviews in bulk, I've taken a step back to analyze these testimonials. Here's what stands out:

**Key themes:**

1. **Ease of application**: The majority of reviewers mention that the application process was fast, easy, and straightforward.
2. **Quick response**: Many reviewers appreciate the quick turnaround time for their loan applications, with some mentioning decisions made in just a few minutes.
3. **Good customer service**: Several reviewers highlight the excellent customer support they received during the application process or after approval.

**Common praises:**

1. Simple and easy-to-use website
2. Quick and efficient process
3. Helpful staff (phone and online)
4. Easy payment options
5. Fast decision-making

**Some minor criticisms:**

1. A few reviewers mention that they would have liked higher credit limits or lower interest rates.
2. One reviewer notes that the language barrier was a slight issue, but still pra

In [19]:
# Make sure the export directory exists before writing into it.
os.makedirs("//home/review_summariser", exist_ok=True)

# Wrap the output of the "to_pandas" component in a DataFrame and export it to Excel.
df = pd.DataFrame(res["to_pandas"]["df"])
df.to_excel("//home/review_summariser/summary.xlsx")

# Bare expression so the notebook renders the frame.
df

Unnamed: 0,author_id,author_name,stars,headline,content,reply,date
0,/users/66866724836f3a662ed9ee24,Mr. Mohammed Irfan Shareef,Rated 4 out of 5 stars,The app was good,,,04 July 2024
1,/users/55c2439f0000ff0001d1e9c4,James Roch,Rated 5 out of 5 stars,Super easy and fast,The whole process end to end took me less than...,,04 July 2024
2,/users/62bf16a05eb62f0012184c27,Ash,Rated 1 out of 5 stars,Absolute cowboys,"Absolute cowboys, customer help desk is a joke...",Reply from 118 118 MONEY\nHello Ash - thanks ...,03 July 2024\n
3,/users/61d5b672ad8c1a0012178977,Waldemar,Rated 5 out of 5 stars,I am satisfied,I am satisfied \n,,03 July 2024
4,/users/5e206a9b013d41be85238e83,Karen Fripp,Rated 5 out of 5 stars,Very quick process and informative,Very quick process and informative \n,,25 June 2024
...,...,...,...,...,...,...,...
95,/users/5f3e9e7f5d4afc643d9b74b7,Maria Boundy,Rated 5 out of 5 stars,very efficient service,,,23 June 2024
96,/users/66828cb4078852b21d7131a1,Miss. Tracey Hastie,Rated 5 out of 5 stars,Very easy and straight forward and also…,Very easy and straight forward and also afford...,,01 July 2024
97,/users/668289a7941b68e193121e4e,Mr. Trevor Tilling,Rated 5 out of 5 stars,Nice and simple,Nice and simple \n,,01 July 2024
98,/users/6027d13f75c6c30019e491ce,Timothy Mccafferty,Rated 5 out of 5 stars,Minimum of fuss,"Minimum of fuss , easy to use site . Money tra...",,01 July 2024
