# Imports

In [15]:
from haystack import Pipeline
from haystack.components.fetchers import LinkContentFetcher
# NOTE: wildcard import is kept in its original position so the order in which
# names may shadow each other is unchanged.
from jd_custom_components import *
from haystack.components.builders import PromptBuilder
import pandas as pd
from transformers import AutoTokenizer

# Configuration and prompt

Use this section to configure the project and how it runs. It is also where the prompt template is selected: the template is loaded by name rather than written inline.

In [16]:
# --- Review source -----------------------------------------------------------
# Identifier passed to construct_trustpilot_url(); despite the name it holds the
# company's domain as it appears in the Trustpilot review URL.
COMPANY_NAME = "www.118118money.com"
# Number of review pages to request (pages 1..PAGES_TO_FETCH).
PAGES_TO_FETCH = 5
# Star-rating filter; set to None for no star filter.
STARS = None

# --- LLM backend -------------------------------------------------------------
OLLAMA_MODEL_NAME = "llama3:instruct"
# This is a debug endpoint with full logging.
OLLAMA_END_POINT = "http://localhost:11700"

# --- Prompt ------------------------------------------------------------------
# The template is looked up by name and later handed to PromptBuilder.
prompt_template = get_prompt_template_by_name("prompt-review-themes")

In [17]:
# Build one Trustpilot URL per requested page; page numbers are 1-based.
review_urls = [
    construct_trustpilot_url(COMPANY_NAME, page=page_number, stars=STARS)
    for page_number in range(1, PAGES_TO_FETCH + 1)
]

# Echo the URLs so the reader can see exactly which pages will be fetched.
url_list = "\n".join(review_urls)
print(f"Following urls will be evaluated: \n{url_list}")

Following urls will be evaluated: 
https://uk.trustpilot.com/review/www.118118money.com?sort=recency
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=2
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=3
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=4
https://uk.trustpilot.com/review/www.118118money.com?sort=recency&page=5


In [18]:
# Assemble the experiment pipeline.
#
# Data flow (as wired below):
#   fetch_data -> review_extractor -> flatten -> build_prompt -> llm
#                                            \-> to_pandas
ollama_runner = OllamaGenerator(
    model=OLLAMA_MODEL_NAME,
    url=f"{OLLAMA_END_POINT}/api/generate",
)

# (name, instance) pairs, registered in this order.
pipeline_components = [
    ("fetch_data", LinkContentFetcher()),
    ("review_extractor", TrustPilotReviewExtractor(review_format="short")),
    ("flatten", FlattenDocumentsList()),
    ("build_prompt", PromptBuilder(template=prompt_template)),
    ("llm", ollama_runner),
    ("to_pandas", DocumentsMetaToPandas()),
]

# (sender, receiver) pairs; where no socket is named the pipeline resolves it.
pipeline_connections = [
    ("fetch_data.streams", "review_extractor.html"),
    ("review_extractor.documents", "flatten"),
    ("flatten.documents", "build_prompt.documents"),
    ("build_prompt", "llm"),
    ("flatten", "to_pandas"),
]

experiment_pipe = Pipeline()
for component_name, component in pipeline_components:
    experiment_pipe.add_component(component_name, component)
for sender, receiver in pipeline_connections:
    experiment_pipe.connect(sender, receiver)

# Leave the pipeline as the cell's last expression so its component/connection
# summary is displayed. Call experiment_pipe.show() instead to render the graph.
experiment_pipe

<haystack.core.pipeline.pipeline.Pipeline object at 0x000001933F0E6A10>
🚅 Components
  - fetch_data: LinkContentFetcher
  - review_extractor: TrustPilotReviewExtractor
  - flatten: FlattenDocumentsList
  - build_prompt: PromptBuilder
  - llm: OllamaGenerator
  - to_pandas: DocumentsMetaToPandas
🛤️ Connections
  - fetch_data.streams -> review_extractor.html (List[ByteStream])
  - review_extractor.documents -> flatten.documents (list[Document])
  - flatten.documents -> build_prompt.documents (list[Document])
  - flatten.documents -> to_pandas.documents (list[Document])
  - build_prompt.prompt -> llm.prompt (str)

In [19]:
# Execute the pipeline end to end on the review URLs built earlier.
#
# `include_outputs_from` asks the pipeline to also return the outputs of the
# named intermediate components; the rendered prompt from "build_prompt" is read
# back afterwards to count its tokens. The parameter is documented as a set of
# component names, so a set literal is passed rather than a list.
res = experiment_pipe.run(
    data={
        "fetch_data": {
            "urls": review_urls,
        },
    },
    include_outputs_from={"build_prompt", "flatten"},
)

In [20]:
# Measure how long the rendered prompt is, in tokens, then show the LLM output.
# NOTE(review): the base Meta-Llama-3-8B tokenizer stands in for the
# "llama3:instruct" model served by Ollama; presumably they share a vocabulary.
# Confirm if exact counts matter.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")

prompt_text = res["build_prompt"]["prompt"]
prompt_tokens = tokenizer.tokenize(prompt_text)
prompt_len = len(prompt_tokens)
print(f"The prompt is {prompt_len} tokens long.\n========================================")

# Replies come back as a list of strings; print them one per line.
llm_replies = res["llm"]["replies"]
print("\n".join(llm_replies))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


The prompt is 1498 tokens long.
**SUMMARY**
In 20 words: Overall positive reviews of 118 118, praising ease, speed, and helpful customer service.

**TABLE OF KEY ISSUE CATEGORIES AND FREQUENCIES**

| Category | Frequency |
| --- | --- |
| Ease of application | 34 |
| Quick response/time | 23 |
| Helpful customer service | 21 |
| Simple and straightforward process | 18 |
| Fast decision-making | 14 |
| Good rates/loans | 12 |
| Positive experience | 11 |
| Stress-free process | 9 |
| Easy to use website/app | 8 |
| Reliable service | 6 |
| Other (various praises) | 5 |

**SUMMARY OF REVIEWS**
The majority of reviews are extremely positive, praising the ease, speed, and helpfulness of 118 118's services. Many reviewers found the application process to be simple and stress-free, with quick responses and decision-making. The customer service is also widely praised for being empathetic and knowledgeable. A few reviewers did mention some negative experiences, such as aggressive chasing for m

In [21]:
# Tabulate the review metadata produced by the "to_pandas" component.
# `pd` is imported explicitly in the imports cell; this cell previously relied
# on the name leaking in through the wildcard import.
df = pd.DataFrame(res["to_pandas"]["df"])

# Bare last expression so the notebook renders the frame with its rich display.
df

Unnamed: 0,author_id,author_name,stars,headline,content,reply,date
0,/users/6050a04dc06806001b6583d9,sheila kelly,Rated 5 out of 5 stars,Great customer service excellent …,Great customer service excellent rates on l...,,02 July 2024
1,/users/5ef3474ad9f9fa305970a609,David Holmes,Rated 1 out of 5 stars,Really poor,"Really poor, you miss a payment and the next d...",,01 July 2024
2,/users/5f173337a36e877dd0cd385c,Bruce Anderson,Rated 5 out of 5 stars,Very easy to apply,,,02 July 2024
3,/users/6683dcb4b19cefbb8d1e6cf2,Mrs. Chinyere Ojiakor,Rated 5 out of 5 stars,Very good and quick to respond,Very good and quick to respond \n,,02 July 2024
4,/users/6683d682cccc1c729f147daf,Mr. Mark Ball,Rated 5 out of 5 stars,Very easy process..,Set up within 10 mins.. easy to deal with.\n,,02 July 2024
...,...,...,...,...,...,...,...
95,/users/64adee1926e9da0011684eef,Ryan Yoneda,Rated 5 out of 5 stars,Easy to apply,"Easy to apply, no complications.\n",,29 June 2024
96,/users/5c8eb90c4964629539477e1d,Erianne Ferreira,Rated 5 out of 5 stars,Fantastic experience,Fantastic experience. Thanks \n,,20 June 2024
97,/users/64c10b037fd64a00126b12a4,Mark Wall,Rated 5 out of 5 stars,Quick and very easy to apply,Quick and very easy to apply \n,,29 June 2024
98,/users/5a719f090000ff000b0b2307,Ashok Kumar Mishra,Rated 5 out of 5 stars,Very easy process and Lovely experience,Very easy process and Lovely experience \n,,29 June 2024
