In [1]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda,RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter

import re

In [37]:
# Chat model served by the Ollama instance listening on localhost:11434.
llm = ChatOllama(
    base_url='localhost:11434',
    model='llama3.2:3b',
)

In [7]:
# Token placed between individual comments (and between chunk summaries) when
# they are carried around as a single string.
separator = '<SEP>'

#Receive a string containing comments separated by a separator token
def format_comments(x, separator):
    """Normalize a separator-delimited string of comments into blank-line-separated text.

    Args:
        x: String holding one or more comments joined by ``separator``.
        separator: Literal token that delimits the comments. It is matched as
            plain text, so regex metacharacters such as ``|`` or ``*`` are safe.
            An empty separator is ignored.

    Returns:
        The text with runs of three or more identical whitespace characters
        collapsed to one, every separator replaced by a blank line, runs of
        three or more newlines reduced to two, and surrounding whitespace stripped.
    """
    # Collapse 3+ repetitions of the same whitespace character into a single one.
    x = re.sub(r'(\s)\1{2,}', r'\1', x)
    # Literal replacement: the separator is data, not a regular expression.
    if separator:
        x = x.replace(separator, '\n\n')
    # Separators next to existing newlines can stack up; cap at one blank line.
    x = re.sub(r'\n{3,}', '\n\n', x)
    return x.strip()

#Receive a list of summary chunks
def format_summaries(x,separator):
    """Join a list of summary strings with ``separator`` and normalize the result.

    The joined string is passed through ``format_comments`` with the same
    separator, and that function's return value is returned unchanged.
    """
    return format_comments(separator.join(x), separator)

In [39]:
# Template for summarizing one chunk of comments.
#   {chunk_info} - phrase describing which chunk is being summarized
#   {complexity} - requested length/shape of the summary
#   {comments}   - the formatted comment text
prompt_str = (
    "Summarize {chunk_info} of comments in {complexity} giving a general idea of what all comments are saying "
    "highlighting the overall sentiment of the sentences. Don't answer based on individual comments.\n"
    "### Comments\n"
    "{comments}\n"
    "Summary: "
)
prompt = PromptTemplate(input_variables=['complexity','comments','chunk_info'],template=prompt_str)

In [40]:
# Per-chunk summarization pipeline: build prompt variables -> prompt -> LLM -> plain string.
#
# Every value in the dict below is invoked with the full input mapping given to
# `chain.invoke(...)`, so each prompt variable must pick out its own key.
# A bare RunnablePassthrough() would forward the entire mapping (raw comments
# included) into that slot of the prompt.
chain = (
    {
        'complexity': RunnableLambda(lambda x: x['complexity']),
        'comments': RunnableLambda(lambda x: format_comments(x['comments'], x['separator'])),
        'chunk_info': RunnableLambda(lambda x: x['chunk_info']),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [3]:
# Product page whose review list is fetched by the WebBaseLoader cell that uses `sample_url`.
sample_url = 'https://www.amazon.com/Amazon-Basics-Microphone-Podcasting-Adjustable/dp/B0CL9BTQRF/ref=sr_1_1_ffob_sspa?_encoding=UTF8&content-id=amzn1.sym.edf433e2-b6d4-408e-986d-75239a5ced10&dib=eyJ2IjoiMSJ9.nT6IE53AC-z9ZyNjetcZ9BBtua7z7T6s4YSRLY6LyizSQz852yfNGPCrecJ0bONr3kXwWSWtBwE9Tl72CQhNX5aXnUgky8Aja5-OgXnztkqYv9cVdLD3Qu1t0U33s2X2kwL6SJC8SF0oj-0vyDkuoh-vUa61d-fRchh1Nr-ngKSRuMcbd5IqWc277nEpdchqpA1Q-FdOzuE5nM40X5dC5GYX8c5tL_nStcU3O9z-MDUJAkr1BvJQIlyX1Mu28o-mWX-36wT-Me9osf4q6dWwT7rsgqFqOpG34N4brSLciPc.k1pcqbAx85cWR3KuhYKiMBCuhpLK43RJgorqhr4RZGI&dib_tag=se&keywords=gaming&pd_rd_r=d7d5127a-bac2-41bf-b544-9df9cf821c92&pd_rd_w=GnUzS&pd_rd_wg=BtKp0&qid=1754908078&sr=8-1-spons&sp_csd=d2lkZ2V0TmFtZT1zcF9hdGY&psc=1#averageCustomerReviewsAnchor'

In [None]:
# import requests
# from bs4 import BeautifulSoup
# response = requests.get(sample_url)
# html_content = response.text
# soup = BeautifulSoup(html_content,'lxml')
# res = soup.find('ul', {'id':'cm-cr-dp-review-list'})

In [4]:
from langchain.document_loaders import WebBaseLoader #UnstructuredURLLoader
import bs4
# Restrict HTML parsing to the <ul id="cm-cr-dp-review-list"> element so that
# only that part of the page ends up in the loaded document.
web_loader = WebBaseLoader(
    sample_url,
    bs_kwargs={
        'parse_only': bs4.SoupStrainer('ul', id='cm-cr-dp-review-list'),
    },
)
doc = web_loader.load()
len(doc)

1

In [8]:
#Formatting comments for chain
# Target shape, e.g.: comments = ''' Great product! <SEP> It could be better... '''
page_content = doc[0].page_content
# The exact 'Helpful/Report' footer text is used as the split point between
# comments (presumably it closes each review - verify against the page markup).
comments = separator.join(page_content.split('Helpful\n\n\nReport\n\n\n'))
# Any remaining footer with a different newline count is removed outright
# (no separator is inserted for these).
comments = re.sub(r'Helpful[\n]+Report[\n]+','',comments)
comments = re.sub(r'Read more','',comments)
# Explicit UTF-8 so non-ASCII review text is written the same way on every
# platform instead of depending on the locale's default encoding.
with open('amazon_comments.txt','w',encoding='utf-8') as f:
    f.write(comments)

In [43]:
# Split the comment text on the separator token, with a chunk size of 2000
# and no overlap between chunks.
splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=2000,
    separator=separator,
)
chunks = splitter.split_text(comments)
len(chunks)

3

In [44]:
# Print the first chunk to inspect how the text was split.
print(chunks[0])

Gregory Dahl5.0 out of 5 stars
Great and Professional Looking Microphone!
Reviewed in the United States on July 8, 2025Verified Purchase


There is a plethora of USB Microphones on the market. Even though I do not create YouTube videos, I made the right choice to utilize a professional microphone that sounds like I am in the same room. I have tuned it utilizing Microsoft Windows Sound Settings and the clarity is fantastic. It makes my Microsoft Teams experience great.

2 people found this helpful


              <SEP>Zach Johnson5.0 out of 5 stars
Perfect for starters.
Reviewed in the United States on July 30, 2025Verified Purchase
Fantastic sound quality, fantastic price point. If youâ€™re just looking to try out audio recording, streaming, podcast, etc. This is the mic to buy. Low investment cost with great sound, can attach to armatures, itâ€™s a great option for starting out or for some seasoned users needing a backup option. Highly recommend.




              <SEP>Sean5.0 out of 

In [45]:
# Display the chunk-level prompt template.
prompt

PromptTemplate(input_variables=['chunk_info', 'comments', 'complexity'], input_types={}, partial_variables={}, template="Summarize {chunk_info} of comments in {complexity} giving a general idea of what all commentts are saying highlighting the overall sentiment of the sentences. Don't answer based on individual comments.\n### Comments\n{comments}\nSummary: ")

In [46]:
# Summarize every chunk on its own; the results are collected in order.
total_chunks = len(chunks)
chunks_summaries = []
for position, chunk_text in enumerate(chunks, start=1):
    # Tell the model which part it is looking at when there is more than one chunk.
    if total_chunks > 1:
        chunk_info = f'the chunk {position} of {total_chunks} '
    else:
        chunk_info = 'a sequence'
    chunk_summary = chain.invoke({
        'complexity': 'one short paragraph',
        'comments': chunk_text,
        'separator': separator,
        'chunk_info': chunk_info,
    })
    chunks_summaries.append(chunk_summary)

In [None]:
# Template that merges the per-chunk summaries into one final summary.
#   {summaries} - the chunk summaries as a single formatted string
# Adjacent string literals inside the parentheses are concatenated implicitly,
# so no backslash continuations are needed.
prompt2_str = (
    "The following texts are summaries of different comments of the same subject. "
    "From these summaries provide a final summary based on all of them. Answer ONLY the final summary\n"
    "### Summaries:\n"
    "{summaries}"
    "\nFinal Summary: "
)
prompt2 = PromptTemplate(input_variables=['summaries'],template=prompt2_str)

In [49]:
# Final summarization pipeline: format the list of summaries -> prompt2 -> LLM -> plain string.
# The input mapping must provide the keys 'summaries' and 'separator'.
chain2 = (
    {
        'summaries': RunnableLambda(
            lambda inputs: format_summaries(inputs['summaries'], inputs['separator'])
        ),
    }
    | prompt2
    | llm
    | StrOutputParser()
)

In [50]:
# Combine all chunk summaries into a single final summary.
final_summary = chain2.invoke({
    'summaries': chunks_summaries,
    'separator': separator,
})

In [51]:
# Print the final summary text.
print(final_summary)

The microphone has received overwhelmingly positive reviews for its professional sound quality, ease of use, and affordability, with many praising its clarity, simplicity, and value. While some reviewers mention minor drawbacks such as the lack of extra features, these are largely outweighed by the microphone's strengths. The majority of comments highlight the microphone's good performance in meeting basic needs, including improved speech clarity and intelligibility, making it a solid choice for various applications, including gaming and voice-overs, at an affordable price.
