fixes #15
WolfgangFahl committed Nov 16, 2023
1 parent 61a8843 commit 5902682
Showing 3 changed files with 63 additions and 25 deletions.
54 changes: 39 additions & 15 deletions scan/amazon.py
@@ -6,53 +6,77 @@
import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from typing import List
from typing import List, Optional

@dataclass
class Product:
"""
Data class representing a product.
Attributes:
title (str): Title of the product.
image_url (str): URL of the product image.
price (str): Price of the product.
title (str): The title of the product.
image_url (str): The URL of the product image.
price (str): The price of the product.
asin (Optional[str]): The Amazon Standard Identification Number (ASIN) of the product,
which is a unique identifier on Amazon's platform.
"""
title: str
image_url: str
price: str
asin: Optional[str] = None

@property
def amazon_url(self) -> str:
return f"https://www.amazon.com/dp/{self.asin}" if self.asin else None


class Amazon:
"""
lookup products on amazon web site
"""

@classmethod
def extract_amazon_products(cls, soup: BeautifulSoup) -> List[Product]:
def __init__(self,debug: Optional[bool] = False):
"""
constructor
Args:
debug (bool, optional): If set to True, pretty-prints the first product div for debugging.
"""
self.debug=debug

def extract_amazon_products(self, soup: BeautifulSoup) -> List[Product]:
"""
Extracts product information from Amazon product listing HTML content.
Args:
soup (BeautifulSoup): Soup object of HTML content of the Amazon product listing page.
Returns:
List[Product]: A list of extracted product information as Product objects.
"""
products = []
# Find all div elements that match the product listing structure
for div in soup.find_all("div", class_="puisg-row"):
for index, div in enumerate(soup.find_all("div", class_="puisg-row")):
product_info = {}

# Pretty-print the first product div if debug is True
if self.debug and index == 0:
print("Debug - First Product Div:")
print(div.prettify()) # Pretty-print the first div

# Extracting product title
title_div = div.find("h2", class_="a-size-mini")
if title_div and title_div.a:
product_info['title'] = title_div.a.get_text(strip=True)

# Extracting product image URL
# Extracting product image URL and ASIN
image_div = div.find("div", class_="s-product-image-container")
if image_div and image_div.img:
if image_div and image_div.a:
product_info['image_url'] = image_div.img['src']

link = image_div.a['href']
asin = link.split('/dp/')[-1].split('/')[0]
product_info['asin'] = asin

# Extracting product price
price_span = div.find("span", class_="a-price")
if price_span and price_span.find("span", class_="a-offscreen"):
@@ -67,14 +91,14 @@ def extract_amazon_products(cls, soup: BeautifulSoup) -> List[Product]:
product = Product(
title=product_info['title'],
image_url=product_info.get('image_url', ''),
price=product_info.get('price', '')
price=product_info.get('price', ''),
asin=product_info.get('asin', '')
)
products.append(product)

return products

@classmethod
def lookup_products(cls,search_key:str):
def lookup_products(self,search_key:str):
"""
lookup the given search key e.g. ISBN or EAN
"""
@@ -86,7 +110,7 @@ def lookup_products(cls,search_key:str):
response = requests.get(url, headers=headers)
if response.status_code == 200:
soup = BeautifulSoup(response.content, 'html.parser')
product_list=cls.extract_amazon_products(soup)
product_list=self.extract_amazon_products(soup)
return product_list
else:
msg=f"lookup for {search_key} failed with HTML status code {response.status_code}"
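A minimal usage sketch of the reworked API (not part of the commit): Amazon is now instantiated, optionally with a debug flag, and lookup_products is called on the instance; each returned Product carries the new asin field and the amazon_url property. The ISBN below is an illustrative placeholder, and the call performs a live request against amazon.com.

from scan.amazon import Amazon

amazon = Amazon(debug=True)  # debug=True pretty-prints the first product div
products = amazon.lookup_products("9780262046305")  # placeholder ISBN
if products:
    for product in products:
        # asin may be empty if no /dp/ link was found in the listing
        print(product.title, product.price, product.asin, product.amazon_url)
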
31 changes: 22 additions & 9 deletions scan/webcam.py
@@ -10,7 +10,7 @@
from ngwidgets.widgets import Link
from ngwidgets.background import BackgroundTaskHandler
from scan.barcode import Barcode

from scan.amazon import Amazon

class WebcamForm:
"""
@@ -32,6 +32,7 @@ def __init__(self, webserver, default_url: str):
self.shot_url = f"{self.url}/shot.jpg"
self.image_path = None
self.setup_form()
self.amazon=Amazon(self.webserver.debug)

def notify(self, msg):
ui.notify(msg)
@@ -95,26 +96,38 @@ def setup_form(self):

async def scan_barcode(self):
"""
Scan for barcodes in the most recently saved webcam image.
Scan for barcodes in the most recently saved webcam image and look up products on Amazon.
"""
msg = "No image to scan for barcodes."
if self.image_path:
barcode_path = f"{self.scandir}/{self.image_path}"
barcode_list = Barcode.decode(barcode_path)
if barcode_list:
results = "\n".join(
[
f"Code: {barcode.code}, Type: {barcode.type}"
for barcode in barcode_list
]
)
msg = f"Barcodes found:\n{results}"
results = []
for barcode in barcode_list:
# Perform Amazon lookup for each barcode
amazon_products = self.amazon.lookup_products(barcode.code)
if amazon_products:
# Assuming you want to display the first product found for each barcode
product = amazon_products[0]
product_details = f"Product: {product.title}, Price: {product.price}, ASIN: {product.asin}"
else:
product_details = "No matching Amazon product found."

barcode_result = f"Code: {barcode.code}, Type: {barcode.type}, {product_details}"
results.append(barcode_result)

msg = "\n".join(results)
else:
msg = "No barcodes found."
else:
msg = "No image to scan for barcodes."

self.notify(msg)
html_markup = f"<pre>{msg}</pre>"
self.barcode_results.content = html_markup


def update_preview(self, image_path: str = None):
"""
Update the preview with the current URL of the webcam.
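The per-barcode lookup and message assembly in scan_barcode can be illustrated in isolation. The helper below is a hypothetical sketch (not in the commit) that mirrors that logic so it can be exercised without a webcam image or the NiceGUI form; the Barcode objects only need code and type attributes here.

from typing import List
from scan.amazon import Amazon

def format_barcode_results(barcode_list: List, amazon: Amazon) -> str:
    """Standalone mirror of the formatting loop in scan_barcode."""
    if not barcode_list:
        return "No barcodes found."
    results = []
    for barcode in barcode_list:
        amazon_products = amazon.lookup_products(barcode.code)
        if amazon_products:
            product = amazon_products[0]  # first match, as in scan_barcode
            product_details = f"Product: {product.title}, Price: {product.price}, ASIN: {product.asin}"
        else:
            product_details = "No matching Amazon product found."
        results.append(f"Code: {barcode.code}, Type: {barcode.type}, {product_details}")
    return "\n".join(results)
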
3 changes: 2 additions & 1 deletion tests/test_amazon.py
@@ -27,9 +27,10 @@ def test_amazon(self):
}
debug=self.debug
debug=True
amazon=Amazon(debug=debug)
# Testing each search key
for search_key, expected_product in searches.items():
products = Amazon.lookup_products(search_key)
products = amazon.lookup_products(search_key)
self.assertTrue(products, f"No products found for search key: {search_key}")
product=products[0]
if debug:
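For a network-independent variant of this test, one could parse a locally saved Amazon results page and call extract_amazon_products directly. The sketch below is hypothetical: the sample file path is a placeholder and is not part of the repository.

from bs4 import BeautifulSoup
from scan.amazon import Amazon

def check_extraction_offline(html_path: str = "tests/amazon_sample.html"):
    # parse a saved search-results page instead of fetching it live
    with open(html_path, encoding="utf-8") as html_file:
        soup = BeautifulSoup(html_file.read(), "html.parser")
    amazon = Amazon(debug=False)
    products = amazon.extract_amazon_products(soup)
    for product in products:
        print(product.title, product.price, product.asin)
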
