In [19]:
from bs4 import BeautifulSoup
import requests
import re
import json

In [21]:
# TASK 1: collect the plano-convex lens links from the product listing page.

# Listing page URL (per its slug: N-BK7 plano-convex lenses, AR 400-700nm)
url = 'https://www.optosigma.com/eu_en/optics/lenses/spherical-lenses/plano-convex-spherical-lenses/n-bk7-plano-convex-lenses-ar-400-700nm-SLB-P-M.html'

# Fetch the listing page. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() surfaces HTTP errors (404, 503, ...)
# right here instead of as a confusing parsing failure further down.
page = requests.get(url, timeout=30)
page.raise_for_status()

# Parse the HTML content of the page
soup = BeautifulSoup(page.text, 'html.parser')

# Locate the table with the products; fail with a clear message if the page
# layout is not what this notebook expects.
table = soup.find('table', class_='grouped-items')
if table is None:
    raise RuntimeError(f"Could not find the 'grouped-items' product table at {url}")

# Get links to all plano-convex lenses. The link text is matched
# case-insensitively, with or without the hyphen and surrounding spaces.
items = table.find_all('a', class_='link', string=re.compile(r"plano\s*[-]?\s*convex", re.IGNORECASE))

# Maps product code (text of the SKU element) -> {spec name: spec value}
# for every product whose detail page mentions the 400-700 nm coating.
products = {}

# Compiled once outside the loop: matches "400-700 nm" with optional
# spaces/hyphen, case-insensitively.
coating_pattern = re.compile(r'400\s*[-]?\s*700\s*nm', re.IGNORECASE)

# A Session reuses the underlying connection across the per-product requests.
with requests.Session() as session:
    for item in items:
        item_url = item.get('href')
        if not item_url:
            # Anchor without a target: there is no detail page to fetch.
            continue

        # Get the product's detail page. The timeout prevents one stalled
        # request from blocking the whole scrape.
        item_page = session.get(item_url, timeout=30)
        if not item_page.ok:
            # Report failed pages instead of letting them silently fall
            # through the coating check below.
            print(f"Skipping {item_url}: HTTP {item_page.status_code}")
            continue
        item_soup = BeautifulSoup(item_page.text, 'html.parser')

        # Keep only products that list the required coating information
        if item_soup.find('td', string=coating_pattern) is None:
            continue

        # Locate the product code and the specs table; skip (with a message)
        # pages that do not have the expected structure rather than crashing.
        code_tag = item_soup.find('div', class_='product-info-sku')
        spec_table = item_soup.find('table', class_='data table additional-attributes')
        if code_tag is None or spec_table is None:
            print(f"Skipping {item_url}: product code or specs table not found")
            continue

        # Extract one spec per table row (<th> = name, <td> = value)
        specs = {}
        for row in spec_table.find_all('tr'):
            name_cell = row.find('th')
            value_cell = row.find('td')
            if name_cell is None or value_cell is None:
                # Row without a name/value pair: nothing to record
                continue
            specs[name_cell.get_text(strip=True)] = value_cell.get_text(strip=True)

        products[code_tag.get_text(strip=True)] = specs

# Persist the scraped product specs as pretty-printed JSON
filename = 'products.json'

with open(filename, 'w') as out_file:
    out_file.write(json.dumps(products, indent=4))

In [31]:
# Number of scraped products that passed the coating check
len(products)

232

In [33]:
# TASK 2

# Read the scraped product specs back from the JSON file
filename = 'products.json'

with open(filename) as source_file:
    data = json.loads(source_file.read())

# Substrings that identify the specs of interest, paired with the standardized
# name each one is stored under. Checked in order; the first hit wins.
SPEC_NAME_PATTERNS = (
    ('Focal length', 'Focal Length'),
    ('Diameter', 'Diameter'),
    ('Material', 'Material'),
)

# Only these specs are measurements; every other spec is kept as plain text,
# so a material name that happens to start with a digit is not turned into a number.
NUMERIC_SPECS = ('Focal Length', 'Diameter')

# An optionally signed decimal number followed by an optional unit.
# The number part accepts "25", "25.4", "25." and ".5", but not "." or "1.2.3"
# as a whole, so float() below cannot fail on what it captured.
MEASUREMENT_PATTERN = re.compile(r'([-+]?(?:\d+(?:\.\d*)?|\.\d+))\s*([a-zA-Z%/]+)?')


def _standardize_spec_name(raw_name):
    """Return the standardized name for a raw spec name, or None if irrelevant."""
    for needle, standard_name in SPEC_NAME_PATTERNS:
        if needle in raw_name:
            return standard_name
    return None


def _parse_measurement(raw_value):
    """Split a measurement string into a numeric value and its unit.

    Returns {"value": float, "unit": str or None} when the text starts with a
    number, otherwise {"value": raw_value, "unit": None} with the input unchanged.
    """
    match = MEASUREMENT_PATTERN.match(str(raw_value).strip())
    if match is None:
        # Non-numeric text is passed through untouched
        return {"value": raw_value, "unit": None}
    return {"value": float(match.group(1)), "unit": match.group(2)}


# Maps product code -> {standardized spec name: {"value": ..., "unit": ...}}
cleaned_data = {}

# Clean and standardize the specs of every product
for product_code, specs in data.items():
    cleaned_specs = {}

    for key, value in specs.items():
        cleaned_key = _standardize_spec_name(key)
        if cleaned_key is None:
            # Skip irrelevant specs
            continue

        if cleaned_key == 'Diameter':
            # For Diameter, strip the φ character
            value = value.replace('φ', '').strip()

        if cleaned_key in NUMERIC_SPECS:
            cleaned_value = _parse_measurement(value)
        else:
            cleaned_value = {"value": value, "unit": None}

        # NOTE: if several raw names map to the same standardized name,
        # the last one encountered overwrites the earlier ones.
        cleaned_specs[cleaned_key] = cleaned_value

    cleaned_data[product_code] = cleaned_specs

def _format_spec(specs, name):
    """Format one cleaned spec for display.

    Returns "<value> <unit>" when the spec has a unit, the bare value as text
    when it does not, and "N/A" when the product has no such spec at all.
    """
    entry = specs.get(name)
    if entry is None:
        return 'N/A'
    if entry['unit']:
        return f"{entry['value']} {entry['unit']}"
    return str(entry['value'])


print(f"{'Product Code':<20} {'Focal Length':<15} {'Diameter':<15} {'Material':<15}")
print("=" * 65)

# Iterate over the cleaned data and print one table row per product.
# Missing specs are shown as "N/A" instead of aborting with a KeyError, and
# each measurement is printed with the unit that was actually parsed for it.
for product_code, specs in cleaned_data.items():
    focal_length = _format_spec(specs, 'Focal Length')
    diameter = _format_spec(specs, 'Diameter')
    material = _format_spec(specs, 'Material')

    print(f"{product_code:<20} {focal_length:<15} {diameter:<15} {material:<15}")

Product Code         Focal Length    Diameter        Material       
SLB-08-25PM          25.0 mm         8.0 mm          BK7            
SLB-80-800PM         800.0 mm        80.0 mm         BK7            
SLB-15-100PM         100.0 mm        15.0 mm         BK7            
SLB-25.4-35PM        35.0 mm         25.4 mm         BK7            
SLB-30-350PM         350.0 mm        30.0 mm         BK7            
SLB-40-1000PM        1000.0 mm       40.0 mm         BK7            
SLB-50.8-120PM       120.0 mm        50.8 mm         BK7            
SLB-08-50PM          50.0 mm         8.0 mm          BK7            
SLB-100-200PM        200.0 mm        100.0 mm        BK7            
SLB-20-25PM          25.0 mm         20.0 mm         BK7            
SLB-25.4-60PM        60.0 mm         25.4 mm         BK7            
SLB-30-500PM         500.0 mm        30.0 mm         BK7            
SLB-50-90PM          90.0 mm         50.0 mm         BK7            
SLB-50.8-200PM       200.0 mm     