# In [39]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def _property_value(product_soup, label_text):
    """Return the text of the property whose label contains *label_text*.

    Looks for a ``div.detail__properties-label`` whose string contains
    *label_text* and returns the stripped text of its next sibling
    ``div.detail__properties-link``. Returns ``"N/A"`` when either the label
    or the value element is missing.
    """
    label = product_soup.find(
        "div",
        class_="detail__properties-label",
        # BeautifulSoup passes ``tag.string`` to the filter, which is None for
        # tags that do not wrap exactly one string; guard against it so the
        # ``in`` test cannot raise TypeError.
        string=lambda text: text is not None and label_text in text,
    )
    if label is None:
        return "N/A"

    value = label.find_next_sibling("div", class_="detail__properties-link")
    return value.text.strip() if value else "N/A"


def fetch_product_details(links, timeout=30):
    """Collect name, price, colour and material for products listed on category pages.

    For every URL in *links* the page is downloaded, each
    ``div.product__name`` that sits inside an ``<a href=...>`` is followed,
    and the linked product page is parsed.

    Args:
        links: Iterable of category page URLs.
        timeout: Seconds passed to ``requests.get`` for every request, so a
            stalled connection raises instead of blocking indefinitely.

    Returns:
        A list with one dict per product page visited, each holding the keys
        ``"name"``, ``"price"``, ``"color"`` and ``"materiale"``. A value is
        ``"N/A"`` when the corresponding element is not found on the page.
    """
    from urllib.parse import urljoin

    category_data = []

    for link in links:
        page = requests.get(link, timeout=timeout)
        soup = BeautifulSoup(page.text, "html.parser")

        for product_div in soup.find_all("div", class_="product__name"):
            product_link = product_div.find_parent("a")
            if not product_link or 'href' not in product_link.attrs:
                continue

            # Resolve the href against the category page URL so root-relative,
            # relative and absolute hrefs all produce a valid product URL.
            product_url = urljoin(link, product_link['href'])

            product_page = requests.get(product_url, timeout=timeout)
            product_soup = BeautifulSoup(product_page.text, "html.parser")

            name_tag = product_soup.find("h1", class_="product__name")
            price_tag = product_soup.find("span", class_="product__prices-sale")

            category_data.append({
                "name": name_tag.text.strip() if name_tag else "N/A",
                "price": price_tag.text.strip() if price_tag else "N/A",
                # Always present, so every record has the same keys.
                "color": _property_value(product_soup, "Colore"),
                "materiale": _property_value(product_soup, "Materiale"),
            })

    return category_data

# Category listing pages whose products are scraped.
links = [
    "https://www.lumberjack.com/it/sport_shoes?gender=man",
    "https://www.lumberjack.com/it/boat-shoes?gender=man",
    "https://www.lumberjack.com/it/slip_on?gender=woman",
    "https://www.lumberjack.com/it/sandals?gender=woman",
    "https://www.lumberjack.com/it/ankle-boots?gender=woman",
    "https://www.lumberjack.com/it/beatles?gender=woman",
    "https://www.lumberjack.com/it/outdoor?gender=woman"
]


def _split_product_name(full_name):
    """Split a scraped product name into ``(category, name, type)``.

    The last word is the category, the first word is the name and the words
    in between form the type (``'N/A'`` when there are fewer than three
    words). A name with no words yields ``'N/A'`` for all three parts instead
    of raising IndexError.
    """
    words = full_name.split()
    if not words:
        return ('N/A', 'N/A', 'N/A')
    middle = ' '.join(words[1:-1]) if len(words) > 2 else 'N/A'
    return (words[-1], words[0], middle)


lumberjack_details = fetch_product_details(links)

# Naming the columns explicitly keeps them present even when nothing was
# scraped, so the column lookups below do not raise KeyError on an empty result.
lumberjack_df = pd.DataFrame(lumberjack_details, columns=['name', 'price', 'color', 'materiale'])

# Drop the 'EUR' marker and switch the decimal comma to a dot before parsing;
# anything that still is not a number becomes NaN.
lumberjack_df['price'] = pd.to_numeric(lumberjack_df['price'].astype(str).str.replace(r'\s*EUR', '', regex=True).str.replace(',', '.'), errors='coerce')

# Split every name once and fan the parts out into their own columns.
name_parts = lumberjack_df['name'].apply(_split_product_name)
for position, column in enumerate(('Category', 'Name', 'Type')):
    lumberjack_df[column] = name_parts.str[position].str.title()

for column in ('color', 'materiale'):
    lumberjack_df[column] = lumberjack_df[column].str.title()

lumberjack_df = lumberjack_df[['Category', 'Type', 'Name', 'price', 'color', 'materiale']]

lumberjack_df


# Notebook output:
#
#   Category      Type    Name  price              color               materiale
# 0     Uomo  Sneakers  Marvin  64.99              White  Suede-Synthetic Smooth
# 1     Uomo  Sneakers  Marvin  64.99          Navy Blue  Suede-Synthetic Smooth
# 2     Uomo  Sneakers  Marvin  69.99              White                 Leather
# 3     Uomo  Sneakers  Marvin  69.99          Navy Blue                 Leather
# 4     Uomo  Sneakers  Warner  79.99  Navy Blue/Bluette              Suede-Mesh
