## Imports

In [None]:
import requests
import bs4
import pandas

## Preparation

### Configuration

In [None]:
# Crawl settings shared by the cells below: the shop's landing page URL and
# the browser User-Agent string used when requesting pages.
CONFIGURATION = dict(
    BASE_URL="https://www.druni.es/",
    USER_AGENT="Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0",
)

### Request

In [None]:
# Fetch the landing page with the configured browser User-Agent, matching what
# the product-page requests send, and bound the wait so the cell cannot hang.
response = requests.get(
    CONFIGURATION["BASE_URL"],
    headers={"User-Agent": CONFIGURATION["USER_AGENT"]},
    timeout=30,
)

In [None]:
# Preview only the start of the document; the full HTML would flood the output.
response.text[:1000]

### Parse

In [None]:
# Name the parser explicitly: without it BeautifulSoup picks whichever parser
# happens to be installed, so the parsed tree can differ between environments.
soup = bs4.BeautifulSoup(response.text, "html.parser")

In [None]:
# Top-level navigation entries; each may carry a dropdown listing subcategories.
categories = soup.select(".navigation .level0.level-top")
print("total categories", len(categories))

# Unique subcategory listing URLs gathered from every dropdown.
subcategories_links = set()

for category in categories:
  dropdown_wrapper = category.select_one(".ui-menu-dropdown-wrapper")
  if dropdown_wrapper is None:
    # An entry without a dropdown has no subcategories to collect.
    continue

  # Select only anchors that carry an href, so a link-less anchor cannot
  # raise a KeyError.
  for subcategory in dropdown_wrapper.select("a[href]"):
    subcategories_links.add(subcategory.attrs["href"])

total categories 12


## Pages preparation

In [None]:
# Sort the listing URLs: set iteration order varies between runs, which would
# change the crawl order and therefore which products fit under the limit.
prod_links = sorted(subcategories_links)

In [None]:
def get_links_from_page(soup):
  """Return the set of ``href`` values of all ``.product-item-photo`` elements in ``soup``."""
  photo_anchors = soup.select(".product-item-photo")
  return {anchor.attrs["href"] for anchor in photo_anchors}

In [None]:
# Listing pages already fetched; shared across parse_page calls so that no
# page is downloaded twice.
visited_links = set()

def parse_page(link: str, product_links: set, limit: int) -> set:
  """Collect product links from a listing page and the pages that follow it.

  Starting at ``link``, each page is downloaded, its product links are added
  to the result, and the pagination "next" link is followed. Crawling stops
  when a page was already visited, when there is no next page, or when the
  result holds at least ``limit`` links. The limit is checked before a page is
  fetched, so the result may exceed ``limit`` by up to one page of links.

  Args:
    link: URL of the first listing page to fetch.
    product_links: Product links collected so far; this set is not mutated.
    limit: Stop fetching once at least this many links are collected.

  Returns:
    ``product_links`` itself when nothing was fetched, otherwise a new set
    that also contains the links found on the fetched pages.
  """
  collected = product_links
  next_link = link

  # Iterate rather than recurse so long paginations cannot exhaust the stack.
  while (
      next_link is not None
      and next_link not in visited_links
      and len(collected) < limit
  ):
    print(next_link)
    visited_links.add(next_link)

    response = requests.get(
        next_link,
        # Send the same browser User-Agent as the product-page requests.
        headers={"User-Agent": CONFIGURATION["USER_AGENT"]},
        timeout=30,
    )
    page_soup = bs4.BeautifulSoup(response.text, "html.parser")
    collected = collected.union(get_links_from_page(page_soup))

    # The pagination item right after the current one links to the next page.
    next_page = page_soup.select_one(".item.current + .item a")
    next_link = next_page.attrs.get("href") if next_page is not None else None

  return collected

In [None]:
# Walk every subcategory listing (following pagination) and accumulate the
# product URLs found; parse_page returns early once the limit is reached.
product_links = set()

for link in prod_links:
  product_links = parse_page(link=link, product_links=product_links, limit=1000)

https://www.druni.es/ecologico/cabello-natural/complementos
https://www.druni.es/ecologico/cabello-natural/complementos?p=2
https://www.druni.es/maquillaje/paletas/labios
https://www.druni.es/maquillaje/paletas/correctores
https://www.druni.es/maquillaje/paletas/profesionales
https://www.druni.es/maquillaje/paletas/profesionales?p=2
https://www.druni.es/maquillaje/paletas/profesionales?p=3
https://www.druni.es/maquillaje/paletas/profesionales?p=4
https://www.druni.es/maquillaje/paletas/low-cost
https://www.druni.es/maquillaje/paletas/low-cost?p=2
https://www.druni.es/cosmetica/corporal/exfoliantes
https://www.druni.es/cosmetica/corporal/hidratantes
https://www.druni.es/cosmetica/corporal/hidratantes?p=2
https://www.druni.es/cosmetica/corporal/hidratantes?p=3
https://www.druni.es/cosmetica/corporal/hidratantes?p=4
https://www.druni.es/cosmetica/corporal/hidratantes?p=5
https://www.druni.es/cosmetica/corporal/hidratantes?p=6
https://www.druni.es/cosmetica/corporal/hidratantes?p=7
https:/

In [None]:
# The total can exceed the crawl limit: parse_page checks the limit before
# fetching a page and then adds all of that page's links at once.
print(len(product_links))

1004


In [None]:
def get_detail(el):
  """Return the whitespace-stripped ``text`` of ``el``.

  A falsy ``el`` (for example ``None``) is handed back unchanged.
  """
  return el.text.strip() if el else el

In [None]:
def get_product_details(link):
  """Download a product page and extract its main fields.

  Args:
    link: URL of the product page.

  Returns:
    A dict with the keys ``manufacturer``, ``name``, ``format``, ``price``,
    ``special_price``, ``old_price`` and ``description``, each holding the
    stripped text of the first element matching its selector (or ``None``
    when nothing matches), plus ``details``: a set with the stripped text of
    every ``.bullet_point_value`` element.

  Raises:
    requests.exceptions.Timeout: if the server does not answer in time.
  """
  response = requests.get(
      link,
      headers={"User-Agent": CONFIGURATION["USER_AGENT"]},
      timeout=30,  # without a timeout a stalled server blocks the whole crawl
  )
  # Name the parser explicitly so the result does not depend on which
  # optional parsers are installed.
  soup = bs4.BeautifulSoup(response.text, "html.parser")

  def first_text(selector):
    # Stripped text of the first match, or None when the selector finds nothing.
    return get_detail(soup.select_one(selector))

  return {
      "manufacturer": first_text(".product.manufacturer .value"),
      "name": first_text(".page-title"),
      "format": first_text(".simple-format"),
      "price": first_text(".price"),
      "special_price": first_text(".special-price"),
      "old_price": first_text(".old-price"),
      "description": first_text("#description"),
      "details": {get_detail(detail) for detail in soup.select(".bullet_point_value")}
  }

In [None]:
# Download the details of every collected product URL, logging each URL as
# it is processed.
products = []

for link in product_links:
  print(link)
  products.append(get_product_details(link))

https://www.druni.es/visible-difference-moisture-body-lotion-elizabeth-arden-hidratante-cuerpo
https://www.druni.es/cera-acabado-extra-mate-nelly-cera-peinado-fijacion-total
https://www.druni.es/hot-stuff-face-palette-inglot-paleta-de-contouring
https://www.druni.es/veet-men-ducha-veet-crema-depilatoria-hombre
https://www.druni.es/forever-skin-correct-dior-corrector-antiojeras-alta-cobertura
https://www.druni.es/total-effects-bb-crema-hidratante-olay-crema-antiedad
https://www.druni.es/gel-nail-colour-essence-esmalte-unas-secado-rapido
https://www.druni.es/best-friends-makeup-set-martinelia
https://www.druni.es/beauty-inca-set-4-tizas-cabello-peine
https://www.druni.es/shine-last-go-essence-esmalte-unas
https://www.druni.es/expressie-essie-esmalte-unas-secado-rapido
https://www.druni.es/puffies-prince-husky-ty-peluche-tamano-pequeno
https://www.druni.es/balsamo-elsa-ana-lip-smacker-balsamos-labial-hidratante
https://www.druni.es/aceite-solar-spf-8-agrado-aceite-solar-en-spray-resistent

In [None]:
# Show a small sample only; the full list has one dict per crawled product.
products[:5]

In [None]:
# Keep only the products for which a price was extracted.
actual_products = [product for product in products if product["price"]]
# Share of crawled pages that yielded a price; guard against an empty crawl so
# the cell does not fail with ZeroDivisionError.
valid_percentage = len(actual_products) / len(products) * 100 if products else 0.0
valid_percentage

60.3585657370518

In [None]:
# Spot-check the raw price strings; a bounded preview avoids printing one line
# for every product.
for p in actual_products[:20]:
  print(p["price"])

26,00 €
2,99 €
28,99 €
5,99 €
41,00 €
15,99 €
1,99 €
4,95 €
3,99 €
1,99 €
7,90 €
3,99 €
1,50 €
4,99 €
7,00 €
39,99 €
35,00 €
4,45 €
3,99 €
8,34 €
29,99 €
11,95 €
4,99 €
49,00 €
45,00 €
61,50 €
7,50 €
4,75 €
2,34 €
4,99 €
4,99 €
5,69 €
82,50 €
1,99 €
50,00 €
16,95 €
3,99 €
3,99 €
42,00 €
3,19 €
22,99 €
3,95 €
5,50 €
2,95 €
1,95 €
5,99 €
4,99 €
4,99 €
45,00 €
29,99 €
20,28 €
8,49 €
5,99 €
5,95 €
34,90 €
28,00 €
12,95 €
3,25 €
9,99 €
43,00 €
1,89 €
29,99 €
4,99 €
4,49 €
19,99 €
16,95 €
1,29 €
3,99 €
3,99 €
3,99 €
50,00 €
17,95 €
4,99 €
72,00 €
29,00 €
33,50 €
9,95 €
2,99 €
21,00 €
31,00 €
55,00 €
49,95 €
3,99 €
3,95 €
3,99 €
19,95 €
79,90 €
9,90 €
4,95 €
4,19 €
2,50 €
79,90 €
2,95 €
99,00 €
1,95 €
2,95 €
4,95 €
0,99 €
1,99 €
3,99 €
1,95 €
2,99 €
24,95 €
4,99 €
5,99 €
2,80 €
2,99 €
9,99 €
19,95 €
58,00 €
5,69 €
1,99 €
55,50 €
25,99 €
0,99 €
6,99 €
3,29 €
5,99 €
27,50 €
5,95 €
8,95 €
22,95 €
1,99 €
6,95 €
3,45 €
3,99 €
29,99 €
29,99 €
50,95 €
73,00 €
6,99 €
30,00 €
4,99 €
1,99 €
39,90 €
13,

In [None]:
# One row per product that has a price.
df = pandas.DataFrame(actual_products)
# Hand-maintained snapshot label embedded in the output file name.
current_time = "26_03_2024-3_51"
# index=False keeps the positional row index out of the file; otherwise it is
# written as an unnamed first column that comes back as "Unnamed: 0" on reload.
df.to_csv(f"druni_{current_time}.csv", sep=";", encoding="utf-8", index=False)

TypeError: DataFrame.__init__() got an unexpected keyword argument 'encoding'

In [None]:
# Display the assembled product table as the cell's output.
df

Unnamed: 0,manufacturer,name,format,price,special_price,old_price,description,details
0,ELIZABETH ARDEN,Visible Difference Moisture Body Lotion,| 300ML,"26,00 €","15,60 €","26,00 €",Visible Difference Moisture Body Lotion de Eli...,"{Nutrición, Hidratación, Todo tipo pieles}"
1,NELLY,Cera Acabado Extra Mate,| 100ML,"2,99 €","1,49 €","1,79 €",La Cera Nº5 te ofrece una fijación potente y r...,{}
2,INGLOT,Hot Stuff Face Palette,| 1UD,"28,99 €",,,Estas paletas son ideales para el contouring p...,{}
3,VEET,Veet Men Ducha,| 150ML,"5,99 €",,,La crema depilatoria en gel de Veet for Men el...,{Normal}
4,DIOR,Forever Skin Correct,,"41,00 €",,"41,00 €",Dior Forever Skin Correct es el corrector anti...,{Hidratación}
...,...,...,...,...,...,...,...,...
601,GARNIER,Body Superfood,| 380ML,"4,99 €",,,Garnier Body Superfood Crema Corporal Nutritiv...,"{Nutrición, Hidratación, Seca}"
602,CLEARÉ INSTITUTE,Cc Cream Matizador Violeta,| 200ML,"11,95 €",,,"Neutraliza tonos indeseados en mechas, rubios ...",{Canas}
603,BELIZ,Wax Stick,| 75GR,"6,99 €",,,La fórmula exclusiva de Beliz proporciona una ...,{Todo tipo de cabello}
604,RITUALS,The Ritual Of Sakura Medium Gift Set,| 1UD,"39,90 €",,,Este magnífico cofre de regalo es el detalle p...,{}
