In [1]:
from pathlib import Path
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
def get_product_detail(field, tag, filter_context=None):
    '''Collect the text of every matching tag under ``field`` into one string.

    field          -- parsed node exposing ``find_all`` (a product card or the whole soup)
    tag            -- name of the tag to look for, e.g. 'div'
    filter_context -- optional attribute filter, handed to ``find_all`` unchanged

    The text of each match is stripped of surrounding whitespace and the pieces
    are joined with single spaces; when nothing matches the result is ''.
    e.g. get_product_detail(soup, 'div', {"class": "m-offer-tile__quantity"})
    '''
    matches = field.find_all(tag, filter_context)
    cleaned_texts = map(lambda node: node.text.strip(), matches)
    return " ".join(cleaned_texts)

In [3]:
def get_product_details(product_card, fields_list):
    '''Extract every configured detail from one product card.

    product_card -- parsed node for a single product, passed to get_product_detail
    fields_list  -- iterable of (tag, filter_context, title) triples; the title is
                    only a column label and plays no part in the lookup

    Returns a tuple holding one string per entry of fields_list, in the same order.
    A tuple (rather than a lazy generator) is returned so a row can be inspected
    or iterated more than once without silently coming back empty the second time.
    '''
    return tuple(get_product_detail(product_card, tag, filter_context)
                 for tag, filter_context, _title in fields_list)

In [4]:
def get_all_products_details(products_list, fields_list):
    '''Build one row of details per product, keeping the order of products_list.

    Every product is handed to get_product_details together with fields_list;
    the results are gathered in a list (empty when there are no products).
    '''
    rows = []
    for card in products_list:
        rows.append(get_product_details(card, fields_list))
    return rows

In [5]:
# Download the current-week offers page for category 12 (the slug in the URL).
OFFERS_URL = 'https://www.kaufland.ro/oferte/saptamana-curenta.category=12_Cur%C4%83%C8%9Benie__detergen%C8%9Bi.html'
# Without a timeout, requests may wait indefinitely on an unresponsive server.
r = requests.get(OFFERS_URL, timeout=30)
# Stop here on a 4xx/5xx answer instead of parsing an error page further down.
r.raise_for_status()

In [6]:
# Parse the downloaded page text into a searchable tree with the lxml parser.
page_html = r.text
soup = BeautifulSoup(page_html, 'lxml')

In [66]:
# Identify the availability period: the text of the first <h2> mentioning 'Valabilitate'.
# next() stops at the first match instead of building a full list just to take [0].
availability = next(
    (heading.text for heading in soup.find_all('h2') if 'Valabilitate' in heading.text),
    None,
)
if availability is None:
    # No such heading on the page: fail with an explicit message rather than an opaque IndexError.
    raise ValueError("no <h2> heading containing 'Valabilitate' was found on the page")

In [140]:
# Collect the product boxes/cards: every <a> found with the offer-tile link classes below.
PRODUCT_CARD_CLASS = "m-offer-tile__link u-button--hover-children"
products_cards = soup.find_all('a', attrs={"class": PRODUCT_CARD_CLASS})

In [153]:
# Parsing schema: one (tag type, filter context, column title) triple per detail
# extracted from a product card. A filter context of None supplies no attribute
# filter; the title becomes the column name in the resulting table.
fields_to_parse = [
    ('h5', None, 'title'),
    ('h4', None, 'subtitle'),
    ('div', {"class": "m-offer-tile__quantity"}, 'quantity'),
    ('div', {"class": "m-offer-tile__basic-price"}, 'price_per_metric'),
    ('div', {"class": "m-offer-tile__promo-message"}, 'promo_message'),
    ('div', {"class": "a-pricetag__old-price"}, 'old_price'),
    ('div', {"class": "a-pricetag__discount"}, 'discount'),
    ('div', {"class": "a-pricetag__price"}, 'new_price'),
    ('div', {"class": "a-eye-catcher__headline"}, 'eye_catcher'),
    ('div', {"data-category-id": "12"}, 'additional_offers'),
]

In [163]:
# Scrape one row per product card, then label the columns with the schema titles
# (the third element of every fields_to_parse entry).
all_products = get_all_products_details(products_cards, fields_to_parse)
columns = [title for _tag, _filter_context, title in fields_to_parse]
online_promotions_df = pd.DataFrame(all_products, columns=columns)

In [164]:
# Absolute link for every card. urljoin resolves the href against the site root, so
# root-relative, relative and already-absolute hrefs all produce a valid URL; a card
# without an href falls back to the site root instead of raising a TypeError.
online_promotions_df['url'] = [urljoin('https://www.kaufland.ro', link.get('href') or '') for link in products_cards]

In [165]:
# The single availability text is assigned to the whole column, i.e. repeated on every row.
online_promotions_df['availability'] = availability

In [166]:
# Turn the scraped text into a 0/1 flag: 1 where the card yielded no text for its
# data-category-id="12" block (empty string), 0 where it yielded some.
# A single comparison replaces two in-place overwrites whose result depended on their
# order, and the column ends up with a real integer dtype instead of mixed objects.
# NOTE: run once per freshly built frame; afterwards the column no longer holds text.
online_promotions_df['additional_offers'] = (online_promotions_df['additional_offers'] == "").astype(int)

In [167]:
# Show the assembled table; the recorded output below is a truncated preview (middle rows elided).
online_promotions_df

Unnamed: 0,title,subtitle,quantity,price_per_metric,promo_message,old_price,discount,new_price,eye_catcher,additional_offers,url,availability
0,Ariel,Detergent capsule pentru rufe,25 buc,"(=1 BUC 1,34)",Reducere cu Kaufland Card,3999,- 16%,3333,,0,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
1,Fairy,Detergent pentru vase,875 ml,"(=1 l 7,99)",Reducere cu Kaufland Card,829,- 15%,699,,0,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
2,Savo,Soluţie antimucegai,500 ml,"(=1 kg 34,96)",Reducere cu Kaufland Card,1955,- 10%,1748,,0,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
3,Cif,Cremă pentru suprafeţe,700 ml,,,1185,- 11%,1049,,0,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
4,Ariel,Detergent capsule pentru rufe,2 x 35 buc,,,,,6799,Ofertă specială,0,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
...,...,...,...,...,...,...,...,...,...,...,...,...
57,Bol,,1 buc,,,,,599,Ofertă specială,1,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
58,Candelă cu capac,timp de ardere: 72 h,1 buc,,,,,299,Ofertă specială,1,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
59,Baloane Halloween,,1 buc,,,,,999,Ofertă specială,1,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020
60,Instalație decorativă,,1 buc,,,,,1999,Ofertă specială,1,https://www.kaufland.ro/oferte/saptamana-curen...,Valabilitate: din 21.10.2020 până în 27.10.2020


In [168]:
# Persist the scraped offers as CSV, without the index column.
output_csv = Path('leaflets/pdfs_online_output/RO-43-1000_online.csv')
# to_csv does not create missing folders, so make sure the target directory exists first.
output_csv.parent.mkdir(parents=True, exist_ok=True)
# 'utf-8-sig' is the canonical codec name (UTF-8 with a leading byte-order mark); the
# previous spelling 'utf-8 sig' only worked through lenient codec-name normalisation.
online_promotions_df.to_csv(output_csv, encoding='utf-8-sig', index=False)