In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
import time
import pandas as pd

# Location of the local chromedriver binary used to start the browser session.
# NOTE(review): passing the path positionally is only accepted by some Selenium
# releases - confirm against the installed version.
PATH = "/Users/FaiqRasulov/Desktop/chromedriver"
driver = webdriver.Chrome(PATH)

# Collection slugs to scrape. Every listing page is requested with the same
# `filter.p.m.custom.lokasyonlar=4101` filter in its query string.
_COLLECTION_SLUGS = (
    "pendir",
    "sud-məhsullari",
    "qatiq",
    "səhər-yeməyi",
    "sud",
    "kərə-yagi",
    "marqarin",
)

urls = [
    f"https://bazarstore.az/collections/{slug}?filter.p.m.custom.lokasyonlar=4101"
    for slug in _COLLECTION_SLUGS
]

def handle_popup():
    """Dismiss the popup on the current page if it shows up.

    Waits up to 10 seconds for the popup's close element (located by its CSS
    class through XPath) to be present, moves the pointer onto it and clicks.
    The call is best-effort: when the element never appears, or the click
    fails, the error is swallowed so the caller can keep scraping.
    """
    try:
        exit_button = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//div[@class='css-11f6z0a']"))
        )
        ActionChains(driver).move_to_element(exit_button).click().perform()
        time.sleep(1)  # short pause after the click before the caller continues
    except Exception:
        # Deliberately narrower than a bare ``except``: KeyboardInterrupt and
        # SystemExit must still be able to stop a long scraping run.
        pass

def slow_scroll_down(speed, max_attempts=3):
    """Scroll the current page down step by step until its bottom stops moving.

    After every step the function pauses for one second and re-reads
    ``document.body.scrollHeight``. Reaching the bottom does not end the scroll
    immediately: the page is given ``max_attempts`` consecutive checks at the
    bottom to grow (e.g. by loading more items) before the function returns.

    Args:
        speed: Number of pixels to advance per step. Must be positive.
        max_attempts: Consecutive bottom-of-page checks with an unchanged page
            height required before stopping. ``1`` (or less) stops on the
            first check that reaches the bottom.

    Raises:
        ValueError: If ``speed`` is not positive, because the scroll position
            could never reach the bottom of the page.
    """
    if speed <= 0:
        raise ValueError("speed must be a positive number of pixels")

    target = 0
    idle_checks = 0
    last_bottom_height = None
    while True:
        target += speed
        driver.execute_script(f"window.scrollTo(0, {target});")
        time.sleep(1)  # let the page render/load between steps
        page_height = driver.execute_script("return document.body.scrollHeight")

        if target < page_height:
            # Still above the bottom: keep scrolling and forget earlier stalls.
            idle_checks = 0
            continue

        # At (or past) the bottom. Count only checks where the height did not
        # change since the previous time the bottom was reached.
        if page_height != last_bottom_height:
            idle_checks = 0
            last_bottom_height = page_height
        idle_checks += 1
        if idle_checks >= max_attempts:
            break

        # Clamp to the real bottom so newly added content is scrolled through
        # in normal steps instead of being skipped.
        target = page_height

def _text_of(node):
    """Return the stripped text of a parsed element, or "" when it is missing."""
    return node.text.strip() if node is not None else ""


def _sibling_after(label, *args, **kwargs):
    """Return the sibling following ``label`` that matches the given filter.

    Returns None when ``label`` itself is None or no sibling matches, so a
    missing label on the page does not raise.
    """
    return label.find_next_sibling(*args, **kwargs) if label is not None else None


# One row per product: [name, regular_price, sale_price, link, brand, category, sku]
data = []
try:
    for url in urls:
        driver.get(url)
        handle_popup()
        slow_scroll_down(333)

        # Parse the fully scrolled listing once; the driver is free to navigate
        # away afterwards because the cards are read from this snapshot.
        soup = BeautifulSoup(driver.page_source, 'lxml')
        products = soup.find_all('div', class_='card-information')

        for product in products:
            name_node = product.find('span', class_='card-information__text h5')
            anchor = product.find('a')
            if name_node is None or anchor is None or not anchor.get('href'):
                # Not a complete product card (no title or no link): skip it
                # instead of aborting the whole run.
                continue
            name = name_node.text.strip()
            link = "https://bazarstore.az" + anchor['href']

            # Prices: on-sale cards carry the regular price in <s> and the sale
            # price in a separate <span>; other cards only have a regular price.
            price_container = product.find('div', class_='price')
            if price_container is None:
                regular_price = ""
                sale_price = ""
            elif 'price--on-sale' in price_container.get('class', []):
                regular_price = _text_of(price_container.find('s', class_='price-item price-item--regular'))
                sale_price = _text_of(price_container.find('span', class_='price-item price-item--sale'))
            else:
                regular_price = _text_of(price_container.find('span', class_='price-item price-item--regular'))
                sale_price = ""

            # Open the product page for the remaining fields.
            driver.get(link)
            time.sleep(3)  # fixed wait for the product page to load

            page_soup = BeautifulSoup(driver.page_source, 'lxml')
            full_name = _text_of(page_soup.find('h1', class_='product__title'))
            brand = _text_of(_sibling_after(page_soup.find('span', class_='product-vendor_name'), 'a'))
            category_node = _sibling_after(page_soup.find('span', class_='product-type'), string=True)
            category = str(category_node).strip() if category_node is not None else ""
            sku = _text_of(_sibling_after(page_soup.find('span', class_='sku-name'), 'span'))

            # The product-page title replaces the card title whenever it is
            # available; the card title is only kept as a fallback.
            if full_name:
                name = full_name

            data.append([name, regular_price, sale_price, link, brand, category, sku])
finally:
    # Always release the browser, even when a page fails half-way through.
    driver.quit()

df = pd.DataFrame(data, columns=['name','regular_price', 'sale_price', 'link', 'brand', 'category', 'sku'])
df.head()

In [None]:
import warnings
# Every warning is silenced from this cell onwards.
# NOTE(review): this also hides pandas warnings raised by later cells.
warnings.filterwarnings("ignore")
import numpy as np

# Working copy of the scraped frame with one row per product name; when a name
# occurs more than once, the row scraped last is the one that is kept.
df1 = df.drop_duplicates(subset='name', keep='last').copy()
df1

In [None]:
def _price_to_float(column, *labels):
    """Remove the given labels from a price column and convert it to float.

    The column is first cast to ``str``; values that end up as an empty string
    become NaN before the float conversion.
    """
    text = column.astype(str)
    for label in labels:
        text = text.str.replace(label, '')
    return text.replace('', np.nan).astype(float)


# Regular prices carry the currency sign and the "Başl. qiymət:" prefix;
# sale prices only carry the currency sign.
df1["regular_price"] = _price_to_float(df1["regular_price"], '₼', 'Başl. qiymət:')
df1["sale_price"] = _price_to_float(df1["sale_price"], '₼')

# Discount as a percentage of the regular price, rounded to 2 decimals
# (NaN for products without a sale price).
discount_ratio = (df1["regular_price"] - df1["sale_price"]) / df1["regular_price"]
df1["endirim_faiz"] = (discount_ratio * 100).round(2)

# The scraped category text carries ':' characters; drop them.
df1["category"] = df1["category"].str.replace(':', '')

In [None]:
# Products whose scraped category is overridden with 'Marqarin'.
links = ["https://bazarstore.az/products/mocuz%C9%99-k%C9%99r%C9%99-l%C9%99zz%C9%99tli%CC%87-500-gr-qargidali?_pos=1&_fid=834af9c03&_ss=c",
"https://bazarstore.az/products/mocuz%C9%99-krem-yagi-250-qr?_pos=2&_fid=834af9c03&_ss=c",
"https://bazarstore.az/products/mocuz%C9%99-qat-qat-250-qr?_pos=3&_fid=834af9c03&_ss=c",
"https://bazarstore.az/products/mocuz%C9%99-ag-x%C9%99mi%CC%87r-yagi-250-qr?_pos=4&_fid=834af9c03&_ss=c",
"https://bazarstore.az/products/mocuz%C9%99-k%C9%99r%C9%99-l%C9%99zz%C9%99tli%CC%87-qargidali-yagi-250q?_pos=5&_fid=834af9c03&_ss=c",
"https://bazarstore.az/products/fi%CC%87nal-bi%CC%87tki%CC%87-yagi-250-qr?_pos=6&_fid=834af9c03&_ss=c"]

# Match on the product URL without its query string. The `_pos` / `_fid` / `_ss`
# parameters are presumably listing-tracking values that change from one scrape
# to the next (TODO confirm), so an exact string comparison would silently stop
# matching; the path part identifies the product.
marqarin_pages = {link.partition('?')[0] for link in links}
scraped_pages = df1['link'].str.partition('?')[0]
df1.loc[scraped_pages.isin(marqarin_pages), 'category'] = 'Marqarin'

# Normalise every category label to upper case.
df1['category'] = df1['category'].str.upper()

In [None]:
# Quantity in the product name: a number (decimal point or comma) followed by a
# unit. Longer unit spellings are listed first so that "KG" is never read as
# "G", and the trailing \b keeps ordinary words from being taken for a unit.
pattern = r'(?i)(\d+(?:[.,]\d+)?)\s*(KG|KQ|ML|LT|GRAM|QRAM|GR|QR|G|Q|L)\b'

# Multiplier that turns each unit into grams / millilitres.
_UNIT_FACTORS = {
    'G': 1, 'GR': 1, 'GRAM': 1, 'Q': 1, 'QR': 1, 'QRAM': 1,
    'KG': 1000, 'KQ': 1000,
    'ML': 1,
    'L': 1000, 'LT': 1000,
}

# Split every name into (amount, unit) and convert in one vectorised step. The
# amount keeps its decimal part ("1.5 KQ" -> 1500.0) and the unit decides the
# multiplier, so the result does not depend on the order of per-unit passes.
quantity_parts = df1['name'].str.extract(pattern)
amount = pd.to_numeric(quantity_parts[0].str.replace(',', '.', regex=False), errors='coerce')
factor = quantity_parts[1].str.upper().map(_UNIT_FACTORS)
df1['qramaj'] = amount * factor

# Names that mention KG/KQ without any amount are treated as 1000.
# This must run while unparsed rows are still NaN.
mentions_kilo = df1['name'].str.contains('KG|KQ|kg|kq', na=False)
df1.loc[df1['qramaj'].isna() & mentions_kilo, 'qramaj'] = 1000

# Rows that still have no quantity are marked with the placeholder 'NA'.
df1['qramaj'] = df1['qramaj'].astype(object).fillna('NA')

# Fat percentage: the first "<number> %" in the name (decimal point or comma);
# names without a percentage get 0.
fat_text = df1['name'].str.extract(r'(\d+[\.,]?\d*\s*%)')[0]
df1['yag_faiz'] = (
    fat_text
    .str.replace('%', '', regex=False)
    .str.replace(',', '.', regex=False)
    .astype(float)
    .fillna(0)
)


In [None]:
# Products whose qramaj is forced to 1000.
links = [
"https://bazarstore.az/products/milla-suzmeli-ag-pendir-1-kg?_pos=123&_fid=a14d6ca90&_ss=c",
"https://bazarstore.az/products/goycay-sud-canax-pendi%CC%87ri%CC%87-1-kg?_pos=182&_fid=614dd75e2&_ss=c",
"https://bazarstore.az/products/n1-sor-1-kg?_pos=83&_fid=5b7f47bed&_ss=c",
"https://bazarstore.az/products/anchor-k%C9%99r%C9%99yagi-82-9-1-kg?_pos=17&_fid=0d12a6a08&_ss=c",
"https://bazarstore.az/products/vi%CC%87oletto-k%C9%99r%C9%99-yagi-1-kg-%C9%99d%C9%99d?_pos=23&_fid=0d12a6a08&_ss=c",
"https://bazarstore.az/products/master-gourment-krema-1-l-34?_pos=46&_fid=4a3e11d90&_ss=c",
"https://bazarstore.az/products/master-gourment-krema-1-l-chef?_pos=47&_fid=4a3e11d90&_ss=c",
"https://bazarstore.az/products/master-gourment-krema-1-l-gold?_pos=48&_fid=4a3e11d90&_ss=c",
"https://bazarstore.az/products/yurdum-ayran-1-l?_pos=56&_fid=5b7f47bed&_ss=c",
"https://bazarstore.az/products/atena-nan%C9%99li%CC%87-ayran-1l?_pos=146&_fid=ba53ff589&_ss=c",
"https://bazarstore.az/products/atena-ayran-1l?_pos=159&_fid=ba53ff589&_ss=c",
"https://bazarstore.az/products/atena-sud-3-2-1l?_pos=1&_fid=fac46f262&_ss=c",
"https://bazarstore.az/products/alpro-badam-1l?_pos=5&_fid=fac46f262&_ss=c",
"https://bazarstore.az/products/az%C9%99r-sud-balans-i%CC%87mmuni%CC%87tet-sudu-1l-3-2?_pos=6&_fid=fac46f262&_ss=c",
"https://bazarstore.az/products/haci-turqay-sacaq-pendir-kg?_pos=3&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/az%C9%99rsud-yagli-pendi%CC%87r-kq?_pos=4&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/g%C9%99d%C9%99b%C9%99y-setkali-pendi%CC%87r-kg?_pos=18&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/azersud-canax-pendi%CC%87r-kg?_pos=20&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/heyat-qida-pendir-kend-kg?_pos=22&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/svalya-pendir-parnidzio-38-kg?_pos=23&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/yurdum-yagli-pendir-kg?_pos=24&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/svalya-pendi%CC%87r-mozzarella-45-kq?_pos=25&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/svalya-c%C9%99ki%CC%87-pendi%CC%87ri%CC%87-kg?_pos=26&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/selman-yuvarlaq-ag-pendi%CC%87r-kg?_pos=27&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/sehliyali-sacaqli-pendir-vakkum-kg?_pos=28&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/pendir-atena-canaq-kg?_pos=29&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/oldenburger-gouda-48-yagli-pendir-kg?_pos=30&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/nova-pendir-gouda-kg?_pos=31&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/muhlenhof-pendir-tilsiter-kg?_pos=32&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/mlekovita-pendir-cheddar-kg?_pos=33&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/mi%CC%87lla-ag-pendi%CC%87r-kq?_pos=34&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/i%CC%87calki%CC%87-yantar-pendi%CC%87r-hi%CC%87s%C9%99-veri%CC%87lm%C9%99z-kq?_pos=35&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/feta-pendi%CC%87ri%CC%87-sultan-kg?_pos=36&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/etle-sacaq-pendi%CC%87ri%CC%87-kg?_pos=37&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/emmental-ag-pendir-kg?_pos=38&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/azersud-mozzarella-pendir-kg?_pos=39&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/az%C9%99r-sud-dan%C9%99li%CC%87-kg?_pos=40&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/avrupa-mozzarella-pendir-kg?_pos=41&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/anadolu-yagli-pendir-kg?_pos=42&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/adelle-ag-pendir-kg?_pos=43&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/final-metbex-yag-kg?_pos=10&_fid=0d12a6a08&_ss=c",
"https://bazarstore.az/products/best-cow-k%C9%99r%C9%99-yagi-kq?_pos=11&_fid=0d12a6a08&_ss=c",
"https://bazarstore.az/products/i%CC%87calki%CC%87-yantar-pendi%CC%87r-hi%CC%87s%C9%99-veri%CC%87lmi%CC%87s-kq?_pos=35&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/heyat-qida-pendir-sulquni-kg?_pos=36&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/goycay-sud-canax-pendi%CC%87ri%CC%87-kg?_pos=37&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/galbani-garganzola-piccante-pendir-kg?_pos=38&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/doktor-mi%CC%87lk-gauda-kg-4?_pos=39&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/cheesella-sacaqli-pendir-kq-1x5?_pos=40&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/az%C9%99rsud-antep-pendi%CC%87ri%CC%87-kq?_pos=41&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/atena-agcabedi-pendiri-kg?_pos=42&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/anchor-feta-pendi%CC%87ri%CC%87-kq?_pos=43&_fid=f059b159d&_ss=c",
"https://bazarstore.az/products/uzun-omur-sor-kg?_pos=1&_fid=1a580a405&_ss=c",
"https://bazarstore.az/products/yurdum-qaymaq-kg?_pos=11&_fid=1a580a405&_ss=c",
"https://bazarstore.az/products/azersud-yurdum-k%C9%99smi%CC%87k-kg?_pos=12&_fid=1a580a405&_ss=c",
"https://bazarstore.az/products/uzun-omur-qaymaq-kg?_pos=13&_fid=1a580a405&_ss=c",
"https://bazarstore.az/products/gerda-alman-k%C9%99r%C9%99-yagi-kq?_pos=9&_fid=0d12a6a08&_ss=c",
]

# Match on the product URL without its query string. This list already holds
# different products recorded with the same `_pos` / `_fid` pair, which suggests
# those parameters are not stable per product (TODO confirm); comparing the full
# URL would therefore miss a product as soon as its parameters differ.
kilo_pages = {link.partition('?')[0] for link in links}
scraped_pages = df1['link'].str.partition('?')[0]
df1.loc[scraped_pages.isin(kilo_pages), 'qramaj'] = 1000

In [None]:
# Manual qramaj corrections, keyed by product URL. Values are numeric so they
# stay consistent with the numeric values written by the other cells.
# The previous version listed the "ulker-dido-vafli-55-5-gr" link twice and the
# second entry overwrote 55.5 with 400; the URL itself says 55.5 gr, so the
# duplicate is treated as a copy-paste slip and dropped.
qramaj_fixes = {
    "https://bazarstore.az/products/ulker-dido-vafli-55-5-gr-kare-sokoladli?_pos=23&_fid=4175d108b&_ss=c": 55.5,
    "https://bazarstore.az/products/fi%CC%87nal-m%C9%99tb%C9%99x-yagi-1-8-kg-200-gr-1?_pos=1&_fid=0d12a6a08&_ss=c": 2000,
    "https://bazarstore.az/products/milla-qatiq-1-5-kq-1-5?_pos=18&_fid=40df411d5&_ss=c": 1500,
    "https://bazarstore.az/products/n1-k%C9%99smi%CC%87k-400-gr-meyv%C9%99li%CC%87?_pos=75&_fid=5b7f47bed&_ss=c": 400,
    "https://bazarstore.az/products/n1-k%C9%99nd-soru-400-gr?_pos=82&_fid=5b7f47bed&_ss=c": 400,
    "https://bazarstore.az/products/n1-qaymaqli-sor-400-gr?_pos=76&_fid=5b7f47bed&_ss=c": 400,
    "https://bazarstore.az/products/mocuz%C9%99-k%C9%99r%C9%99-l%C9%99zz%C9%99tli%CC%87-qargidali-yagi-250q?_pos=5&_fid=834af9c03&_ss=c": 250,
    "https://bazarstore.az/products/tahsi%CC%87ldaroglu-kohne-kasar-350-q-%C9%99zi%CC%87n%C9%99?_pos=128&_fid=a14d6ca90&_ss=c": 350,
    "https://bazarstore.az/products/i%CC%87calki-yantar-pendi%CC%87r-80-q?_pos=58&_fid=f059b159d&_ss=c": 80,
    "https://bazarstore.az/products/savuski%CC%87n-brest-li%CC%87tovsk-k%C9%99si%CC%87k-pendi%CC%87r-45?_pos=62&_fid=f059b159d&_ss=c": 210,
    "https://bazarstore.az/products/savuski%CC%87n-pendi%CC%87r-brest-li%CC%87tovsk-leqk-na-35?_pos=67&_fid=f059b159d&_ss=c": 210,
    "https://bazarstore.az/products/savuski%CC%87n-pendi%CC%87r-brest-li%CC%87tovsk-klas-na-45?_pos=69&_fid=f059b159d&_ss=c": 210,
    "https://bazarstore.az/products/emborg-pendir-grana-padano-32?_pos=97&_fid=a14d6ca90&_ss=c": 150,
    "https://bazarstore.az/products/savuski%CC%87n-pendi%CC%87r-rossiyskiy-k%C9%99smi%CC%87k-50-20?_pos=106&_fid=a14d6ca90&_ss=c": 200,
    "https://bazarstore.az/products/prezi%CC%87dent-di%CC%87li%CC%87mli%CC%87-mozarella-pendi%CC%87r-150?_pos=155&_fid=614dd75e2&_ss=c": 150,
    "https://bazarstore.az/products/tahsildaroglu-ezine-klasik-ag-pendi%CC%87r-600?_pos=180&_fid=614dd75e2&_ss=c": 600,
    "https://bazarstore.az/products/doktor-mi%CC%87lk-k%C9%99smi%CC%87k-ci%CC%87y%C9%99l%C9%99k-fruk-0-180?_pos=102&_fid=5b7f47bed&_ss=c": 180,
    "https://bazarstore.az/products/ivanovka-uzun-omur-erikli-kesmik-18-400?_pos=119&_fid=5b7f47bed&_ss=c": 400,
    "https://bazarstore.az/products/prostokvasi%CC%87no-si%CC%87rok-qatilasdirilmis-sud?_pos=144&_fid=ba53ff589&_ss=c": 50,
}

# Manual yag_faiz (fat percentage) corrections, keyed the same way.
yag_faiz_fixes = {
    "https://bazarstore.az/products/dr-milk-kaunas-xama-200-gr-10?_pos=30&_fid=4a3e11d90&_ss=c": 10,
}

# Compare on the URL without its query string (the `_pos` / `_fid` / `_ss`
# parameters are presumably not stable per product - TODO confirm) and write
# through .loc: the chained form df1[col][mask] = value may assign to a
# temporary object and leave df1 unchanged.
scraped_pages = df1['link'].str.partition('?')[0]
for column, fixes in (('qramaj', qramaj_fixes), ('yag_faiz', yag_faiz_fixes)):
    for url, value in fixes.items():
        df1.loc[scraped_pages == url.partition('?')[0], column] = value

In [None]:
# Bare expression: in a notebook this renders df1 as the cell output, giving a
# visual check of the frame after the preceding cells have modified it.
df1

In [None]:
import datetime as dt  # imported here but not referenced by this cell

# Final column layout of the report, with the date stamp first.
report_columns = [
    'tarix', 'sku', 'brand', 'category', 'name', 'qramaj',
    'yag_faiz', 'regular_price', 'sale_price', 'endirim_faiz', 'link',
]

# Stamp every row with today's date (dd/mm/YYYY) and keep only the report
# columns, in report order.
today_label = pd.to_datetime("today").strftime("%d/%m/%Y")
df1 = df1.assign(tarix=today_label)[report_columns]
df1

In [None]:
# Date stamp (dd-mm-YYYY) shared by the workbook's file name and its sheet name.
TodaysDate = time.strftime("%d-%m-%Y")
excelfilename = f"/Users/FaiqRasulov/Desktop/Qiymet_list/bazarstore_qiymet_list {TodaysDate}.xlsx"

# Write the report to a single sheet, with a header row and without the index.
df1.to_excel(
    excelfilename,
    sheet_name=f"BazarStore {TodaysDate}",
    index=False,
    header=True,
)