In [None]:
import pandas as pd
import numpy as np
import requests
import time
from bs4 import BeautifulSoup as bs


class MicroScraper:
    """Collect microphone offers from a paginated product listing.

    Each scraped offer is stored as a dict in ``self.data`` with the keys
    ``'Nazwa'`` (title), ``'URL'`` and ``'Cena'`` (price text, or ``None``
    when the offer shows no price), plus one key per ``name: value``
    parameter found in the offer.

    Args:
        base_url: URL prefix of a listing page. The page number is appended
            to it verbatim, so it must end right where the page number
            belongs (typically with the page query parameter and ``=``).
    """

    def __init__(self, base_url):
        self.base_url = base_url
        self.data = []

    def scrape_pages(self, pages, delay=4, timeout=30):
        """Download listing pages ``1..pages`` and collect their offers.

        Args:
            pages: Number of the last page to download (inclusive).
            delay: Seconds to sleep after every request.
            timeout: Seconds to wait for the server before
                ``requests.get`` gives up and raises.

        Pages answered with an HTTP error status are reported and skipped
        instead of being parsed as if they were (empty) listings.
        """
        for page in range(1, pages + 1):
            print(f'Page: {page} / {pages}')
            page_url = f'{self.base_url}{page}'
            r = requests.get(page_url, timeout=timeout)
            # Pause after every request, successful or not.
            time.sleep(delay)
            if not r.ok:
                print(f'Skipping page {page}: HTTP {r.status_code}')
                continue
            soup = bs(r.content, 'html.parser')
            self._scrape_offers(soup)

    def _scrape_offers(self, soup):
        """Append one dict per offer found in ``soup`` to ``self.data``."""
        # Searching for offers with microphones on page
        micro_offers = soup.find_all('div', class_='tests-product-entry')
        for offer in micro_offers:
            # Offers without a price element get None instead of aborting
            # the whole scrape with an AttributeError.
            price_tag = offer.find('div', class_='text-3xl font-bold leading-8')
            if price_tag is None:
                price = None
            else:
                price = price_tag.text.replace(u'\xa0', u' ').strip()

            offer_dict = {
                'Nazwa': offer.a.get_text().strip(),
                'URL': offer.a['href'],
                'Cena': price,
            }

            # The last 'py-1' element is deliberately left out.
            for param in offer.find_all('div', class_='py-1')[:-1]:
                text = param.get_text().replace('\n', '').strip()
                # Split on the first colon only, so values that contain a
                # colon themselves are kept rather than dropped.
                name, sep, value = text.partition(':')
                name = name.strip()
                if not sep or not name:
                    # Not a "name: value" entry - nothing to record.
                    continue
                offer_dict[name] = value.strip()

            # Adding dict to list
            self.data.append(offer_dict)

In [None]:
# Link to the category listing
base_url = 'https://www.komputronik.pl/category/8083/mikrofony.html?showBuyActiveOnly=0'

# Fetch the first listing page to find out how many pages there are
r = requests.get(base_url, timeout=30)
print(r.status_code) # checking HTTP response code
# Fail here with a clear HTTP error rather than later on missing markup
r.raise_for_status()
time.sleep(4)
soup = bs(r.content, 'html.parser')
page_items = soup.find_all('li', class_='flex justify-center items-center w-8 h-8 rounded border border-gray-mercury cursor-pointer')
# When no pagination items are found, fall back to a single page instead of
# raising IndexError on an empty list
pages = int(page_items[-1].get_text().strip()) if page_items else 1

# Creating scraper
# MicroScraper appends the page number directly to the URL it is given, so the
# URL must end with the page query parameter; passing base_url unchanged would
# produce '...showBuyActiveOnly=01', '...=02', ...
# NOTE(review): 'p' is assumed to be the site's page parameter - confirm
# against a live pagination link.
scraper = MicroScraper(f'{base_url}&p=')
# Searching pages and collecting data
scraper.scrape_pages(pages)

In [None]:
# Convert the scraped list of offer dicts into a DataFrame
# (one row per offer, one column per distinct dict key)
df = pd.DataFrame(scraper.data)

In [None]:
# Display the scraped offers
df

In [None]:
# Data analysis: column dtypes and non-null counts
df.info()

In [None]:
# Prices: convert the scraped price text to floats by dropping everything from
# 'zł' onwards, removing spaces and using '.' as the decimal separator.
# The dtype guard makes the cell safe to re-run: once the column is numeric
# there is nothing left to parse. Missing prices stay missing (NaN) instead of
# raising; malformed text still raises ValueError.
if not pd.api.types.is_numeric_dtype(df['Cena']):
    df['Cena'] = (
        df['Cena']
        .str.split('zł').str[0]
        .str.replace(' ', '', regex=False)
        .str.replace(',', '.', regex=False)
        .astype(float)
    )

In [None]:
# Preview the first rows
df.head()

In [None]:
# Summary statistics of the prices
df['Cena'].describe()

In [None]:
# Showing data of most expensive micro
# (the maximum is taken from the data, so the cell stays correct when prices
# change between scrapes)
df[df['Cena'] == df['Cena'].max()]

In [None]:
# Showing data of least expensive micro
# (the minimum is taken from the data, so the cell stays correct when prices
# change between scrapes)
df[df['Cena'] == df['Cena'].min()]

In [None]:
# Count missing values per column
df.isna().sum()