In [1]:
import requests
from bs4 import BeautifulSoup
import csv

# Path of the CSV file the scraped data is written to
CSV = 'phones.csv'

# Listing page that is scraped
URL = 'https://www.sulpak.kz/f/smartfoniy'

# Browser-like request headers, so the site does not treat the request as coming from a script
HEADERS = {
    'user-agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/88.0.4324.190 Mobile Safari/537.36'
    ),
    'accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
}


def get_html(url, params=None, timeout=10):
    """Send a GET request with the module's browser-like headers.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The requests.Response object; callers read .status_code and .text.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)


def get_content(html):
    """Extract the phone records from the markup of one listing page.

    Args:
        html: Markup of the page as a string.

    Returns:
        A list with one dict per <li class="tile-container"> element, holding
        the element's data-name, data-code, data-price and data-brand
        attribute values under the keys 'title', 'code', 'price' and 'brand'.
    """
    tiles = BeautifulSoup(html, 'html.parser').find_all('li', class_='tile-container')
    return [
        {
            'title': tile.get('data-name'),  # Name of the phone
            'code': tile.get('data-code'),  # Product code
            'price': tile.get('data-price'),  # Price
            'brand': tile.get('data-brand'),  # Brand
        }
        for tile in tiles
    ]


def save_to_csv(items, path):
    """Write the scraped phone records to a semicolon-delimited CSV file.

    Args:
        items: Iterable of dicts with the keys 'title', 'code', 'price'
            and 'brand'.
        path: Destination file path; an existing file is overwritten.

    Raises:
        KeyError: If a record lacks one of the four expected keys.
    """
    # Explicit UTF-8 (no BOM): the header contains Cyrillic text, which the
    # platform default encoding (e.g. cp1252 on Windows) cannot encode.
    with open(path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Name', 'Product code', 'Price(Тг)', 'Brand'])  # Column headers
        writer.writerows(
            [item['title'], item['code'], item['price'], item['brand']]
            for item in items
        )


def parsing(pages=26):
    """Scrape the listing pages and save every phone found to the CSV file.

    A first request to URL acts as a gate: unless it answers with status 200,
    'ERROR' is printed and nothing is scraped or saved. Otherwise pages
    1..pages are requested one by one via the 'page' query parameter, their
    records are collected, and the combined list is written to CSV.

    Args:
        pages: Number of listing pages to request (default 26).
    """
    probe = get_html(URL)
    if probe.status_code != 200:
        print('ERROR')
        return

    phones = []
    for page in range(1, pages + 1):
        print(f'Page: {page}')  # Progress indicator
        response = get_html(URL, params={'page': page})
        if response.status_code != 200:
            # Do not parse an error page as if it were a listing
            print(f'ERROR: page {page} returned status {response.status_code}')
            continue
        phones.extend(get_content(response.text))
    save_to_csv(phones, CSV)


# Run the scraper
parsing()


Page: 1
Page: 2
Page: 3
Page: 4
Page: 5
Page: 6
Page: 7
Page: 8
Page: 9
Page: 10
Page: 11
Page: 12
Page: 13
Page: 14
Page: 15
Page: 16
Page: 17
Page: 18
Page: 19
Page: 20
Page: 21
Page: 22
Page: 23
Page: 24
Page: 25
Page: 26
