In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager

import time
import re

In [None]:
# Build the Chrome driver service; the executable path is whatever
# ChromeDriverManager().install() returns.
service = Service(executable_path=ChromeDriverManager().install())
options = Options()

# Start a Chrome session with default (unmodified) options.
driver = webdriver.Chrome(service=service, options=options)

# Listing page that is scraped by the code below.
url = "https://www.thegioididong.com/laptop-apple-macbook"

driver.get(url)
# Fixed 8-second pause after navigation — presumably to let the page finish
# rendering before elements are looked up; TODO confirm.
time.sleep(8)

# Collect the product cards: <li> elements whose class attribute is exactly
# " item  __cate_44" (the XPath compares the whole attribute string,
# including its leading and double spaces).
parent_tags_list = driver.find_elements(By.XPATH, "//li[@class=' item  __cate_44']")

def check_element_html(parent_tag, string_find_child_tag):
    """Look up a child element by XPath, returning ``None`` when it is absent.

    Args:
        parent_tag: Element exposing ``find_element`` to search within.
        string_find_child_tag: XPath expression identifying the child.

    Returns:
        The first matching element, or ``None`` if the lookup raises
        ``NoSuchElementException``.
    """
    try:
        return parent_tag.find_element(By.XPATH, string_find_child_tag)
    except NoSuchElementException:
        # A missing child is an expected outcome, not an error.
        return None

def find_specification_screen(parts):
    """Parse a screen specification string into size, resolution and refresh rate.

    The text is split on ``', '`` and read positionally in the form
    ``'<size>", <resolution>, <rate>Hz'``; the resolution and the rate are
    optional.

    Args:
        parts: Raw screen specification text.

    Returns:
        A tuple ``(screen_inch, screen_resolution, screen_frequency_Hz)``:
        the size as a float (double quotes removed), the resolution text or
        ``None`` when absent, and the refresh rate as an int (``Hz`` removed)
        or ``None`` when absent.

    Raises:
        ValueError: If the size or the refresh rate is not numeric.
    """
    fields = parts.split(', ')
    screen_inch = float(fields[0].replace('"', ''))
    # The resolution may be missing; return None instead of raising IndexError,
    # mirroring how the optional refresh rate is handled.
    screen_resolution = fields[1] if len(fields) > 1 else None
    screen_frequency_Hz = int(fields[2].replace('Hz', '')) if len(fields) > 2 else None
    return screen_inch, screen_resolution, screen_frequency_Hz

def find_specification_cpu(parts):
    """Split a CPU specification string into core, generation and speed.

    Args:
        parts: Raw CPU specification text, with fields separated by ``', '``.

    Returns:
        A tuple ``(core, core_gen, core_speed)``:

        * no ``', '`` in the text: ``(parts, None, None)``;
        * first field contains ``'Apple'``: the second field is the speed and
          the generation is ``None``;
        * otherwise: the second field is the generation and the third field,
          when present, is the speed (``None`` if missing).
    """
    if ', ' not in parts:
        return parts, None, None

    core, second, *remaining = parts.split(', ')
    if 'Apple' in core:
        # These entries carry no generation field; the second field is the speed.
        return core, None, second

    return core, second, (remaining[0] if remaining else None)

# Lookup table: RAM size (the integer parsed from a product card) -> ram_id.
# Sizes are presumably in GB — TODO confirm. Ids run from 1 in the order listed.
rams = {
    size: ram_id
    for ram_id, size in enumerate((4, 8, 16, 18, 24, 32, 36, 64, 96), start=1)
}

# Lookup table: storage size (the integer parsed from a product card) ->
# capacity_id. NOTE(review): the keys 1, 2 and 4 presumably stand for TB while
# the larger keys are GB — confirm. Ids run from 1 in the order listed.
capacities = {
    size: capacity_id
    for capacity_id, size in enumerate((32, 64, 128, 256, 512, 1, 2, 4), start=1)
}

def laptop_specifications():
    """Extract and print the specifications of every collected product card.

    Iterates over the module-level ``parent_tags_list``. For each card a dict
    is built containing ``product_id`` plus whichever of these keys could be
    read from the card: ``screen_inch``, ``screen_resolution``,
    ``screen_frequency_hz``, ``card_screen``, ``core``, ``core_gen``,
    ``core_speed``, ``battery``, ``weight_kg``, ``ram_id``, ``capacity_id``.
    Each dict is printed; nothing is returned.

    Raises:
        KeyError: If a parsed RAM or storage size is not a key of ``rams`` /
            ``capacities``.
        IndexError: If a RAM/SSD/HDD label contains no digits.
        ValueError: If a numeric field (screen size, refresh rate, weight)
            cannot be converted.
    """
    for parent_tag in parent_tags_list:
        product_id = parent_tag.get_attribute('data-id')
        specification_tag_list = parent_tag.find_elements(By.XPATH, ".//div[@class='utility']//p")
        ram_capacity_tag_list = parent_tag.find_elements(
            By.XPATH,
            "./a[@class='main-contain ']//div[@class='item-compare gray-bg']//span",
        )

        specifications = {'product_id': product_id}

        for tag in specification_tag_list:
            # Read the text once: each access to .text queries the browser.
            text = tag.text
            _, separator, raw_value = text.partition(':')
            if not separator:
                # No "label: value" structure (e.g. empty text) — skip it
                # rather than aborting the whole run.
                continue
            specification_values = raw_value.strip()

            if 'Màn hình' in text:
                screen_inch, screen_resolution, screen_frequency_Hz = find_specification_screen(specification_values)
                specifications.update({
                    'screen_inch': screen_inch,
                    'screen_resolution': screen_resolution,
                    'screen_frequency_hz': screen_frequency_Hz,
                })
            elif 'Card' in text:
                specifications['card_screen'] = specification_values
            elif 'CPU' in text:
                core, core_gen, core_speed = find_specification_cpu(specification_values)
                specifications.update({
                    'core': core,
                    'core_gen': core_gen,
                    'core_speed': core_speed,
                })
            elif 'Pin' in text:
                specifications['battery'] = specification_values
            elif 'Khối lượng' in text:
                specifications['weight_kg'] = float(specification_values.replace(' kg', ''))

        for tag in ram_capacity_tag_list:
            text = tag.text
            if 'RAM' in text:
                ram = int(re.findall(r'[\d]+', text)[0])
                specifications['ram_id'] = rams[ram]
            # Both substrings must be tested explicitly: `'SSD' or 'HDD' in text`
            # evaluates to the truthy string 'SSD' and is therefore always true.
            elif 'SSD' in text or 'HDD' in text:
                capacity = int(re.findall(r'[\d]+', text)[0])
                specifications['capacity_id'] = capacities[capacity]

        print(specifications)
# Run the extraction over the product cards in parent_tags_list; one dict is
# printed per card.
laptop_specifications()
        