In [1]:
import pandas as pd
import numpy as np
import bs4
import requests

In [2]:
# Geekbench Browser chart pages that later cells download and scrape.
cpu_benchmarks = "https://browser.geekbench.com/processor-benchmarks"
gpu_benchmarks = "https://browser.geekbench.com/vulkan-benchmarks"

# CPU

In [46]:
# Load the laptop listings that the benchmark scores are joined onto later.
laptops = pd.read_csv("laptops.csv")
# Keep only the leading number of each rating string. The vectorised `.str`
# accessor carries a missing rating through as NaN, whereas a per-element
# `x.split()` raises on it (NaN is a float). Bracket assignment is used so the
# write always targets the column.
laptops["ratings_5max"] = laptops["ratings_5max"].str.split().str[0].astype(float)
laptops.head()

Unnamed: 0,brand,img_url,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max
0,HP,https://encrypted-tbn0.gstatic.com/images?q=tb...,Notebook 14-df0008nx,14.0,Intel Celeron N4000,Intel HD Graphics 600,64 GB (eMMC),1259.0,1259.0,0.0
1,Lenovo,https://encrypted-tbn0.gstatic.com/images?q=tb...,IdeaPad 330S-14IKB,14.0,Intel Core i5-8250U,Intel UHD Graphics 620,1 TB HDD,1849.0,2099.0,3.3
2,Huawei,https://encrypted-tbn0.gstatic.com/images?q=tb...,MateBook D Volta,14.0,Intel Core i5-8250U,NVIDIA GeForce MX150 (2 GB),256 GB SSD,2999.0,3799.0,0.0
3,Dell,https://encrypted-tbn0.gstatic.com/images?q=tb...,Inspiron 15 3567,15.6,Intel Core i3-7020U,Intel HD Graphics 620,1 TB HDD,1849.0,1849.0,0.0
4,Asus,https://encrypted-tbn0.gstatic.com/images?q=tb...,VivoBook 15 X510UR,15.6,Intel Core i7-8550U,NVIDIA GeForce 930MX (2 GB),1 TB HDD,2499.0,3149.0,0.0


In [47]:
# Download the CPU chart page. requests applies no timeout by default, so an
# explicit one keeps the notebook from hanging if the server never answers.
response = requests.get(cpu_benchmarks, timeout=30)
# Displayed as the cell output so the reader can check the request succeeded.
response.status_code

200

In [48]:
# `features` is the keyword that selects the parser. An unrecognised keyword
# such as `parser=` is forwarded to the tree builder instead, which leaves
# Beautiful Soup to guess which parser to use.
soup = bs4.BeautifulSoup(response.text, features="lxml")

In [49]:
# Take the first element on the page that carries the `table` CSS class.
# NOTE(review): the name suggests this is the single-core chart — confirm
# against the page layout.
single_core = soup.select(".table")[0]

In [50]:
# Header labels expected in the selected table (compare with the output below).
# `columns` is not referenced by any other cell visible in this notebook.
columns = ["Processor", "Score"]
# Display the table's header cells to check those labels.
single_core.select("thead th")

[<th class="name">Processor</th>,
 <th class="score">Score</th>,
 <th class="graph"></th>]

In [51]:
# Collect every body row of the selected table; each row becomes one record.
# Note that `processors` is rebound to a DataFrame at the end of this cell.
processors = single_core.select("tbody tr")
def parse_processor(processor):
    """Convert one row of the processor chart into a flat record.

    `processor` must offer a ``select(css)`` method returning a list of
    elements that each expose a ``text`` attribute. The first ``a`` element
    supplies the processor name, the first ``.score`` element the score, and
    the first ``.description`` element the clock speed and core count.

    The description is split on whitespace: token 0 is read as the clock
    speed and token 2, minus its first character, as the core count
    (presumably text shaped like "3.2 GHz (24 cores)" — confirm against the
    page).

    Returns a dict with the keys ``processor_type``, ``cpu_clock_speed``,
    ``cpu_cores`` and ``cpu_score``. Raises IndexError when an expected
    element or token is missing, and ValueError when a number cannot be parsed.
    """

    def first_text(css):
        # Stripped text of the first element matching `css`.
        return processor.select(css)[0].text.strip()

    label = first_text("a")

    tokens = first_text(".description").split()
    ghz = tokens[0]
    # Drop the leading character of the third token (the opening bracket in
    # "(24") so only the digits remain.
    core_count = tokens[2][1:]

    points = first_text(".score")

    return dict(
        processor_type=label,
        cpu_clock_speed=float(ghz),
        cpu_cores=int(core_count),
        cpu_score=int(points),
    )

# Parse each table row and assemble the resulting records into one DataFrame.
processors = pd.DataFrame([parse_processor(row) for row in processors])
processors.head()

Unnamed: 0,processor_type,cpu_clock_speed,cpu_cores,cpu_score
0,Intel Core i9-13900KS,3.2,24,3069
1,Intel Core i9-13900KF,3.0,24,2946
2,Intel Core i9-13900K,3.0,24,2940
3,AMD Ryzen 9 7950X,4.5,16,2876
4,AMD Ryzen 9 7950X3D,4.2,16,2872


In [52]:
# Show the dtype of each parsed CPU column.
processors.dtypes

processor_type      object
cpu_clock_speed    float64
cpu_cores            int64
cpu_score            int64
dtype: object

# GPU

In [53]:
# Download the GPU chart page. requests applies no timeout by default, so an
# explicit one keeps the notebook from hanging if the server never answers.
response = requests.get(gpu_benchmarks, timeout=30)
# Displayed as the cell output so the reader can check the request succeeded.
response.status_code

200

In [54]:
# Parse the downloaded page, naming the parser explicitly via `features`.
soup = bs4.BeautifulSoup(response.text, features="lxml")

In [55]:
# Take the first <table> element on the page.
table = soup.select("table")[0]
# Header labels expected in that table (compare with the output below).
cols = ["Device", "Score"]
# Display the table's header row to check those labels.
table.select("thead tr")

[<tr class="stacked-heading">
 <th class="name">Device</th>
 <th class="score">Score</th>
 <th class="graph"></th>
 </tr>]

In [56]:
# Collect body rows from the first table only, mirroring how the CPU rows are
# taken from one selected table. A page-wide "tbody tr" query would also pick
# up rows from any other table on the page. Written as a single expression on
# `soup` so re-running the cell gives the same result.
table = soup.select("table")[0].select("tbody tr")
def parse_gpu(row):
    """Convert one row of the GPU chart into a flat record.

    `row` must offer a ``select_one(css)`` method returning an element with
    a ``text`` attribute. The ``.name`` element supplies the device name and
    the ``.score`` element the benchmark score.

    Returns a dict with the keys ``graphics_card`` (str) and ``gpu_score``
    (int). Raises ValueError when the score text is not an integer.
    """
    device = row.select_one(".name").text.strip()
    points = int(row.select_one(".score").text.strip())
    return {"graphics_card": device, "gpu_score": points}

# Parse each table row and assemble the resulting records into one DataFrame.
gpu = pd.DataFrame([parse_gpu(row) for row in table])
gpu.head()

Unnamed: 0,graphics_card,gpu_score
0,NVIDIA GeForce RTX 4090,254828
1,AMD Radeon RX 7900 XTX (RADV GFX1100),224764
2,NVIDIA RTX 6000 Ada Generation,222276
3,AMD Radeon RX 7900 XTX,210829
4,NVIDIA GeForce RTX 4080,197582


In [57]:
# Show the dtype of each parsed GPU column.
gpu.dtypes

graphics_card    object
gpu_score         int64
dtype: object

In [58]:
# Display the CPU benchmark table (the rendered output elides the middle rows).
processors

Unnamed: 0,processor_type,cpu_clock_speed,cpu_cores,cpu_score
0,Intel Core i9-13900KS,3.2,24,3069
1,Intel Core i9-13900KF,3.0,24,2946
2,Intel Core i9-13900K,3.0,24,2940
3,AMD Ryzen 9 7950X,4.5,16,2876
4,AMD Ryzen 9 7950X3D,4.2,16,2872
...,...,...,...,...
784,AMD A4-5000,1.5,4,162
785,Intel Celeron N3050,1.6,2,153
786,Intel Atom x5-Z8350,1.4,4,134
787,Intel Atom x5-Z8300,1.4,4,131


In [65]:
# Drop every laptop row that has a missing value in any column.
# This mutates `laptops` in place; the dropped rows only come back by
# re-running the cell that reads the CSV.
laptops.dropna(inplace=True)

In [66]:
# Trim leading/trailing whitespace from the values of every object-dtype column.
cols = laptops.columns[(laptops.dtypes == "object").values]
for col in cols:
    laptops[col] = laptops[col].map(lambda text: text.strip())
laptops.head()

Unnamed: 0,brand,img_url,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max
0,HP,https://encrypted-tbn0.gstatic.com/images?q=tb...,Notebook 14-df0008nx,14.0,Intel Celeron N4000,Intel HD Graphics 600,64 GB (eMMC),1259.0,1259.0,0.0
1,Lenovo,https://encrypted-tbn0.gstatic.com/images?q=tb...,IdeaPad 330S-14IKB,14.0,Intel Core i5-8250U,Intel UHD Graphics 620,1 TB HDD,1849.0,2099.0,3.3
2,Huawei,https://encrypted-tbn0.gstatic.com/images?q=tb...,MateBook D Volta,14.0,Intel Core i5-8250U,NVIDIA GeForce MX150 (2 GB),256 GB SSD,2999.0,3799.0,0.0
3,Dell,https://encrypted-tbn0.gstatic.com/images?q=tb...,Inspiron 15 3567,15.6,Intel Core i3-7020U,Intel HD Graphics 620,1 TB HDD,1849.0,1849.0,0.0
4,Asus,https://encrypted-tbn0.gstatic.com/images?q=tb...,VivoBook 15 X510UR,15.6,Intel Core i7-8550U,NVIDIA GeForce 930MX (2 GB),1 TB HDD,2499.0,3149.0,0.0


In [67]:
# Attach the CPU benchmark columns to each laptop by exact processor name.
# Laptops whose processor_type has no identical entry in `processors` keep
# their row but get NaN benchmark values (visible in the output below).
# The joined frame is only displayed here; it is not assigned to a variable.
laptops.join(processors.set_index("processor_type"), on="processor_type")

Unnamed: 0,brand,img_url,laptop_name,display_size,processor_type,graphics_card,disk_space,discount_price,old_price,ratings_5max,cpu_clock_speed,cpu_cores,cpu_score
0,HP,https://encrypted-tbn0.gstatic.com/images?q=tb...,Notebook 14-df0008nx,14.0,Intel Celeron N4000,Intel HD Graphics 600,64 GB (eMMC),1259.0,1259.0,0.0,1.1,2.0,329.0
1,Lenovo,https://encrypted-tbn0.gstatic.com/images?q=tb...,IdeaPad 330S-14IKB,14.0,Intel Core i5-8250U,Intel UHD Graphics 620,1 TB HDD,1849.0,2099.0,3.3,,,
2,Huawei,https://encrypted-tbn0.gstatic.com/images?q=tb...,MateBook D Volta,14.0,Intel Core i5-8250U,NVIDIA GeForce MX150 (2 GB),256 GB SSD,2999.0,3799.0,0.0,,,
3,Dell,https://encrypted-tbn0.gstatic.com/images?q=tb...,Inspiron 15 3567,15.6,Intel Core i3-7020U,Intel HD Graphics 620,1 TB HDD,1849.0,1849.0,0.0,2.3,2.0,726.0
4,Asus,https://encrypted-tbn0.gstatic.com/images?q=tb...,VivoBook 15 X510UR,15.6,Intel Core i7-8550U,NVIDIA GeForce 930MX (2 GB),1 TB HDD,2499.0,3149.0,0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
199,Dell,https://encrypted-tbn0.gstatic.com/images?q=tb...,Inspiron 15 3567,15.6,Intel Core i5-7200U,AMD Radeon R5-M430 (2 GB),1 TB HDD,2249.0,2249.0,3.3,,,
200,Lenovo,https://encrypted-tbn0.gstatic.com/images?q=tb...,IdeaPad 320-15IKBRN,15.6,Intel Core i5-8250U,Intel GMA HD,1 TB HDD,2099.0,2099.0,3.8,,,
201,Huawei,https://encrypted-tbn0.gstatic.com/images?q=tb...,MateBook D,15.6,Intel Core i7-8550U,NVIDIA GeForce MX150 (2 GB),128 GB SSD/1 TB HDD,3299.0,3299.0,4.0,,,
202,Apple,https://encrypted-tbn0.gstatic.com/images?q=tb...,MacBook Pro (Retina + Touch Bar),15.4,Intel Core i7 6 Core,Radeon Pro 555X GDDR5 (4 GB),256 GB SSD,10199.0,10199.0,0.0,,,
