# Web-scraping for GPU prices

Following the guide by @Tech With Tim on YouTube.

In [None]:
import re
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Ask which GPU to search for. Surrounding whitespace is dropped so that an
# accidental leading/trailing space does not end up in the query string or in
# the title filter built from this value in later cells.
gpu_input = input("What GPU are you looking for?").strip()

# quote_plus percent-encodes the search term so that spaces and characters such
# as "&", "#" or "+" stay inside the `d` parameter instead of altering the URL.
# N=4131 is carried over unchanged — presumably a Newegg category/filter id;
# TODO confirm.
url = f"https://www.newegg.com/global/sg-en/p/pl?d={quote_plus(gpu_input)}&N=4131"

In [None]:
# Fetch the first results page. The timeout stops the cell from hanging forever
# on a stalled connection (30 s is an arbitrary but generous limit), and
# raise_for_status() fails loudly on a 4xx/5xx response instead of letting an
# error page be parsed as if it were search results.
response = requests.get(url, timeout=30)
response.raise_for_status()

page = response.text
doc = BeautifulSoup(page, "html.parser")

In [None]:
# Locate the pagination label, then take the <strong> tag inside it; that tag
# holds the "<current page>/<total pages>" text.
pagination_label = doc.find(class_ = "list-tool-pagination-text")
n_pages = pagination_label.strong
print(n_pages)

<strong>1<!-- -->/<!-- -->64</strong>


## Using RegEx

This cell demonstrates how to use `re` to extract digits with `\d`.  

`\d`: Matches a single digit (0-9).  
`\d+`: Matches one or more digits in sequence.  
`\d*`: Matches zero or more digits in sequence (can match the empty string).

In [None]:
# Sanity check on a made-up "1/15" pagination string: every run of digits should
# come back as its own match, so the 15 is captured whole rather than only the
# first digit encountered.
test_str = "<strong>1<!-- -->/<!-- -->15</strong>"
pattern = r'\d+'
matches = re.compile(pattern).findall(test_str)
matches

['1', '15']

In [None]:
# Apply the same digit pattern to the real pagination tag. The tag is turned
# into its markup string first because re.findall works on text, not on tags.
pattern = r'\d+'
pagination_markup = str(n_pages)
matches = re.findall(pattern, pagination_markup)
matches

['1', '64']

In [None]:
# Now that the page count has been extracted, we can iterate through the result
# pages and grab the details we need.
# Limited to 2 pages for now.

last_page = 2

# Title filter: the text must contain both the search term and the word "Card".
# re.escape keeps characters such as "+" or "(" in the search term literal
# instead of letting them be read as regex syntax.
title_pattern = re.compile(f"(?=.*{re.escape(gpu_input)})(?=.*Card)")

for page_number in range(1, last_page + 1):
  # The loop counter has its own name so the fetched HTML does not overwrite it.
  url = f"https://www.newegg.com/global/sg-en/p/pl?d={quote_plus(gpu_input)}&N=4131&page={page_number}"
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  doc = BeautifulSoup(response.text, "html.parser")
  div = doc.find(class_ = "item-cells-wrap border-cells short-video-box items-grid-view four-cells expulsion-one-cell")
  if div is None:
    # No results grid on this page; report it and move on rather than failing
    # with an AttributeError on the next line.
    print(f"No results grid found on page {page_number}, skipping")
    continue

  for item in div.find_all(string = title_pattern):
    # Keep only matching strings whose direct parent is an <a> tag.
    if item.parent.name != "a":
      continue

    container = item.find_parent(class_ = "item-container")
    price_tag = container.find(class_ = "price-current") if container is not None else None
    if price_tag is None or price_tag.strong is None:
      # Nothing to print for a listing that shows no price.
      continue

    print(price_tag.strong.string)

2,917
3,192
2,763
3,023
2,944
3,660
2,849
3,083
3,160
2,923
3,808
2,893
2,801
3,122
5,632
5,246
5,556


In [None]:
# `matches` holds the digit runs pulled from the pagination tag; its last entry
# is the total number of pages (the "64" in "1/64").
last_page = int(matches[-1])
items_found = {}

# Title filter: the text must contain both the search term and the word "Card".
# re.escape keeps characters such as "+" or "(" in the search term literal
# instead of letting them be read as regex syntax.
title_pattern = re.compile(f"(?=.*{re.escape(gpu_input)})(?=.*Card)")

for page_number in range(1, last_page + 1):
  # The loop counter has its own name so the fetched HTML does not overwrite it.
  url = f"https://www.newegg.com/global/sg-en/p/pl?d={quote_plus(gpu_input)}&N=4131&page={page_number}"
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  doc = BeautifulSoup(response.text, "html.parser")
  div = doc.find(class_ = "item-cells-wrap border-cells short-video-box items-grid-view four-cells expulsion-one-cell")
  if div is None:
    # No results grid on this page; report it and keep what was collected so
    # far instead of aborting the whole crawl with an AttributeError.
    print(f"No results grid found on page {page_number}, skipping")
    continue

  for item in div.find_all(string = title_pattern):
    parent = item.parent
    # Keep only matching strings whose direct parent is an <a> tag, since the
    # link is read from that tag's href.
    if parent.name != "a":
      continue

    link = parent['href']
    container = item.find_parent(class_ = "item-container")
    price_tag = container.find(class_ = "price-current") if container is not None else None
    if price_tag is None or price_tag.strong is None or price_tag.strong.string is None:
      # A listing without a readable price cannot be ranked, so leave it out.
      continue

    price = price_tag.strong.string
    # str(item) stores a plain string as the key; the object returned by
    # find_all still references the page's parse tree and would keep it in
    # memory for every page crawled.
    items_found[str(item)] = {"price": int(price.replace(",", "")), "link": link}

In [None]:
def _price_of(entry):
  """Return the price stored in an (item name, details) pair."""
  _, details = entry
  return details['price']

# List every scraped item as (name, details), cheapest first.
sorted(items_found.items(), key = _price_of)

[('GIGABYTE Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card GV-N4090GAMING OC-24GD',
  {'price': 2763,
   'link': 'https://www.newegg.com/global/sg-en/gigabyte-geforce-rtx-4090-gv-n4090gaming-oc-24gd/p/N82E16814932550'}),
 ('ASUS TUF Gaming GeForce RTX 4090 OG OC Edition Gaming Graphics Card (PCIe 4.0, 24GB GDDR6X, DLSS 3, HDMI 2.1, DisplayPort 1.4a) TUF-RTX4090-O24G-OG-GAMING',
  {'price': 2801,
   'link': 'https://www.newegg.com/global/sg-en/asus-geforce-rtx-4090-tuf-rtx4090-o24g-og-gaming/p/N82E16814126658'}),
 ('GIGABYTE AERO GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card GV-N4090AERO OC-24GD',
  {'price': 2849,
   'link': 'https://www.newegg.com/global/sg-en/gigabyte-geforce-rtx-4090-gv-n4090aero-oc-24gd/p/N82E16814932601'}),
 ('MSI Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 Video Card RTX 4090 GAMING TRIO 24G',
  {'price': 2893,
   'link': 'https://www.newegg.com/global/sg-en/msi-geforce-rtx-4090-rtx-4090-gaming-trio-24g/p/N82E1681

In [None]:
# orient="index" turns each item name into a row directly. Building the frame
# column-wise and transposing it mixes ints and strings within each column and
# leaves every column as object dtype; built this way, `price` stays numeric.
pd.DataFrame.from_dict(items_found, orient="index").sort_values(by = ['price'])

Unnamed: 0,price,link
GIGABYTE Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card GV-N4090GAMING OC-24GD,2763,https://www.newegg.com/global/sg-en/gigabyte-g...
"ASUS TUF Gaming GeForce RTX 4090 OG OC Edition Gaming Graphics Card (PCIe 4.0, 24GB GDDR6X, DLSS 3, HDMI 2.1, DisplayPort 1.4a) TUF-RTX4090-O24G-OG-GAMING",2801,https://www.newegg.com/global/sg-en/asus-gefor...
GIGABYTE AERO GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card GV-N4090AERO OC-24GD,2849,https://www.newegg.com/global/sg-en/gigabyte-g...
MSI Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 Video Card RTX 4090 GAMING TRIO 24G,2893,https://www.newegg.com/global/sg-en/msi-geforc...
"ASUS TUF Gaming GeForce RTX 4090 OC Edition Gaming Graphics Card (PCIe 4.0, 24GB GDDR6X, HDMI 2.1a, DisplayPort 1.4a) TUF-RTX4090-O24G-GAMING",2917,https://www.newegg.com/global/sg-en/asus-gefor...
MSI Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card RTX 4090 GAMING X SLIM 24G,2923,https://www.newegg.com/global/sg-en/msi-geforc...
MSI Gaming GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 Video Card RTX 4090 GAMING X TRIO 24G,2944,https://www.newegg.com/global/sg-en/msi-geforc...
MSI Suprim GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 Video Card RTX 4090 SUPRIM LIQUID X 24G,3023,https://www.newegg.com/global/sg-en/msi-geforc...
GIGABYTE AORUS GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 x16 ATX Video Card GV-N4090AORUS M-24GD,3083,https://www.newegg.com/global/sg-en/gigabyte-g...
GIGABYTE AORUS GeForce RTX 4090 24GB GDDR6X PCI Express 4.0 ATX Video Card GV-N4090AORUSX W-24GD,3160,https://www.newegg.com/global/sg-en/gigabyte-g...
