## Scrapping C-Zone and Brightstar Online Store

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import datetime

In [None]:
pd.set_option('display.max_rows', 60) # Default 60, otherwise None (to see all)

In [7]:
url = 'https://brightstarcomp.com/collections/pc-components?sort_by=title-ascending&page=1&filter.v.price.gte=&filter.v.price.lte=&filter.p.product_type=Processor'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html')

In [None]:
print(soup)

In [None]:
products = soup.find('div', class_='right-products')
products = products.find_all('div', class_='product-item')
print(products[0])

In [70]:
def extract_product_details(product):
    """
    Extracts the URL, name, and price from a product item.

    Args:
        product (BeautifulSoup object): A single product item.

    Returns:
        dict: A dictionary containing the product's URL, name, and price.
    """
    # Extract the component URL
    url = product.find('a')['href']
    full_url = f"https://brightstarcomp.com{url}"  # Add the base URL

    # Extract the component name
    name = product.find('p', class_='product-title').text.strip()

    # Extract the price
    price = product.find('p', class_='price').text.strip()

    # Return the extracted details as a dictionary
    return {
        'url': full_url,
        'name': name,
        'price': price,
        'timestamp': datetime.datetime.now()
    }

""" 
Extract details for all products in each category.
For: Brightstar Computer Website
As of: 19th March 2025
"""

i = 1
product_list = []
product_type = 'PC Accessories'
while(True):
    url = f'https://brightstarcomp.com/collections/pc-components?sort_by=title-ascending&page={i}&filter.v.price.gte=&filter.v.price.lte=&filter.p.product_type={product_type}'
    page = requests.get(url)
    soup = BeautifulSoup(page.text, 'html')

    products = soup.find('div', class_='right-products')
    products = products.find_all('div', class_='product-item')

    if len(products) == 0:
        break
    product_list += [extract_product_details(product) for product in products]
    i += 1

# Convert the list of dictionaries into a Pandas DataFrame
df = pd.DataFrame(product_list)
df['Note'] = None

# Print the DataFrame
display(df)

# Save the DataFrame to a CSV file
df.to_csv(f'data/parts/brightstarcomp/{product_type.lower().strip()}.csv', index=False)

Unnamed: 0,url,name,price,timestamp,Note
0,https://brightstarcomp.com/collections/pc-comp...,1st Player ARGB Silicon Sleeved Extension Cabl...,RM 130.00,2025-04-02 18:59:57.131359,
1,https://brightstarcomp.com/collections/pc-comp...,1st-player-steampunk-gold-series-psu-350mm-cable,RM 75.00,2025-04-02 18:59:57.131359,
2,https://brightstarcomp.com/collections/pc-comp...,ADATA XPG Prime ARGB Extension For VGA Sleeved...,RM 149.00,2025-04-02 18:59:57.131359,
3,https://brightstarcomp.com/collections/pc-comp...,Arctic A-RGB Controller with RF Remote Control...,RM 74.00,2025-04-02 18:59:57.131359,
4,https://brightstarcomp.com/collections/pc-comp...,Arctic Case PWM Sharing Fan Hub,RM 49.00,2025-04-02 18:59:57.131359,
...,...,...,...,...,...
62,https://brightstarcomp.com/collections/pc-comp...,Thermalright Intel LGA 1700 Air Cooler Refit B...,RM 15.00,2025-04-02 18:59:58.265344,
63,https://brightstarcomp.com/collections/pc-comp...,Thermalright Intel LGA1700 Bending Corrector F...,RM 35.00,2025-04-02 18:59:58.265344,
64,https://brightstarcomp.com/collections/pc-comp...,Thermaltake LCD Panel Kit for The Tower 500 - ...,RM 99.00,2025-04-02 18:59:58.265344,
65,https://brightstarcomp.com/collections/pc-comp...,Thermaltake The Tower 300 Chassis Stand Kit - ...,RM 129.00,2025-04-02 18:59:58.265344,


In [68]:
def extract_product_details(product):
    try:
        # Extract the component URL
        url = product.find('a', class_='product-item-link')['href']

        # Extract the component name
        name = product.find('strong', class_='product name product-item-name').text.strip()

        # Extract the price
        price_tag = product.find('span', class_='price-wrapper')
        price = price_tag['data-price-amount'] if price_tag else None  # Handle missing price

        # Return the extracted details as a dictionary
        return {
            'url': url,
            'name': name,
            'price': price,
            'timestamp': datetime.datetime.now()
        }
    except AttributeError as e:
        print(f"Error extracting product details: {e}")
        return None

In [69]:
import os

""" 
Extract details for all products in each category.
For: C-zone Website
As of: 19th March 2025
"""
components_done = ['cpu-processors', 'memory', 'motherboards','video-card-video-devices']
components = ['computer-cases', 'power-supplies', 'fan-pc-cooling', 'sound-cards', 'optical-drives']
output_dir = 'data/parts/czone/'
os.makedirs(output_dir, exist_ok=True)

for component in components:
    product_list = []
    seen_products = set()  # To track unique product URLs
    i = 1

    while True:
        # Scrape page
        url = f'https://czone.my/czone/computer-components/core-components/{component}.html?p={i}'
        page = requests.get(url)
        if page.status_code != 200:
            print(f"Failed to fetch {url}, status code: {page.status_code}")
            break

        soup = BeautifulSoup(page.text, 'html')

        # Extract data
        products_container = soup.find('div', class_='products wrapper grid products-grid')
        if not products_container:
            print(f"No products found on page {i} for component {component}")
            break

        products = products_container.findAll('li', class_='item product product-item')
        if len(products) == 0:
            break

        for product in products:
            product_details = extract_product_details(product)
            if product_details:
                product_url = product_details['url']  # Use URL as a unique identifier
                if product_url in seen_products:
                    print(f"Duplicate product detected: {product_url}. Stopping.")
                    break  # Stop processing further pages
                seen_products.add(product_url)
                product_list.append(product_details)
        else:
            # Continue to the next page if no duplicates were found
            i += 1
            continue

        # Break the outer loop if a duplicate is found
        break

    df = pd.DataFrame(product_list)
    df['Note'] = None

    # Save the DataFrame to a CSV file
    df.to_csv(f'{output_dir}{component}.csv', index=False, encoding='utf-8')

Duplicate product detected: https://czone.my/czone/nzxt-h9-computer-case.html. Stopping.
Duplicate product detected: https://czone.my/czone/mag a750bn pcie5-1739103631.html. Stopping.
Duplicate product detected: https://czone.my/czone/idc-hub-fh-07-1739103466.html. Stopping.
Duplicate product detected: https://czone.my/czone/cdkhb-1737817497.html. Stopping.
Duplicate product detected: https://czone.my/czone/mr193bk-1739103713.html. Stopping.


In [67]:
url = 'https://czone.my/czone/computer-components/core-components/computer-cases.html?p=1'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html')

products = soup.find('div', class_='products wrapper grid products-grid')
products = products.findAll('li', class_='item product product-item')
products_list = [extract_product_details(product) for product in products]
df = pd.DataFrame(products_list)

display(df)

Unnamed: 0,url,name,price,timestamp
0,https://czone.my/czone/phanteks-nv5s-computer-...,Phanteks NV5S Computer Case,449.0,2025-03-19 14:37:43.252984
1,https://czone.my/czone/deepcool-ch260-matx-com...,Deepcool CH260 MATX Computer Case,259.0,2025-03-19 14:37:43.254101
2,https://czone.my/czone/tecware-timber-m-tg-com...,Tecware Timber M TG Computer Case,179.0,2025-03-19 14:37:43.254101
3,https://czone.my/czone/pa401/bk/wood/tg-174192...,Asus ProArt PA401 Wood Edition Computer Case ...,560.0,2025-03-19 14:37:43.254101
4,https://czone.my/czone/pc-o11vpw-1739103813.html,Lian Li PC-O11 Vision Compact Computer Case,1920.0,2025-03-19 14:37:43.254610
5,https://czone.my/czone/twca-vxprm-bkar-1739107...,"Tecware VX Prism M Computer Case (mATX, TG, AR...",269.0,2025-03-19 14:37:43.254610
6,https://czone.my/czone/lian-li-a4-h2o-computer...,Lian Li A4-H2O Computer Case,679.0,2025-03-19 14:37:43.254610
7,https://czone.my/czone/twca-fus2a-bk-173910708...,"Tecware Fusion 2 Air Computer Case (mATX, TG, ...",229.0,2025-03-19 14:37:43.255006
8,https://czone.my/czone/twca-edm2-bkol-17391070...,"Tecware Edge M2 TG Computer Case (mATX, TG, AR...",169.0,2025-03-19 14:37:43.255006
9,https://czone.my/czone/gfg-p2storm-1739103401....,"Gaming Freak P2 Storm TG Computer Case (mATX, ...",189.0,2025-03-19 14:37:43.255006


In [26]:
url = f'https://brightstarcomp.com/collections/pc-components?sort_by=title-ascending&page=30&filter.v.price.gte=&filter.v.price.lte=&filter.p.product_type=Processor'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html')

products = soup.find('div', class_='right-products')
products = products.find_all('div', class_='product-item')
len(products)

0

In [23]:
df.to_csv('cpu_test.csv', index=False)

## Data Cleaning

### CPU

In [20]:
cpu = pd.read_csv('data/parts/CPU_Data.csv')
bsc_cpu = pd.read_csv('data/parts/brightstarcomp/cpu.csv')
czone_cpu = pd.read_csv('data/parts/czone/cpu-processors.csv')

In [32]:
print(cpu.columns)
print(bsc_cpu.columns)

Index(['Brand', 'Name', 'Chipset', 'Score', 'Price'], dtype='object')
Index(['url', 'name', 'price', 'timestamp', 'Note'], dtype='object')


In [21]:
def parse_cpu_details(product_string):
    """
    Parses a product string into Brand, Name, and Chipset.

    Args:
        product_string (str): The product string to parse.

    Returns:
        dict: A dictionary containing Brand, Name, and Chipset.
    """
    # Split the string into parts
    parts = product_string.split()
    
    # Extract Brand (first word)
    brand = parts[0]
    
    # Extract Chipset (if it contains "LGA" or "AM" as a standalone part)
    chipset = next((part for part in parts if part.startswith("LGA") or part.startswith("AM") and part != "AMD"), None)
    
    # Extract Name (everything between Brand and Chipset)
    if chipset:
        chipset_index = parts.index(chipset)
        name = " ".join(parts[1:chipset_index])
    else:
        name = " ".join(parts[1:])
    
    return {
        "Brand": brand,
        "Name": name,
        "Chipset": chipset
    }
# Name
name_strings = bsc_cpu['name'].tolist() + czone_cpu['name'].tolist()
new_cpu_df = pd.DataFrame([parse_cpu_details(name) for name in name_strings])
# Price
bsc_price_string = bsc_cpu['price'].tolist() 
cleaned_price_string = [price.replace('RM', '').replace(',', '').strip() for price in bsc_price_string]
price_strings = cleaned_price_string + czone_cpu['price'].tolist()
new_cpu_df['Price'] = price_strings
# URL
new_cpu_df['URL'] = bsc_cpu['url'].tolist() + czone_cpu['url'].tolist()
# Timestamp
new_cpu_df['Timestamp'] = bsc_cpu['timestamp'].tolist() + czone_cpu['timestamp'].tolist()
# Note
new_cpu_df['Note'] = None
display(new_cpu_df)

Unnamed: 0,Brand,Name,Chipset,Price,URL,Timestamp,Note
0,AMD,Athlon 3000G,AM4,219.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
1,AMD,Ryzen 3 3200G,AM4,299.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
2,AMD,Ryzen 5 5500,AM4,409.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
3,AMD,Ryzen 5 5600,AM4,499.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
4,AMD,Ryzen 5 5600G,AM4,549.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
5,AMD,Ryzen 5 5600GT,AM4,599.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.372059,
6,AMD,Ryzen 5 5600X,AM4,589.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.372059,
7,AMD,Ryzen 5 7500F,AM5,749.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.372059,
8,AMD,Ryzen 5 7600,AM5,899.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.372059,
9,AMD,Ryzen 5 7600X,AM5,999.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.372059,


In [22]:
merged_cpu = pd.DataFrame(pd.merge(cpu, new_cpu_df, how='outer'))
# Remove Chipset blankspace
merged_cpu['Chipset'] = [str(chipset).replace(' ', '') for chipset in merged_cpu['Chipset']]
# Set null score to -1
merged_cpu['Score'] = [
    int(score) if pd.notnull(score) else -1 for score in merged_cpu['Score']
]
merged_cpu['Name'] = merged_cpu['Name'].str.replace('Processor', '')
merged_cpu = merged_cpu.where(pd.notnull(merged_cpu), None)
display(merged_cpu)

Unnamed: 0,Brand,Name,Chipset,Score,Price,URL,Timestamp,Note
0,AMD,Athlon 3000G,AM4,-1,219.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
1,AMD,Ryzen 3 1200,AM4,6282,100.0,,,
2,AMD,Ryzen 3 3100,AM4,11587,210.0,,,
3,AMD,Ryzen 3 3200G,AM4,7124,300.0,,,
4,AMD,Ryzen 3 3200G,AM4,-1,299.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:10:07.371057,
5,AMD,Ryzen 3 3200G,,-1,259.0,https://czone.my/czone/yd3200c5fhbox-173910723...,2025-03-19 14:21:03.566474,
6,AMD,Ryzen 5 1400,AM4,7747,200.0,,,
7,AMD,Ryzen 5 1600,AM4,12278,180.0,,,
8,AMD,Ryzen 5 3400G,AM4,9242,340.0,,,
9,AMD,Ryzen 5 3400GE,AM4,8879,320.0,,,


Commented out because manually cleaned (don't want to overwrite)

In [None]:
# merged_cpu.to_csv('data/parts/CPU_Data(new).csv', index=False, encoding='utf-8')

### Case

In [25]:
pd.set_option('display.max_rows', 60) # Default 60, otherwise None (to see all)

In [85]:
pc_case = pd.read_csv('data/parts/Case_Data.csv')
bsc_case = pd.read_csv('data/parts/brightstarcomp/case.csv')
czone_case = pd.read_csv('data/parts/czone/computer-cases.csv')

In [86]:
print(f'{pc_case.columns}\n{bsc_case.columns}\n{czone_case.columns}')


Index(['Brand', 'Model', 'Size', 'Color', 'Price'], dtype='object')
Index(['url', 'name', 'price', 'timestamp', 'Note'], dtype='object')
Index(['url', 'name', 'price', 'timestamp', 'Note'], dtype='object')


In [None]:
def parse_case_details(product_string):
    # Split the string into parts
    parts = product_string.split()
    brands = ['1st Player', 'Aerocool', 'Aigo', 'Antec', 'Asus', 'BeQuiet', 'Cooler Master',
              'Corsair', 'Deepcool', 'DeepCool', 'E-SUPERCASE', 'Fractal Design', 'GIGABYTE',
              'Gigabyte', 'HYTE', 'Hyte', 'Invasion', 'Lian Li', 'MAGNIUMGEAR', 'MONTECH', 'Montech', 'MSI', 
              'NZXT', 'Phanteks', 'Segotep', 'Slevcase', 'SSUPD', 'Tecware', 'Thermaltake',
              'X-Five', 'Zalman', 'Gaming Freak', 'Jonsbo', 'Silverstone']
    
    # Extract Brand (check for multi-word matches)
    brand = next((brand for brand in brands if " ".join(parts[:len(brand.split())]) == brand), None)
    
    # Extract Name (everything after Brand)
    if brand:
        brand_index = len(brand.split())  # Get the number of words in the brand
        name = " ".join(parts[brand_index:])
    else:
        name = " ".join(parts)

    # Clean name
    strings = ['Computer Case', 'PC Chassis', 'Pc Chassis', 'Chassis', 'PC Case', 'Pc Case', 'Case']
    for string in strings:
        name = name.replace(string, '').strip()
    
    # Extract extra data (content inside parentheses)
    if '(' in product_string and ')' in product_string:
        start = product_string.find('(')
        end = product_string.find(')')
        extra_data = product_string[start + 1:end]

        # Remove extra data from name
        name = name.replace(product_string[start:end+1], '').strip()
    else:
        extra_data = None

    # Extract size (if available) - ensure exact matches
    sizes = ['Micro-ATX', 'Mini-ITX', 'E-ATX','EATX', 'mATX','MATX', 'ATX', 'ITX']
    size = None
    for s in sizes:
        if f"{s}" in f"{product_string}":
            size = s
            break  # Exit the loop as soon as a match is found

    # Remove size from name if found
    if size:
        name = name.replace(size, '').strip()
    else:
        size = None

    # Extract tower type (if available)
    towers = ['Mid-Tower', 'Mid Tower', 'Full-Tower', 'Full Tower', 'Super-Tower']
    tower = next((t for t in towers if t in product_string), None)

    # Append tower type to extra data if found
    if tower:
        name = name.replace(tower, '').strip()
        if extra_data:
            extra_data += f", {tower}"
        else:
            extra_data = tower

    return {
        "Brand": brand,
        "Name": name,
        "Size": size,
        "Price": None,
        "URL": None,
        "Timestamp": None,
        "Note": extra_data
    }

case_name_strings = bsc_case['name'].tolist() + czone_case['name'].tolist()
new_case_df = pd.DataFrame([parse_case_details(name) for name in case_name_strings])
new_case_df['Price'] = bsc_case['price'].tolist() + czone_case['price'].tolist()
new_case_df['URL'] = bsc_case['url'].tolist() + czone_case['url'].tolist()
new_case_df['Timestamp'] = bsc_case['timestamp'].tolist() + czone_case['timestamp'].tolist()

display(new_case_df)

In [128]:
new_case_df.to_csv('data/parts/Case_Data(new-excl-old).csv', index=False, encoding='utf-8')

In [None]:
### Replacing timestamps (cuz it got broken idk how)

timestamp_library = {
    10:07.4 : 2025-03-18 22:10:07.371057	
}

### MOBO

In [17]:
pd.set_option('display.max_rows', 60) # Default 60, otherwise None (to see all)

In [4]:
pc_mobo = pd.read_csv('data/parts/MOBO_Data.csv')
bsc_mobo = pd.read_csv('data/parts/brightstarcomp/motherboard.csv')
czone_mobo = pd.read_csv('data/parts/czone/motherboards.csv')

In [5]:
print(f'{pc_mobo.columns}\n{bsc_mobo.columns}\n{czone_mobo.columns}')

Index(['Brand', 'Name', 'Socket', 'Size', 'RAM Slot', 'NVME Slot', 'Wifi',
       'Price'],
      dtype='object')
Index(['url', 'name', 'price', 'timestamp', 'Note'], dtype='object')
Index(['url', 'name', 'price', 'timestamp', 'Note'], dtype='object')


In [None]:
display(bsc_mobo)

In [None]:
display(czone_mobo)

In [None]:
def parse_mobo_details(product_string):
    # Split string
    string = product_string.split()
    
    # Extract brand:
    brand = string[0]

    # Extract name:
    name = " ".join(string[1:])

    # Clean name:
    strings = ['Motherboard', 'Mother Board', 'Mother-Board', 'Motherboard', 'PC Motherboard', 'PC Mother Board', 'PC Mother-Board']
    for string in strings:
        name = name.replace(string, '').strip()

    # Extract socket:
    sockets = ['LGA 1700', 'LGA 1851', 'LGA1700', 'LGA1851', 'AM4', 'AM5']
    socket = next((s for s in sockets if s in product_string), None)
    if socket:
        name = name.replace(socket, '').strip()
    else:
        socket = None

    # Extract size:
    sizes = ['Micro-ATX', 'Mini-ITX', 'E-ATX','EATX', 'mATX','MATX', 'ATX', 'ITX']
    size = next((s for s in sizes if s in product_string), None)
    if size:
        name = name.replace(size, '').strip()
    else:
        size = None
        
    #Extract wifi:
    wifi = next((w for w in ['Wifi', 'Wi-Fi', 'WiFi', 'WIFI'] if w in product_string), None)
    if wifi:
        wifi = 1
    else:
        wifi = None

    # Extract extra data
    extra_data = next((d for d in ['DDR4', 'DDR5'] if d in product_string), None)
    if extra_data:
        name = name.replace(extra_data, '').strip()
    else:
        extra_data = None

    return {
        'Brand': brand,
        'Name': name,
        'Socket': socket,
        'Size': size,
        'RAM Slot': None,
        'NVME Slot': None,
        'Wifi': wifi,
        'Price': None,
        "URL": None,
        "Timestamp": None,
        "Note": extra_data
    }

mobo_name_strings = bsc_mobo['name'].tolist() + czone_mobo['name'].tolist()
new_mobo_df = pd.DataFrame([parse_mobo_details(name) for name in mobo_name_strings])
new_mobo_df['Price'] = bsc_mobo['price'].tolist() + czone_mobo['price'].tolist()
new_mobo_df['URL'] = bsc_mobo['url'].tolist() + czone_mobo['url'].tolist()
new_mobo_df['Timestamp'] = bsc_mobo['timestamp'].tolist() + czone_mobo['timestamp'].tolist()

display(new_mobo_df)

In [9]:
new_mobo_df.to_csv('data/parts/MOBO_Data(new-excl-old).csv', index=False, encoding='utf-8')

In [18]:
new_mobo_df = pd.read_csv('data/parts/MOBO_Data(new-excl-old).csv')
display(new_mobo_df)

Unnamed: 0,Brand,Name,Socket,Size,RAM Slot,NVME Slot,Wifi,Price,URL,Timestamp,Note
0,ASRock,B650 Steel Legend WiFi,AM5,ATX,,,1.0,"RM 1,049.00",https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
1,ASRock,B760 Pro RS,LGA1700,ATX,,,,RM 669.00,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
2,ASRock,B760M PG Lightning/D4 M-,LGA1700,ATX,,,,RM 499.00,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
3,ASRock,PG Z790 RIPTIDE WIFI,LGA1700,ATX,,,1.0,"RM 1,099.00",https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:27,
4,ASRock,Phantom Gaming X870 Riptide WiFi,AM5,ATX,,,1.0,"RM 1,499.00",https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:27,
...,...,...,...,...,...,...,...,...,...,...,...
257,MSI,X870-P WiFi,,,,,1.0,1399,https://czone.my/czone/pro x870-p wifi-1739103...,2025-03-19 14:21:24,DDR5
258,MSI,X870E Carbon WiFi,,,,,1.0,2299,https://czone.my/czone/mpg x870e carbon wifi-1...,2025-03-19 14:21:24,DDR5
259,MSI,X870E Edge Ti WiFi,,,,,1.0,1899,https://czone.my/czone/mpg x870e edge ti wifi-...,2025-03-19 14:21:21,DDR5
260,MSI,X870E Tomahawk WiFi,,,,,1.0,1799,https://czone.my/czone/mag x870e tomahawk wifi...,2025-03-19 14:21:21,DDR5


In [19]:
import re

def clean_price(price):
    # Use regex to remove non-numeric characters except for the decimal point
    cleaned_price = re.sub(r'[^\d.]', '', price)
    return float(cleaned_price) if cleaned_price else None

new_mobo_df['Price'] = new_mobo_df['Price'].apply(clean_price)

display(new_mobo_df)

Unnamed: 0,Brand,Name,Socket,Size,RAM Slot,NVME Slot,Wifi,Price,URL,Timestamp,Note
0,ASRock,B650 Steel Legend WiFi,AM5,ATX,,,1.0,1049.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
1,ASRock,B760 Pro RS,LGA1700,ATX,,,,669.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
2,ASRock,B760M PG Lightning/D4 M-,LGA1700,ATX,,,,499.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:26,
3,ASRock,PG Z790 RIPTIDE WIFI,LGA1700,ATX,,,1.0,1099.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:27,
4,ASRock,Phantom Gaming X870 Riptide WiFi,AM5,ATX,,,1.0,1499.0,https://brightstarcomp.com/collections/pc-comp...,2025-03-18 22:07:27,
...,...,...,...,...,...,...,...,...,...,...,...
257,MSI,X870-P WiFi,,,,,1.0,1399.0,https://czone.my/czone/pro x870-p wifi-1739103...,2025-03-19 14:21:24,DDR5
258,MSI,X870E Carbon WiFi,,,,,1.0,2299.0,https://czone.my/czone/mpg x870e carbon wifi-1...,2025-03-19 14:21:24,DDR5
259,MSI,X870E Edge Ti WiFi,,,,,1.0,1899.0,https://czone.my/czone/mpg x870e edge ti wifi-...,2025-03-19 14:21:21,DDR5
260,MSI,X870E Tomahawk WiFi,,,,,1.0,1799.0,https://czone.my/czone/mag x870e tomahawk wifi...,2025-03-19 14:21:21,DDR5


In [22]:
## Merge MOBO dataset
notPC_mobo = pd.read_csv('data/parts/MOBO_Data(new-excl-old).csv')

mobo_merged = pd.merge(pc_mobo, new_mobo_df, how='outer')
# Remove Chipset blankspace
mobo_merged['Socket'] = [str(socket).replace(' ', '') for socket in mobo_merged['Socket']]



mobo_merged.to_csv('data/parts/MOBO_Data(new).csv', index=False, encoding='utf-8')

In [None]:
display(mobo_merged)