In [1]:
from sqlalchemy import or_
from sqlalchemy.orm import sessionmaker
from HardwareSwap.Models import Base, engine, Post, PostType, get_or_create
import tqdm

In [2]:
recreate = True
if recreate:
    Base.metadata.drop_all(engine)
Base.metadata.create_all(engine)
Session = sessionmaker(bind = engine)

In [3]:
s = Session()
buying = get_or_create(s, PostType, post_type="buying")
selling = get_or_create(s, PostType, post_type="selling")
trading = get_or_create(s, PostType, post_type="trading")
giveaway = get_or_create(s, PostType, post_type="giveaway")
meta = get_or_create(s, PostType, post_type="meta")
official = get_or_create(s, PostType, post_type="official")
alert = get_or_create(s, PostType, post_type="alert")


In [4]:
%load_ext autoreload
%autoreload 1
%aimport HardwareSwap
%aimport HardwareSwap.DownloadData
%aimport HardwareSwap.DownloadData.download_data
from HardwareSwap.DownloadData import download_data, remove_duplicate_rows
import os
import datetime
import pytz
import pandas as pd
from HardwareSwap.Models.Post import get_regex_to_parse_title
import re
import json

In [5]:
json_dir = "/home/neil/RandomProjects/hardwareswap/raw_data/"
pcpartpicker_gpu_dir = os.path.join(json_dir, "pcpartpicker_gpu")
raw_data = download_data.load_dataframe_from_disk(os.path.join(json_dir, "data_*-*.json"), limit=50)

print(f"Found {len(raw_data)} posts")
Post.create_bulk(raw_data, s)
Post.clean(s)

  6%|▌         | 278/4998 [00:00<00:01, 2779.90it/s]

Found 4998 posts


100%|██████████| 4998/4998 [00:02<00:00, 2167.52it/s]


Inserting 4998 items... Done!


In [6]:

pcpartpicker_gpus_fname = os.path.join(pcpartpicker_gpu_dir, "pcpartpicker_gpu.json")
if os.path.isfile(pcpartpicker_gpus_fname):
    with open(pcpartpicker_gpus_fname, 'r') as fp:
        gpus_all = json.load(fp)

In [248]:
df = pd.DataFrame(gpus_all)
df["chipset"] = df.apply(lambda x: x["chipset"].replace("Chipset",""), axis=1)
df["brand"] = ""
df["mfg"] = ""

# set the brands
df.at[df["chipset"].str.contains("GeForce"), "brand"] = "nvidia"
df.at[df["chipset"].str.contains("Quadro"), "brand"] = "nvidia"
df.at[df["chipset"].str.contains("NVS"), "brand"] = "nvidia"
df.at[df["chipset"].str.contains("RTX"), "brand"] = "nvidia"
df.at[df["chipset"].str.contains("Titan"), "brand"] = "nvidia"
df.at[df["chipset"].str.contains("Radeon"), "brand"] = "amd"
df.at[df["chipset"].str.contains("FirePro"), "brand"] = "amd"
df.at[df["chipset"].str.contains("Vega"), "brand"] = "amd"
df.at[df["chipset"].str.contains("FireGL"), "brand"] = "amd"

# manufacture
brands = ["Sapphire","Asus","Gigabyte","EVGA","MSI","PNY","Zotac", "Lenovo", "GALAX","Inno3D","Jaton Video",
          "Galaxy","Sparkle", "ECS","Palit","KFA2" ,"OcUK","Gainward","XFX","BFG","NVIDIA","HP","Leadtek",
          "Zogis","Colorful","Biostar","Corsair","Dell", "HIS", "PowerColor", "VisionTek","Diamond", "Club 3D", "ASRock", "ATI","AMD", "Yeston"]
for brand in brands:
    df.at[df["name"].str.contains(brand), "mfg"] = brand.lower()


# Extract information about the gpus
result_names=["series","prefix","number","variant"]
for col in result_names:
    df[col] = ""
f_geforce = re.compile("(?P<series>(?:GeForce))?\s?(?P<prefix>(?:GTX)|(?:RTX)|(?:GT)|(?:GTS))\s?(?P<number>[123]?[0245679][1-9]0)\s?(?P<variant>(?:Ti))?")
f_quadro = re.compile("(?P<series>(?:Quadro))?\s?(?P<prefix>[KMP])\s?(?P<number>[12345678][02]0{2})\s?(?P<variant>(?:D))?")
f_titan = re.compile("(?P<series>(?:GeForce))?\s?(?P<prefix>(?:GTX)|(?:RTX))\s?(?P<number>(?:Titan))\s?(?P<variant>(?:Z)|(?:X)|(?:Black))?")
f_radeon = re.compile("(?P<series>(?:Radeon Pro)|(?:Radeon))\s?(?P<prefix>(?:RX)|(?:R[579])|(?:HD)|(?:WX))?\s?(?P<number>(?:[34567]?[123456789][03456789][05])|(?:Fury)|(?:VEGA [56][46])|(?:Nano)|(?:VII))?\s?(?P<variant>(?:X2)|(?:XT)|(?:X))?")
f_firepro = re.compile("(?P<series>(?:FirePro))\s?(?P<prefix>(?:[VWRS]))?\s?(?P<number>(?:[2345789]?[01246789][0567]0))\s?(?P<variant>)?")
#f_radeon = re.compile(r"7950")
for idx in range(len(df)):
    chipset = df.at[idx, "chipset"]
    for regex_function in [f_geforce, f_quadro, f_titan, f_radeon, f_firepro]:
        match = regex_function.match(chipset)
        if match is None:
            continue
        series,prefix,number,variant = match.group(*result_names)
        df.at[idx,"series"] = series if not None else ""
        df.at[idx,"prefix"] = prefix if not None else ""
        df.at[idx,"number"] = number if not None else ""
        df.at[idx,"variant"] = variant if not None else ""
        break

#df[(df["brand"]=="nvidia") & (df["model"]=="")]
df[(df["brand"]=="amd" ) & (df["number"]=="")]


Unnamed: 0,name,chipset,price,url,brand,mfg,series,prefix,number,variant
923,ATI FirePro RG220A,FirePro RG220A,239.99,/product/MpyFf7/ati-video-card-100505715,amd,ati,,,,
1020,Sapphire 31004-56-40A,FirePro W4300,0.0,/product/3rDzK8/sapphire-video-card-310045640a,amd,sapphire,,,,
1858,ATI 100-505144,FireGL V7300,119.99,/product/Cs2kcf/ati-video-card-100505144,amd,ati,,,,
3376,AMD 100-506061,Vega Frontier Edition,1750.0,/product/mCH48d/amd-vega-frontier-edition-air-...,amd,amd,,,,
4218,AMD 100-506062,Vega Frontier Edition Liquid,0.0,/product/8QrcCJ/amd-vega-frontier-edition-liqu...,amd,amd,,,,
4497,AMD FirePro RG220A,FirePro RG220A,0.0,/product/FK8H99/amd-video-card-100505715,amd,amd,,,,


In [187]:
chipset = df.at[0, "chipset"]
print(chipset)
f_radeon.match(chipset).groups()

Radeon HD 7950


('Radeon', 'HD', '7950', None)

In [188]:
matches = f_geforce.match("GeForce GT 430")
matches.group("series","prefix","number","variant")

('GeForce', 'GT', '430', None)

In [151]:
for item in set(df[df["brand"]=="amd" ]["chipset"].to_list()):
    print(item)


Radeon RX 6800
Radeon RX VEGA 64
FirePro S9050
Radeon RX 5700
Radeon HD 4670 X2
FirePro W9000
Radeon Pro Duo Polaris
Radeon HD 5670
Radeon Pro WX 3100
Radeon R9 280
FirePro V7900 SDI
FirePro 2450
Radeon Pro VII
Radeon RX 5600 XT
Radeon HD 6870 X2
Radeon HD 7750
FirePro W8000
Radeon HD 7970
Radeon HD 6850
Radeon HD 6770
FirePro V7750
Radeon HD 6970
Radeon RX 460
Radeon HD 4870
Radeon HD 6450
Radeon HD 4830
Radeon RX 580
Radeon RX 560 - 896
Radeon HD 4350
Radeon R7 360
Radeon HD 5550
Radeon RX 5500 XT
FirePro S7000
Radeon HD 7870 GHz Edition
Radeon RX 6700 XT
Radeon HD 6750
Radeon Pro W5500
FirePro W5000
Radeon HD 7790
Radeon R9 390
FirePro 2460
Radeon R9 380X
Radeon HD 4850
Radeon HD 7950
FirePro W4300
Radeon R5 230
Radeon HD 4650
FirePro S9000
Radeon R9 Nano
Radeon R9 Fury
Radeon HD 5570
FireGL V7300
Radeon HD 7990
Radeon R9 390X
Radeon RX 470
Radeon HD 5870
Radeon HD 4890
Radeon HD 7970 GHz Edition
Radeon VII
Radeon HD 6790
Vega Frontier Edition Liquid
Radeon R7 265
Radeon HD 3450
Fir

In [None]:
items = df[df["brand"]=="nvidia"]["chipset"].unique()
items = sorted(items)
for item in items:
    print(item)

In [None]:
ends = [item.split(" ")[-1] for item in items]
for item in sorted(list(set(ends))):
    if not item.isnumeric():
        print(item)
        

In [None]:
Black
Boost
G5
G6
GP100
GS
GSO
GT
GTX+
K2000D
K4000M
LHR
RTX
SE
SUPER
Ti
Titan
V
X
X2
Xp
Z
