In [1]:
from sqlalchemy import or_
from sqlalchemy.orm import sessionmaker
from HardwareSwap.Models import Base, engine, Post, PostType, get_or_create
import tqdm

In [2]:
# Schema bootstrap. With `recreate` set to True the tables registered on
# Base.metadata are dropped first, so every run starts from an empty schema;
# set it to False to keep whatever is already stored.
recreate = True
if recreate:
    Base.metadata.drop_all(engine)

# Creates any table that does not exist yet (runs in both modes).
Base.metadata.create_all(engine)

# Session factory bound to the project engine; call Session() to open a session.
Session = sessionmaker(bind=engine)

In [3]:
# Open the working session and look up (or create) one PostType per category.
# The labels are evaluated left to right, so the calls happen in the same
# order as the names they are unpacked into.
s = Session()
buying, selling, trading, giveaway, meta, official, alert = (
    get_or_create(s, PostType, post_type=label)
    for label in ("buying", "selling", "trading", "giveaway", "meta", "official", "alert")
)


In [4]:
%load_ext autoreload
%autoreload 1
%aimport HardwareSwap
%aimport HardwareSwap.DownloadData
%aimport HardwareSwap.DownloadData.download_data
from HardwareSwap.DownloadData import download_data, remove_duplicate_rows
import os
import datetime
import pytz
import pandas as pd
from HardwareSwap.Models.Post import get_regex_to_parse_title
import re
import json

In [5]:
# Root folder holding the raw JSON dumps. The original machine-specific path is
# kept as the default; set HARDWARESWAP_RAW_DATA_DIR to run the notebook on a
# machine where that path does not exist.
json_dir = os.environ.get(
    "HARDWARESWAP_RAW_DATA_DIR",
    "/home/neil/RandomProjects/hardwareswap/raw_data/",
)
pcpartpicker_gpu_dir = os.path.join(json_dir, "pcpartpicker_gpu")

# Load the posts matching the data_*-*.json glob.
# NOTE(review): `limit=50` presumably caps how many files are read — confirm
# against download_data.load_dataframe_from_disk.
raw_data = download_data.load_dataframe_from_disk(os.path.join(json_dir, "data_*-*.json"), limit=50)

print(f"Found {len(raw_data)} posts")

# Hand the loaded posts to the Post model using the session `s`, then run its
# clean-up step (what `clean` does is defined in HardwareSwap.Models).
Post.create_bulk(raw_data, s)
Post.clean(s)

  6%|▌         | 278/4998 [00:00<00:01, 2779.90it/s]

Found 4998 posts


100%|██████████| 4998/4998 [00:02<00:00, 2167.52it/s]


Inserting 4998 items... Done!


In [6]:

# Read the PCPartPicker GPU listing (JSON) into `gpus_all`.
pcpartpicker_gpus_fname = os.path.join(pcpartpicker_gpu_dir, "pcpartpicker_gpu.json")
if not os.path.isfile(pcpartpicker_gpus_fname):
    # Stop here with the offending path instead of silently skipping: the next
    # cell reads `gpus_all` unconditionally, so a skipped load would surface as
    # a NameError there, or reuse a stale value from an earlier kernel run.
    raise FileNotFoundError(f"GPU listing not found: {pcpartpicker_gpus_fname}")
# JSON text is UTF-8; do not depend on the platform default encoding.
with open(pcpartpicker_gpus_fname, 'r', encoding="utf-8") as fp:
    gpus_all = json.load(fp)

In [67]:
# Working frame built from the loaded GPU listing.
df = pd.DataFrame(gpus_all)

# Remove the literal word "Chipset" from every chipset label. Vectorised string
# replacement on the column (literal match, not a regex) instead of a
# row-by-row DataFrame.apply.
df["chipset"] = df["chipset"].str.replace("Chipset", "", regex=False)

# Classification columns, filled in by the tagging steps further down.
df["brand"] = ""
df["mfg"] = ""
df["series"] = ""
df["model"] = ""

# set the brands
# (keyword found in the chipset label, GPU vendor). Applied in order, so when a
# label matches several keywords the last matching entry wins.
chipset_brand_keywords = [
    ("GeForce", "nvidia"),
    ("Quadro", "nvidia"),
    ("NVS", "nvidia"),
    ("RTX", "nvidia"),
    ("Titan", "nvidia"),
    ("Radeon", "amd"),
    ("FirePro", "amd"),
    ("Vega", "amd"),
    ("FireGL", "amd"),
]
for keyword, vendor in chipset_brand_keywords:
    # Boolean-mask assignment must go through .loc; .at is the scalar
    # (single row label / column label) accessor and is not meant for masks.
    # The match is a case-sensitive literal substring test.
    df.loc[df["chipset"].str.contains(keyword, regex=False), "brand"] = vendor

# manufacture
# Board manufacturers looked for in the product name. Matching is a
# case-sensitive substring test and the list is applied in order, so a name
# that contains several entries ends up with the last one that matches.
brands = ["Sapphire","Asus","Gigabyte","EVGA","MSI","PNY","Zotac", "Lenovo", "GALAX","Inno3D","Jaton Video",
          "Galaxy","Sparkle", "ECS","Palit","KFA2" ,"OcUK","Gainward","XFX","BFG","NVIDIA","HP","Leadtek",
          "Zogis","Colorful","Biostar","Corsair","Dell", "HIS", "PowerColor", "VisionTek","Diamond", "Club 3D", "ASRock", "ATI","AMD", "Yeston"]
for brand in brands:
    # .loc (not the scalar accessor .at) is the indexer for boolean-mask
    # assignment; regex=False treats the manufacturer name as literal text.
    df.loc[df["name"].str.contains(brand, regex=False), "mfg"] = brand.lower()

    
#df = df[df["chipset"].str.contains("Quadro")].copy()
#df = df[df["brand"]=="nvidia"].copy()
#df.reset_index(inplace=True, drop=True)
# Series - Nvidia
def _series_entry(cards, suffix=None):
    """Build one series_mapping value: base card names plus optional name suffixes."""
    return {"cards": cards, "suffix": [] if suffix is None else suffix}


# Series label -> base card names ("cards") and the suffixes ("suffix") that may
# follow a base name. Keys starting with "lookup-" are placeholders rather than
# real series labels.
series_mapping = {
    # Titan family
    "lookup-titan": _series_entry(["GTX Titan"], ["X", "Xp", "Z", "V", "Black", "(Pascal)"]),
    "lookup-titan2": _series_entry(["Titan V", "TITAN RTX", "Titan X (Pascal)", "Titan Xp"]),
    # GeForce RTX / GTX / GT
    "RTX 3000": _series_entry(["RTX 3090", "RTX 3080", "RTX 3070", "RTX 3060"], ["Ti", "LHR", "SUPER"]),
    "RTX 2000": _series_entry(["RTX 2080", "RTX 2070", "RTX 2060"], ["Ti", "SUPER"]),
    "GTX 1000": _series_entry(
        ["GTX 1080", "GTX 1070", "GTX 1060", "GTX 1050", "GTX 1660", "GTX 1650", "GT 1030"],
        ["Ti", "SUPER"],
    ),
    "GTX 900": _series_entry(
        ["GTX 980", "GTX 970", "GTX 960", "GTX 950", "GT 940", "GT 930", "GT 920"],
        ["Ti"],
    ),
    "GTX 700": _series_entry(
        ["GTX 780", "GTX 770", "GTX 760", "GTX 750", "GT 740", "GT 730", "GT 720", "GT 710"],
        ["Ti"],
    ),
    "GTX 600": _series_entry(
        ["GTX 690", "GTX 680", "GTX 670", "GTX 660", "GTX 650", "GT 640", "GT 630", "GT 620", "GT 610"],
        ["Ti"],
    ),
    "GTX 500": _series_entry(
        ["GTX 590", "GTX 580", "GTX 570", "GTX 560", "GTX 550", "GT 520"],
        ["Ti", "Ti 448", "X2", "SE"],
    ),
    "GTX 400": _series_entry(
        ["GTX 480", "GTX 470", "GTX 465", "GTX 460", "GTS 450", "GT 440", "GT 430"],
    ),
    "GTX 200": _series_entry(
        ["GTX 295", "GTX 285", "GTX 280", "GTX 275", "GTX 270", "GTX 260", "GTS 250", "GT 220", "GeForce 210"],
    ),
    # Older GeForce generations
    "9000": _series_entry(
        ["GeForce 9800", "GeForce 9600", "GeForce 9500", "GeForce 9400"],
        ["GTX+", "GTX", "GT", "GTS", "GS"],
    ),
    "8000": _series_entry(["GeForce 8500", "GeForce 8400"]),
    "7000": _series_entry(["GeForce 7200"]),
    "5000": _series_entry(["FX 5200"]),
    # Workstation cards
    "AXXX": _series_entry(["RTX A6000", "RTX A4000"]),
    "NVS": _series_entry(["NVS 810", "NVS 420", "NVS 450", "NVS 295"]),
    "Quadro K": _series_entry(
        ["K5200", "K5000", "K4000", "K4200", "K2200", "K2000", "K1200", "K600", "K620", "K420"],
    ),
    "Quadro M": _series_entry(["M6000", "M5000", "M4000", "M2000"]),
    "Quadro P": _series_entry(
        ["P6000", "P5000", "P4000", "P2000", "P2000D", "P620", "P600", "P1000", "P2200", "P400"],
    ),
    "Quadro RTX": _series_entry(["RTX 8000", "RTX 6000", "RTX 5000", "RTX 4000"]),
    "Quadro": _series_entry(
        ["Quadro 5000", "Quadro 6000", "Quadro 4000", "Quadro 400", "Quadro 410", "Quadro 2000D"],
    ),
    "lookup-G<>100": _series_entry(["Quadro GV100", "Quadro GP100"]),
    "FX 1000": _series_entry(["Quadro FX 1800"]),
}
#for series, models in series_mapping.items():
#    for card in models["cards"]:
#        df.at[(df["chipset"].str.contains(card)) & (df["brand"]=="nvidia"), "series"] = series
#        df.at[(df["chipset"].str.contains(card)) & (df["brand"]=="nvidia"), "model"] = card
#        for suffix in models["suffix"]:
#            df.at[(df["chipset"].str.contains(card)) & (df["chipset"].str.endswith(suffix)) & (df["brand"]=="nvidia"), "model"] = f"{card}{suffix}"
        


# Display the rows tagged as AMD whose `model` column is still empty.
df[df["brand"].eq("amd") & df["model"].eq("")]
#df[(df["mfg"]=="") ]

Unnamed: 0,name,chipset,price,url,brand,mfg,series,model
0,Sapphire 100352-4L,Radeon HD 7950,0,/product/N9V48d/sapphire-video-card-1003524l,amd,sapphire,,
5,Sapphire 11197-03-40G,Radeon HD 7970,0,/product/3pXfrH/sapphire-video-card-111970340g,amd,sapphire,,
7,Sapphire 11199-10-40G,Radeon HD 7870 GHz Edition,0,/product/73rG3C/sapphire-video-card-111991040g,amd,sapphire,,
8,Sapphire 11199-10-40G,Radeon HD 7870 GHz Edition,0,/product/9mbp99/sapphire-video-card-111991920g,amd,sapphire,,
9,Sapphire 11201-20-20G,Radeon HD 7770 GHz Edition,0,/product/8gV48d/sapphire-video-card-112012020g,amd,sapphire,,
11,MSI R7950-3GD5/OC BE,Radeon HD 7950,0,/product/TgkD4D/msi-video-card-r79503gd5ocbe,amd,msi,,
12,HIS H779FT1GD,Radeon HD 7790,0,/product/z3h9TW/his-video-card-h779ft1gd,amd,his,,
13,Sapphire Tri-X Toxic,Radeon R9 270X,0,/product/8rTmP6/sapphire-video-card-100364txsr,amd,sapphire,,
14,Gigabyte GV-R726XOC-2GD,Radeon R7 260X,0,/product/pHw323/gigabyte-video-card-gvr726xoc2gd,amd,gigabyte,,
15,MSI R7 260X 2GD5 OC,Radeon R7 260X,0,/product/KptCmG/msi-video-card-r7260x2gd5oc,amd,msi,,


In [None]:
# Distinct chipset labels of the rows tagged as nvidia, sorted alphabetically
# and printed one per line.
items = sorted(df.loc[df["brand"] == "nvidia", "chipset"].unique())
for item in items:
    print(item)

In [None]:
# Text after the last space of every chipset label (duplicates kept).
ends = [item.rsplit(" ", 1)[-1] for item in items]

# Print each distinct trailing token once, in sorted order, skipping the
# purely numeric ones.
for item in sorted(set(ends)):
    if item.isnumeric():
        continue
    print(item)
        

In [None]:
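# NOTE: reference notes, not runnable code. This appears to be the pasted list
# of non-numeric trailing chipset tokens printed by the cell above; it is kept
# as comments so the cell does not fail when the notebook is run top to bottom.
# Black
# Boost
# G5
# G6
# GP100
# GS
# GSO
# GT
# GTX+
# K2000D
# K4000M
# LHR
# RTX
# SE
# SUPER
# Ti
# Titan
# V
# X
# X2
# Xp
# Z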
