In [2]:
import sqlite3
import json
import pandas as pd

## gpus前處理

In [3]:
# Open the gpus.db SQLite database (path is relative to the working directory).
gpus_conn = sqlite3.connect('gpus.db')

# Pull every row of the gpus table into a DataFrame and display it.
df = pd.read_sql_query(sql="SELECT * FROM gpus", con=gpus_conn)
df

Unnamed: 0,id,name,score
0,1,NVIDIA RTX PRO 6000 Blackwell,15814
1,2,NVIDIA GeForce RTX 5090,14469
2,3,NVIDIA GeForce RTX 5090 D,14417
3,4,NVIDIA GeForce RTX 4090,9230
4,5,NVIDIA GeForce RTX 5080,8749
...,...,...,...
269,277,AMD Radeon Graphics (Granite Ridge),129
270,278,NVIDIA GeForce GTX 750 Ti,123
271,279,AMD Radeon Graphics (Raphael),122
272,280,NVIDIA GeForce GTX 1050,120


In [4]:
# Convert the score column to a numeric dtype (some values may be stored as
# strings); values that cannot be parsed become NaN because of errors='coerce'.
df['score'] = pd.to_numeric(df['score'], errors='coerce')

# For every GPU name keep only the row with the highest score: sort by score
# descending, then keep the first occurrence of each name.
df_deduplicated = df.sort_values(by='score', ascending=False).drop_duplicates(subset='name', keep='first')

# Replace the table contents with the de-duplicated rows as ONE transaction.
# The DELETE is deliberately not committed on its own: if the re-insert fails,
# the rollback restores the original rows instead of leaving gpus empty.
cursor = gpus_conn.cursor()
try:
    cursor.execute("DELETE FROM gpus")
    df_deduplicated.to_sql('gpus', gpus_conn, if_exists='append', index=False)
    gpus_conn.commit()
except Exception:
    gpus_conn.rollback()
    raise

df_deduplicated

Unnamed: 0,id,name,score
0,1,NVIDIA RTX PRO 6000 Blackwell,15814
134,142,NVIDIA GeForce RTX 5090,14480
135,143,NVIDIA GeForce RTX 5090 D,14425
136,144,NVIDIA GeForce RTX 4090,9236
137,145,NVIDIA GeForce RTX 5080,8762
...,...,...,...
128,136,AMD Radeon Graphics (Granite Ridge),130
129,137,NVIDIA GeForce GTX 750 Ti,124
271,279,AMD Radeon Graphics (Raphael),122
272,280,NVIDIA GeForce GTX 1050,120


## gpu mapping表

In [5]:
# Step 1: read the chipset-name mapping checklist from its JSON file and show it.
with open("3 gpu_mapping_checklist.json", mode="r", encoding="utf-8") as mapping_file:
    mapping = json.load(mapping_file)
mapping

{'AMD R7 240 系列': None,
 'AMD RX550': 'AMD Radeon RX 550',
 'AMD RX5500XT': 'AMD Radeon RX 5500 XT',
 'AMD RX560': None,
 'AMD RX5600XT': 'AMD Radeon RX 5600 XT',
 'AMD RX570': 'AMD Radeon RX 570',
 'AMD RX5700': 'AMD Radeon RX 5700',
 'AMD RX5700XT': 'AMD Radeon RX 5700 XT',
 'AMD RX580': 'AMD Radeon RX 580',
 'AMD RX590': 'AMD Radeon RX 590',
 'AMD RX6400': 'AMD Radeon RX 6400',
 'AMD RX6500XT': 'AMD Radeon RX 6500 XT',
 'AMD RX6600': 'AMD Radeon RX 6600',
 'AMD RX6600XT': 'AMD Radeon RX 6600 XT',
 'AMD RX6650XT': 'AMD Radeon RX 6650 XT',
 'AMD RX6700XT': 'AMD Radeon RX 6700 XT',
 'AMD RX6750XT': 'AMD Radeon RX 6750 XT',
 'AMD RX6800': 'AMD Radeon RX 6800',
 'AMD RX6800 首批公版卡': 'AMD Radeon RX 6800',
 'AMD RX6800XT': 'AMD Radeon RX 6800 XT',
 'AMD RX6800XT 首批公版卡': 'AMD Radeon RX 6800 XT',
 'AMD RX6900XT': 'AMD Radeon RX 6900 XT',
 'AMD RX6950XT': 'AMD Radeon RX 6950 XT',
 'AMD Radeon 550 系列': 'AMD Radeon RX 550',
 'AMD Radeon RX 550 系列': 'AMD Radeon RX 550',
 'AMD Radeon RX7600-8G': '

## VGA前處理

In [6]:
# Step 2: Open vga.db and create the cursor used by the following cells.
# NOTE(review): no schema change is performed in this cell; the later UPDATE
# statements assume the vga table already has pure_chipset, score and CP
# columns — confirm before running against a fresh database.
vga_conn = sqlite3.connect("vga.db")
vga_cursor = vga_conn.cursor()

In [7]:
# Step 3 (part 1): fetch the (rowid, chipset) pair of every vga row; the next
# cell uses these pairs to fill in pure_chipset from the mapping.
rows = vga_cursor.execute("SELECT rowid, chipset FROM vga").fetchall()

In [8]:
# Step 3 (part 2): look up each raw chipset label in the mapping and store the
# result in pure_chipset. Labels missing from the mapping (or mapped to null)
# yield None, which is written as NULL.
chipset_updates = (
    (mapping.get(raw_chipset), row_id) for row_id, raw_chipset in rows
)
vga_cursor.executemany(
    "UPDATE vga SET pure_chipset = ? WHERE rowid = ?", chipset_updates
)

vga_conn.commit()

In [9]:
# Step 4: load the (name, score) pairs from the gpus table and display them.
gpus_df = pd.read_sql_query(sql="SELECT name, score FROM gpus", con=gpus_conn)
gpus_df

Unnamed: 0,name,score
0,NVIDIA RTX PRO 6000 Blackwell,15814
1,AMD Radeon RX 9070 XT,7251
2,AMD Radeon RX 7900 XTX,6838
3,AMD Radeon RX 9070,6283
4,AMD Radeon RX 7900 XT,5618
...,...,...
130,NVIDIA GeForce GTX 1650,327
131,AMD Radeon RX 6500 XT,249
132,AMD Radeon Vega 7 (Cezanne),225
133,AMD Radeon Graphics (Raphael),122


In [10]:
# Step 5: copy each GPU's score onto every vga row whose pure_chipset equals
# that GPU's name. Parameters are ordered (score, name) to match the placeholders.
score_updates = zip(gpus_df["score"].tolist(), gpus_df["name"].tolist())
vga_cursor.executemany(
    "UPDATE vga SET score = ? WHERE pure_chipset = ?", score_updates
)

vga_conn.commit()

In [11]:
# Fill the CP column with score / price.
# CP is set to NULL when score or price is NULL, or when price is 0, so the
# division is never attempted with a zero divisor.
cp_update_sql = """
    UPDATE vga
    SET CP = CASE
        WHEN score IS NOT NULL AND price IS NOT NULL AND price != 0 THEN CAST(score AS REAL) / price
        ELSE NULL
    END
"""
vga_cursor.execute(cp_update_sql)
vga_conn.commit()

In [12]:
# Report the row count before the purge.
rows_before = vga_cursor.execute("SELECT COUNT(*) FROM vga").fetchone()[0]
print(f"刪除前總筆數: {rows_before}")

# Remove every row whose pure_chipset is NULL.
vga_cursor.execute("DELETE FROM vga WHERE pure_chipset IS NULL")
vga_conn.commit()

# Report the row count after the purge.
rows_after = vga_cursor.execute("SELECT COUNT(*) FROM vga").fetchone()[0]
print(f"刪除後總筆數: {rows_after}")

刪除前總筆數: 101558
刪除後總筆數: 83203


In [13]:
# Step 6: read the whole vga table back and display it to inspect the updates.
vga_updated_df = pd.read_sql_query(sql="SELECT * FROM vga", con=vga_conn)
vga_updated_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
0,20200105,NVIDIA GTX1050 3G,微星 GTX1050 3GT OC飆風(1582MHz/雙/21.5cm/註四年),4390,NVIDIA GeForce GTX 1050,120.0,0.027335
1,20200105,NVIDIA GTX1050 3G,EVGA GTX1050 3GB SC GAMING(1455MHz/單風/14.5cm/6...,4290,NVIDIA GeForce GTX 1050,120.0,0.027972
2,20200105,NVIDIA GTX1050Ti,華碩 PH-GTX1050TI-4G鳳凰版(1392MHz/19.2cm/單風扇/註四年),3990,NVIDIA GeForce GTX 1050 Ti,305.0,0.076441
3,20200105,NVIDIA GTX1050Ti,微星 GTX1050Ti AERO 4G OC(1455MHz/15.5cm/單風扇註四年),3990,NVIDIA GeForce GTX 1050 Ti,305.0,0.076441
4,20200105,NVIDIA GTX1050Ti,技嘉 GTX1050Ti D5 4G(1392MHz/17.2cm/單風扇/註四年),3990,NVIDIA GeForce GTX 1050 Ti,305.0,0.076441
...,...,...,...,...,...,...,...
83198,20251118,AMD Radeon RX9070XT-16G,撼訊 RX9070XT 16G-A 遊蕩者(2970MHz/31cm/三風扇/三年保)*MI...,22990,AMD Radeon RX 9070,6283.0,0.273293
83199,20251118,AMD Radeon RX9070XT-16G,撼訊 RX9070XT 16G-L/OC 暗黑犬(3010MHz/34cm/三風扇/註四年)...,24990,AMD Radeon RX 9070,6283.0,0.251421
83200,20251118,AMD Radeon RX9070XT-16G,撼訊 RX9070XT 16G-L/OC/WHITE 暗黑犬(白)(3010MHz/34cm...,25490,AMD Radeon RX 9070,6283.0,0.246489
83201,20251118,AMD Radeon RX9070XT-16G,撼訊 RX9070XT 16G-L/OC/REVA 夜嵐紀念版(3010MHz/35cm/三...,25490,AMD Radeon RX 9070,6283.0,0.246489


In [14]:
# Working copy of the vga table used by the filtering cells below.
vga_df = pd.read_sql_query(sql="SELECT * FROM vga", con=vga_conn)

In [15]:
# Chipset category labels to exclude. A row is dropped when its chipset text
# contains any of these entries as a substring.
chipset_exclude_keywords = [
    'AMD 工作站繪圖卡 (客訂交貨.歡迎議價)',
    'NVIDIA / AMD 外接顯卡轉接盒 (需另購顯卡)',
    'NVIDIA / AMD 顯示卡周邊配件',
    'NVIDIA Quadro 專業繪圖卡 (歡迎議價)',
    'NVIDIA / AMD 外接式顯卡轉接盒',
    'NVIDIA Quadro 專業繪圖卡',
    'NVIDIA 外接式顯卡轉接盒',
    'AMD 工作站繪圖卡',
]

# Product-name keywords to exclude. A row is dropped when its product text
# contains any of these entries as a substring.
product_exclude_keywords = [
    '贈',
    '抽',
    '送',
    '加購',
    '登錄',
    '活動',
    '限量',
    '現省',
    '現折',
    '現賺',
    '再加',
    '加送',
    '加價購',
    '送ROG',
    '延長線',
]

In [16]:
def _mentions_any(text, keywords):
    """Return True when at least one of `keywords` occurs as a substring of `text`."""
    for keyword in keywords:
        if keyword in text:
            return True
    return False


# Compare on string representations so non-string cells are handled as text.
product_text = vga_df['product'].astype(str)
chipset_text = vga_df['chipset'].astype(str)

# True marks rows to KEEP, i.e. rows that contain none of the excluded keywords.
product_mask = ~product_text.apply(
    lambda text: _mentions_any(text, product_exclude_keywords)
)
chipset_mask = ~chipset_text.apply(
    lambda text: _mentions_any(text, chipset_exclude_keywords)
)

In [17]:
# Keep rows that pass both keyword filters, ordered by CP from highest to lowest.
keep_mask = product_mask & chipset_mask
filtered_df = vga_df.loc[keep_mask].sort_values('CP', ascending=False)
filtered_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
80285,20250815,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
80529,20250817,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
80040,20250808,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
81200,20250917,NVIDIA RTX5050-8GB(GDDR6),[紅包500] 買技嘉 RTX5050顯卡+購 GP-P650G(650W)金牌電源(原價2...,1990,NVIDIA GeForce RTX 5050,2317.0,1.164322
81487,20250924,NVIDIA RTX5050-8GB(GDDR6),[紅包500] 買技嘉 RTX5050顯卡+購 GP-P650G(650W)金牌電源(原價2...,1990,NVIDIA GeForce RTX 5050,2317.0,1.164322
...,...,...,...,...,...,...,...
83137,20251118,NVIDIA 專業繪圖卡,麗臺 NVIDIA RTX 6000 Ada(48GB GDDR6 ECC/26.67cm/...,299000,NVIDIA Quadro RTX A6000,,
83138,20251118,NVIDIA 專業繪圖卡,麗臺 NVIDIA RTX A400(4G GDDR6 64bit/CUDA:768/16....,6400,NVIDIA Quadro RTX A6000,,
83139,20251118,NVIDIA 專業繪圖卡,麗臺 NVIDIA RTX A1000(8G GDDR6 128bit/CUDA:2304/...,15300,NVIDIA Quadro RTX A6000,,
83140,20251118,NVIDIA 專業繪圖卡,麗臺 NVIDIA RTX PRO 6000 Blackwell Max-Q 96GB GD...,329000,NVIDIA Quadro RTX A6000,,


In [18]:
# Keep only rows that have both a non-blank pure_chipset and a score.
has_chipset = (
    filtered_df['pure_chipset'].notna()
    & (filtered_df['pure_chipset'].str.strip() != '')
)
has_score = filtered_df['score'].notna() & (filtered_df['score'] != '')

filtered_df = filtered_df[has_chipset & has_score]

In [19]:
# Display the filtered frame after rows without a pure_chipset or score were dropped.
filtered_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
80285,20250815,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
80529,20250817,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
80040,20250808,NVIDIA RTX5070-12GB(GDDR7),[合購] INNO3D RTX5070 TWIN X2 OC 合購 金士頓 KC3000 5...,888,NVIDIA GeForce RTX 5070,5256.0,5.918919
81200,20250917,NVIDIA RTX5050-8GB(GDDR6),[紅包500] 買技嘉 RTX5050顯卡+購 GP-P650G(650W)金牌電源(原價2...,1990,NVIDIA GeForce RTX 5050,2317.0,1.164322
81487,20250924,NVIDIA RTX5050-8GB(GDDR6),[紅包500] 買技嘉 RTX5050顯卡+購 GP-P650G(650W)金牌電源(原價2...,1990,NVIDIA GeForce RTX 5050,2317.0,1.164322
...,...,...,...,...,...,...,...
45287,20220404,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,327.0,0.003270
44956,20220329,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,327.0,0.003270
45622,20220406,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,327.0,0.003270
44629,20220327,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,327.0,0.003270


In [20]:
# Save the filtered result to its own SQLite file, replacing any existing
# filtered_df table. The DataFrame index is not written.
new_conn = sqlite3.connect("filtered_df.db")
try:
    filtered_df.to_sql("filtered_df", new_conn, if_exists="replace", index=False)
    new_conn.commit()
finally:
    # Close the connection even when the write fails so the handle is released.
    new_conn.close()