In [1]:
import sqlite3
import json
import pandas as pd

## gpus前處理

In [2]:
# Open the user-uploaded gpus.db; this connection is reused by later cells.
gpus_conn = sqlite3.connect('gpus.db')

# Pull every row of the `gpus` table into a DataFrame and display it.
df = pd.read_sql_query(sql="SELECT * FROM gpus", con=gpus_conn)
df

Unnamed: 0,id,name,score
0,1,NVIDIA RTX PRO 6000 Blackwell,15814
1,2,NVIDIA GeForce RTX 5090,14469
2,3,NVIDIA GeForce RTX 5090 D,14417
3,4,NVIDIA GeForce RTX 4090,9230
4,5,NVIDIA GeForce RTX 5080,8749
...,...,...,...
135,136,AMD Radeon Graphics (Granite Ridge),130
136,137,NVIDIA GeForce GTX 750 Ti,124
137,138,AMD Radeon Graphics (Raphael),121
138,139,NVIDIA GeForce GTX 1050,120


In [3]:
# Coerce `score` to a numeric dtype (some values may be stored as strings);
# anything that cannot be parsed becomes NaN because of errors='coerce'.
df['score'] = pd.to_numeric(df['score'], errors='coerce')

# Keep one row per `name`: sorting by score descending first means that
# keep='first' retains the highest-scoring row of each duplicate group.
df_deduplicated = df.sort_values(by='score', ascending=False).drop_duplicates(subset='name', keep='first')

# Replace the table contents with the de-duplicated rows as ONE transaction.
# The DELETE is deliberately not committed on its own: if the re-insert fails,
# the rollback restores the original rows instead of leaving `gpus` empty.
cursor = gpus_conn.cursor()
with gpus_conn:  # commits on success, rolls back if anything below raises
    cursor.execute("DELETE FROM gpus")
    df_deduplicated.to_sql('gpus', gpus_conn, if_exists='append', index=False)

df_deduplicated

Unnamed: 0,id,name,score
0,1,NVIDIA RTX PRO 6000 Blackwell,15814
1,2,NVIDIA GeForce RTX 5090,14469
2,3,NVIDIA GeForce RTX 5090 D,14417
3,4,NVIDIA GeForce RTX 4090,9230
4,5,NVIDIA GeForce RTX 5080,8749
...,...,...,...
135,136,AMD Radeon Graphics (Granite Ridge),130
136,137,NVIDIA GeForce GTX 750 Ti,124
137,138,AMD Radeon Graphics (Raphael),121
138,139,NVIDIA GeForce GTX 1050,120


## gpu mapping表

In [4]:
# Step 1: Read the GPU mapping checklist (a JSON object) into a dict.
# Later cells look up each vga `chipset` label in it to obtain `pure_chipset`.
with open("3 gpu_mapping_checklist.json", encoding="utf-8") as f:
    mapping = json.loads(f.read())
mapping

{'NVIDIA / AMD 顯示卡周邊配件': None,
 'NVIDIA / AMD 外接顯卡轉接盒 (需另購顯卡)': None,
 'NVIDIA GeForce 210': None,
 'NVIDIA GT710': None,
 'NVIDIA GT730': None,
 'NVIDIA GT1030': None,
 'NVIDIA GTX1050 3G': 'NVIDIA GeForce GTX 1050',
 'NVIDIA GTX1050Ti': 'NVIDIA GeForce GTX 1050 Ti',
 'NVIDIA GTX1060': 'NVIDIA GeForce GTX 1060',
 'NVIDIA GTX1650': 'NVIDIA GeForce GTX 1650',
 'NVIDIA GTX1650 SUPER (DDR6)': 'NVIDIA GeForce GTX 1650 SUPER',
 'NVIDIA GTX1660 (DDR5)': 'NVIDIA GeForce GTX 1660',
 'NVIDIA GTX1660 SUPER (DDR6)': 'NVIDIA GeForce GTX 1660 SUPER',
 'NVIDIA GTX1660Ti': 'NVIDIA GeForce GTX 1660 Ti',
 'NVIDIA RTX2060': 'NVIDIA GeForce RTX 2060',
 'NVIDIA RTX2060 SUPER': 'NVIDIA GeForce RTX 2060 SUPER',
 'NVIDIA RTX2070': 'NVIDIA GeForce RTX 2070',
 'NVIDIA RTX2070 SUPER': 'NVIDIA GeForce RTX 2070 SUPER',
 'NVIDIA RTX2080': 'NVIDIA GeForce RTX 2080',
 'NVIDIA RTX2080 SUPER': 'NVIDIA GeForce RTX 2080 SUPER',
 'NVIDIA RTX2080Ti': 'NVIDIA GeForce RTX 2080 Ti',
 'NVIDIA Quadro 專業繪圖卡 (歡迎議價)': 'NVIDIA Qua

## VGA前處理

In [6]:
# Step 2: Connect to vga.db and update schema
vga_conn = sqlite3.connect("vga.db")
vga_cursor = vga_conn.cursor()

# Later cells write to vga.pure_chipset, vga.score and vga.CP. Add whichever of
# those columns is missing so the notebook also runs from a fresh kernel against
# a vga.db that has never been processed; when all three already exist this is
# a no-op.
# NOTE(review): the column types below are a best guess — confirm against the
# schema originally used for vga.db.
existing_columns = {
    column_info[1].lower()  # field 1 of PRAGMA table_info is the column name
    for column_info in vga_cursor.execute("PRAGMA table_info(vga)").fetchall()
}
if existing_columns:  # an empty set means table `vga` does not exist; leave the DB untouched
    for column_name, column_type in (("pure_chipset", "TEXT"), ("score", "REAL"), ("CP", "REAL")):
        if column_name.lower() not in existing_columns:
            vga_cursor.execute(f"ALTER TABLE vga ADD COLUMN {column_name} {column_type}")
    vga_conn.commit()

In [8]:
# Step 3: Fetch every (rowid, chipset) pair from vga; pure_chipset is derived
# from these pairs through the mapping.
rows = vga_cursor.execute("SELECT rowid, chipset FROM vga").fetchall()

In [9]:
# Fill vga.pure_chipset from the mapping, keyed by rowid. A chipset label that
# is absent from `mapping` (or mapped to None) is written as NULL.
# executemany() pushes the whole batch through one prepared statement instead
# of issuing a separate execute() call from Python for every row.
with vga_conn:  # commits on success, rolls back the partial batch on error
    vga_cursor.executemany(
        "UPDATE vga SET pure_chipset = ? WHERE rowid = ?",
        ((mapping.get(chipset), rowid) for rowid, chipset in rows),
    )

In [10]:
# Load the name and score columns of the gpus table and display them.
gpus_df = pd.read_sql_query(sql="SELECT name, score FROM gpus", con=gpus_conn)
gpus_df

Unnamed: 0,name,score
0,NVIDIA RTX PRO 6000 Blackwell,15814
1,NVIDIA GeForce RTX 5090,14469
2,NVIDIA GeForce RTX 5090 D,14417
3,NVIDIA GeForce RTX 4090,9230
4,NVIDIA GeForce RTX 5080,8749
...,...,...
128,AMD Radeon Graphics (Granite Ridge),130
129,NVIDIA GeForce GTX 750 Ti,124
130,AMD Radeon Graphics (Raphael),121
131,NVIDIA GeForce GTX 1050,120


In [11]:
# Step 5: Update score column in vga table.
# Every vga row whose pure_chipset equals a gpus name receives that GPU's score.
# NOTE(review): rows whose pure_chipset matches no gpus name are not touched by
# this UPDATE, so they keep whatever score they already had — confirm that is
# the intended behaviour.
score_name_pairs = zip(gpus_df["score"].tolist(), gpus_df["name"].tolist())

# .tolist() yields plain Python values that sqlite3 can bind directly, and
# executemany() replaces the per-row iterrows()/execute() loop.
with vga_conn:  # commits on success, rolls back the partial batch on error
    vga_cursor.executemany(
        "UPDATE vga SET score = ? WHERE pure_chipset = ?", score_name_pairs
    )

In [12]:
# Compute CP = score / price
# The UPDATE has no WHERE clause, so CP is recomputed for every row of vga.
# CP becomes NULL when score or price is NULL or when price is 0 (avoids a
# division by zero); CAST(score AS REAL) forces floating-point division.
vga_cursor.execute(
    """
    UPDATE vga
    SET CP = CASE
        WHEN score IS NOT NULL AND price IS NOT NULL AND price != 0 THEN CAST(score AS REAL) / price
        ELSE NULL
    END
"""
)
vga_conn.commit()

In [13]:
# Step 6: Reload the whole vga table to inspect the updated columns.
vga_updated_df = pd.read_sql_query(sql="SELECT * FROM vga", con=vga_conn)
vga_updated_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
0,20200105,NVIDIA / AMD 顯示卡周邊配件,酷碼 VGA Holder 顯卡用支架 千斤頂顯卡支撐架/(0005-KUH00)*任搭顯卡價,369,,,
1,20200105,NVIDIA / AMD 顯示卡周邊配件,酷碼 ELV8 A.RGB 顯卡支撐架(MAZ-IMGB-N30NA-R1),790,,,
2,20200105,NVIDIA / AMD 顯示卡周邊配件,NVIDIA GEFORCE RTX NVLINK BRIDGE 3-SLOT(間隔 60m...,2790,,,
3,20200105,NVIDIA / AMD 顯示卡周邊配件,NVIDIA GEFORCE RTX NVLINK BRIDGE 4-SLOT(間隔 80m...,2790,,,
4,20200105,NVIDIA / AMD 顯示卡周邊配件,華碩 ROG-NVLINK 4 SLOT橋接器(間隔 80mm/RTX 2080 2080T...,2990,,,
...,...,...,...,...,...,...,...
97385,20250705,AMD Radeon RX9070GRE-12G,[擁抱雙A -顯卡專案] 藍寶石 極地 PURE RX9070GRE GAMING OC 1...,16990,,,
97386,20250705,AMD Radeon RX9070-16G,華擎 RX9070 Challenger 16GB(2520MHz/29cm/三風扇/五年保...,24390,AMD Radeon RX 9070,6283.0,0.257606
97387,20250705,AMD Radeon RX9070-16G,撼訊 RX9070 16G-L/OC 暗黑犬(2590MHz/34cm/三風扇/註冊四年保)...,21990,AMD Radeon RX 9070,6283.0,0.285721
97388,20250705,AMD Radeon RX9070XT-16G,華擎 RX9070XT Taichi 16GB OC(3100MHz/33cm/三風扇) 限購一片,29990,AMD Radeon RX 9070 XT,7251.0,0.241781


In [14]:
# Full vga table as a DataFrame; the exclusion masks are built from it.
vga_df = pd.read_sql_query(sql="SELECT * FROM vga", con=vga_conn)

In [15]:
# Chipset labels to exclude. A row is dropped when its `chipset` text contains
# any of these strings as a substring.
chipset_exclude_keywords = [
    "AMD 工作站繪圖卡 (客訂交貨.歡迎議價)",
    "NVIDIA / AMD 外接顯卡轉接盒 (需另購顯卡)",
    "NVIDIA / AMD 顯示卡周邊配件",
    "NVIDIA Quadro 專業繪圖卡 (歡迎議價)",
    "NVIDIA / AMD 外接式顯卡轉接盒",
    "NVIDIA Quadro 專業繪圖卡",
    "NVIDIA 外接式顯卡轉接盒",
    "AMD 工作站繪圖卡",
]

# Product-title keywords to exclude. A row is dropped when its `product` text
# contains any of these strings as a substring.
product_exclude_keywords = [
    "贈",
    "抽",
    "送",
    "加購",
    "登錄",
    "活動",
    "限量",
    "現省",
    "現折",
    "現賺",
    "再加",
    "加送",
    "加價購",
    "送ROG",
    "延長線",
]

In [16]:
def _mentions_any(text, keywords):
    """Return True when `text` contains at least one of `keywords` as a substring."""
    for keyword in keywords:
        if keyword in text:
            return True
    return False


# Both masks are True for rows to KEEP, i.e. rows whose text (cast to str)
# contains none of the corresponding exclusion keywords.
product_mask = ~vga_df['product'].astype(str).apply(
    lambda product_text: _mentions_any(product_text, product_exclude_keywords)
)
chipset_mask = ~vga_df['chipset'].astype(str).apply(
    lambda chipset_text: _mentions_any(chipset_text, chipset_exclude_keywords)
)

In [17]:
# Keep only the rows that pass both exclusion masks, then rank them by CP with
# the highest value first.
filtered_df = (
    vga_df
    .loc[product_mask & chipset_mask]
    .sort_values(by='CP', ascending=False)
)
filtered_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
45328,20210823,NVIDIA RTX3090,❤ 華碩 TUF-RTX3090-24G-GAMING(1725MHz/30cm/三風扇) ...,5990,NVIDIA GeForce RTX 3090,5114.0,0.853756
96664,20250630,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
97151,20250705,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
96424,20250628,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
96195,20250625,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
...,...,...,...,...,...,...,...
97381,20250705,AMD Radeon RX9070GRE-12G,[擁抱雙A專案] 華擎 RX9070GRE 需搭 華擎Z890/X870(Steel Leg...,15990,,,
97382,20250705,AMD Radeon RX9070GRE-12G,藍寶石 脈動 PULSE RX9070GRE GAMING 12GB(2790MHz/28c...,16990,,,
97383,20250705,AMD Radeon RX9070GRE-12G,[擁抱雙A -顯卡專案] 藍寶石 脈動 PULSE RX9070GRE GAMING 12G...,15990,,,
97384,20250705,AMD Radeon RX9070GRE-12G,藍寶石 極地 PURE RX9070GRE GAMING OC 12GB(2920MHz/3...,17990,,,


In [18]:
# Drop rows without a usable pure_chipset: null, or blank after stripping
# surrounding whitespace.
filtered_df = filtered_df.loc[
    filtered_df['pure_chipset'].notna()
    & filtered_df['pure_chipset'].str.strip().ne('')
]

# Drop rows without a score: null, or equal to the empty string.
filtered_df = filtered_df.loc[
    filtered_df['score'].notna()
    & filtered_df['score'].ne('')
]

In [19]:
# Display the rows that remain after dropping entries without pure_chipset or score.
filtered_df

Unnamed: 0,date,chipset,product,price,pure_chipset,score,CP
45328,20210823,NVIDIA RTX3090,❤ 華碩 TUF-RTX3090-24G-GAMING(1725MHz/30cm/三風扇) ...,5990,NVIDIA GeForce RTX 3090,5114.0,0.853756
96664,20250630,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
97151,20250705,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
96424,20250628,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
96195,20250625,INTEL Arc B580 顯示卡,Acer Nitro Intel ARC B580 OC 12GB(2740MHz/27cm...,8490,Intel Arc B580,3062.0,0.360660
...,...,...,...,...,...,...,...
55581,20220327,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,321.0,0.003210
57785,20220412,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,321.0,0.003210
57048,20220407,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,321.0,0.003210
55943,20220329,NVIDIA GTX1650 (DDR6),技嘉 GTX1650 D6 4G(1590MHz/17.2cm/單風扇) *參考價 $ 6390,99999,NVIDIA GeForce GTX 1650,321.0,0.003210


In [20]:
# Save to new filtered_df.db; the `filtered_df` table is replaced if it already
# exists (if_exists="replace") and the DataFrame index is not written.
new_conn = sqlite3.connect("filtered_df.db")
try:
    filtered_df.to_sql("filtered_df", new_conn, if_exists="replace", index=False)
    new_conn.commit()
finally:
    # Always release the database file, even when the write above fails.
    new_conn.close()