In [None]:
import pandas as pd
from zipfile import ZipFile
from transformers import AutoTokenizer, BertForSequenceClassification
import torch
from torch.nn.functional import softmax

### 0. Upload the CSV file to check (sms.csv) ###
###    Upload the model zip produced by train_name ###
# === 1. Unzip the model archive (can be skipped if already extracted)
# with ZipFile("/content/model_results.zip", 'r') as zip_ref:
    # zip_ref.extractall("results")

# === 2. Load the model and tokenizer
# Named `model_dir` rather than `dir` so the builtin dir() is not shadowed.
model_dir = "./results/final"
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = BertForSequenceClassification.from_pretrained(model_dir)
model.eval()  # put the model in evaluation mode for inference

# === 3. Read the CSV file (must contain the SMS text column)
df = pd.read_csv("已驗證0709/naming_0_3w.csv")  # the SMS CSV file to run predictions on
# Raise explicitly instead of using `assert`, which is stripped under `python -O`
# and would let a CSV without the required column slip through.
if "text" not in df.columns:
    raise ValueError("⚠️ 請確認你的 CSV 包含 text 欄位")

# === 4. Prediction helper (returns the predicted class and the class-1 probability in %)
def predict_person_name_with_prob(text):
    """Classify a single message and report the probability of class 1.

    Args:
        text: The message to classify. Anything that is not a string, or a
            string that is empty / whitespace-only, is treated as blank.

    Returns:
        A two-element ``pd.Series``: ``[predicted_label, class_1_percent]``.
        Blank input yields ``[0, 0.0]``. ``class_1_percent`` is the softmax
        probability of class index 1, scaled to 0-100 and rounded to two
        decimals.

    Relies on the module-level ``tokenizer`` and ``model``.
    """
    is_blank = not isinstance(text, str) or text.strip() == ""
    if is_blank:
        # Default for blank input: label 0 with a 0% probability.
        return pd.Series([0, 0.0])

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Only the forward pass needs to run without gradient tracking.
    with torch.no_grad():
        logits = model(**encoded).logits

    label = torch.argmax(logits, dim=1).item()
    class_probs = softmax(logits, dim=1).squeeze().tolist()  # logits -> probabilities
    class_1_percent = round(class_probs[1] * 100, 2)  # probability of class 1, in %

    return pd.Series([label, class_1_percent])

# === 5. Apply to the whole column and add two new columns
df[["has_person_name", "prob_person_name_percent"]] = df["text"].apply(predict_person_name_with_prob)
# The helper returns a Series mixing an int label with a float percentage, which
# pandas upcasts to float64; cast the label back so the CSV holds 0/1, not 0.0/1.0.
df["has_person_name"] = df["has_person_name"].astype(int)

# === 6. Write the results to a new CSV file
df.to_csv("sms_with_name_predictions.csv", index=False)
print("✅ 預測完成，已輸出為 sms_with_name_predictions.csv")


# # 📌 測試範例
# test_samples = [
#     "👧",
#     "沈嘉儀明天見，記得帶毛巾、水、補給品。",
#     "預-Niseko羊蹄山杯送達六張黎店，限量編號A029205310請前往取貨，可至全家APP首頁>包裹查詢",
#     "【大樹速貸】最快5分鐘資金入袋!親愛的王先生，國泰世華銀行幫您馬上解決資金煩惱，數位申貸超簡單。資金快速入袋 https://cathaybk.tw/C3ACZ78YJ 。總費用年百分率3.62%~17.09%。",
# ]

# print("\n📊 預測結果：")
# for text in test_samples:
#     print(f"👉 『{text}』 → {predict_person_name_with_prob(text)}")


# # Find the rows whose text is missing (NaN); note that isna() does not flag empty or whitespace-only strings
# empty_or_null_mask = df["text"].isna()
# empty_text_ids = df[empty_or_null_mask]["Id"]

# # 印出來
# for eid in empty_text_ids:
#     print(f"Empty text at ID: {eid}")

✅ 預測完成，已輸出為 sms_with_name_predictions.csv
