In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import pickle

# Restore the preprocessing objects that were saved by the training run:
# the per-column LabelEncoders and the MinMaxScaler.
# NOTE: pickle.load can execute arbitrary code from the file being read, so
# these artifacts must come from a trusted source.
with open("label_encoders.pickle", "rb") as encoders_file:
    label_encoders = pickle.load(encoders_file)

with open("minmax_scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)



In [None]:
# Load the records to be scored.
df = pd.read_csv("sample.csv")

# Whitelist of columns that survive the pruning step; every other column read
# from the CSV is dropped before preprocessing.
features_to_keep = [
    "WHOLE_TIME", "CITY", "LIGHT",
    # Cleaned and coerced to numeric values during preprocessing.
    "Temperature", "WS", "RH", "Precp",
    "ROAD_TYPE_SUB1", "SIGNAL_TYPE", "VEHICLE_MAIN",
    "OBJ_GENDER", "OBJ_AGE",
    "CAMERA_ID", "EQUIP_TYPE",
    "ACCIDENT_TYPE",
]



In [None]:

# Discard every column that is not listed in features_to_keep.
unused_columns = [name for name in df.columns if name not in features_to_keep]
df = df.drop(columns=unused_columns)

# Placeholder markers in the measurement columns are turned into NaN, and the
# columns are then coerced to numbers (anything unparseable also becomes NaN).
measurement_columns = ["Temperature", "RH", "WS", "Precp"]
placeholder_markers = ["/", "X", "...", "V", "&", "T"]
df[measurement_columns] = (
    df[measurement_columns]
    .replace(placeholder_markers, np.nan)
    .apply(pd.to_numeric, errors="coerce")
)

# CAMERA_ID is reduced to a presence flag: "有" when a value exists, "無" when
# it is missing.
df["CAMERA_ID"] = df["CAMERA_ID"].notna().map({False: "無", True: "有"})

# A missing EQUIP_TYPE is recorded as "無".
df["EQUIP_TYPE"] = df["EQUIP_TYPE"].fillna("無")

# Gender becomes 1 for "男" and 0 for "女"; any other value maps to NaN.
# ACCIDENT_TYPE is left as its raw label (a former A1/A2 -> 1/0 mapping is
# disabled).
gender_codes = {"男": 1, "女": 0}
df["OBJ_GENDER"] = df["OBJ_GENDER"].map(gender_codes)



In [None]:

# Treat cells that hold only a newline character as missing, then drop every
# row that still has a missing value in any retained column.
df = df.replace("\n", np.nan).dropna()
if df.empty:
    raise ValueError("No complete rows remain after cleaning; nothing to score.")

# Encode each text column with the LabelEncoder loaded for that column name.
# A column without an encoder raises KeyError; a label the encoder has not
# seen raises ValueError from scikit-learn.
for column in df.select_dtypes(include="object"):
    df[column] = label_encoders[column].transform(df[column])

# Scale with the MinMaxScaler exactly as it was loaded. It must NOT be refit
# here: calling fit() on the data being scored would replace the training-time
# min/max with statistics of this file, so the features would no longer match
# what the model was trained on.
if hasattr(scaler, "feature_names_in_"):
    # The scaler remembers the columns (and their order) it was fitted on.
    columns_to_scale = list(scaler.feature_names_in_)
else:
    # No recorded names: fall back to the numeric columns in their current
    # order. TODO confirm this matches the order used when the scaler was fit.
    columns_to_scale = list(
        df.select_dtypes(include=["int64", "float64"]).columns
    )

# No imputation is needed: rows with missing values were dropped above.
df_numerical = df[columns_to_scale]
df_numerical_scaled = scaler.transform(df_numerical)
df[columns_to_scale] = df_numerical_scaled

# Column order of the feature matrix passed to the model.
new_feature_order = ['WHOLE_TIME', 'CITY', 'LIGHT', 'Temperature', 'WS', 'RH', 'Precp',
       'ROAD_TYPE_SUB1', 'SIGNAL_TYPE', 'VEHICLE_MAIN', 'OBJ_GENDER',
       'OBJ_AGE', 'CAMERA_ID', 'EQUIP_TYPE']



In [None]:
# Fail fast when a model input column is absent: reindex would otherwise
# create it silently as an all-NaN column.
missing_features = [name for name in new_feature_order if name not in df.columns]
if missing_features:
    raise KeyError(f"Missing model input columns: {missing_features}")

# Keep only the model inputs, arranged in the order given by new_feature_order.
df = df.reindex(columns=new_feature_order)

# Feature matrix handed to the model (selected once; the repeated selection
# of the same columns was redundant).
X_test = df[new_feature_order]



In [None]:
# Restore the pickled classifier.
# NOTE: pickle.load can execute arbitrary code from the file being read, so
# the model file must come from a trusted source.
with open("logistic_regression_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)

# predict_proba yields one column per class; the column at index 1 is kept as
# the reported probability.
class_probabilities = model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

# Attach the probabilities to the feature frame.
df["Probability"] = y_prob

# Show only the probability column.
print(df[["Probability"]])

# Persist the features together with their probabilities, without the index.
df.to_csv("predicted_results.csv", index=False)