In [None]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
from sklearn.model_selection import cross_val_predict, StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC  # support vector classifier used as the model

# Train and evaluate an RBF-kernel SVM with 10-fold stratified cross-validation.
# For every outer fold a grid search picks C / gamma on the training part only,
# the selected model is scored on the held-out part, and the model is saved to
# `fold_<k>_model.pkl`.

# Read CSV; the "label" column is the prediction target.
df = pd.read_csv("CSV PATH")
y = df["label"]

# Feature list
FEATURE_COLUMNS = ['age', 'sex', 'T', 'DOI', 'RL', 'ly', 'v01', 'pn01', 'CLAM_score']
X = df.loc[:, FEATURE_COLUMNS]

# Unfitted template that every grid search clones. It is kept under its own
# name so the per-fold fitted model never overwrites it.
# SVMs are not scale invariant, so the features are standardised inside the
# pipeline: the scaler is fitted on each training split only, which avoids
# leaking held-out statistics into the model.
# probability=True enables predict_proba (needed for the AUC below).
svm_template = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', SVC(probability=True, random_state=0)),
])

# Grid search space; the `svc__` prefix routes each parameter to the SVC step.
param_grid = {
    'svc__C': [0.1, 1, 10, 100],
    'svc__gamma': [1, 0.1, 0.01, 0.001],
    'svc__kernel': ['rbf'],
}

# 10-Fold Cross Validation (outer loop)
cv = StratifiedKFold(n_splits=10, random_state=0, shuffle=True)

# Per-fold AUC, F1 and accuracy on the held-out part of each outer fold.
auc_scores = []
f1_scores = []
accuracy_scores = []

# Best fitted model of each outer fold, in fold order.
fold_models = []

# each fold
for fold, (train_idx, test_idx) in enumerate(cv.split(X, y), start=1):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # Inner search on the training part only; no `cv` / `scoring` is passed, so
    # GridSearchCV's defaults apply. verbose=0 keeps the notebook output short
    # (16 candidates are searched in each of the 10 outer folds).
    grid = GridSearchCV(svm_template, param_grid, refit=True, verbose=0)
    grid.fit(X_train, y_train)

    # `svm_model` is the best pipeline of the current fold. After the loop it
    # still refers to the last fold's model, which the save cell writes out.
    svm_model = grid.best_estimator_

    # Column 1 of predict_proba is used as the positive-class score
    # (assumes binary labels whose second class is the positive one — TODO confirm).
    y_pred_prob = svm_model.predict_proba(X_test)[:, 1]

    auc = roc_auc_score(y_test, y_pred_prob)
    auc_scores.append(auc)

    y_pred = svm_model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    f1_scores.append(f1)

    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)

    fold_models.append(svm_model)

    # Save
    joblib.dump(svm_model, f'fold_{fold}_model.pkl')

# Mean AUC, F1 and accuracy over the outer folds
print("Mean AUC:", np.mean(auc_scores))
print("Mean F1 Score:", np.mean(f1_scores))
print("Mean Accuracy:", np.mean(accuracy_scores))

# The SVM model does not expose feature importances, so no feature-importance
# computation or plot is done here.


In [7]:
# Save result: write the per-fold cross-validation metrics to CSV and pickle
# the model currently bound to `svm_model`.
import csv

import joblib

with open('auc_accuracy_results.csv', 'w', newline='') as results_file:
    writer = csv.writer(results_file)
    writer.writerow(['Fold', 'AUC', 'Accuracy'])
    # One row per fold, numbered from 1, pairing the AUC and accuracy lists.
    writer.writerows(
        [fold_number, fold_auc, fold_accuracy]
        for fold_number, (fold_auc, fold_accuracy)
        in enumerate(zip(auc_scores, accuracy_scores), start=1)
    )

# NOTE(review): the training loop rebinds `svm_model` on every fold, so this
# saves the model selected in the last fold, not one refitted on all data —
# confirm that is the intended "final" model.
joblib.dump(svm_model, 'final_model.pkl')

['final_model.pkl']

In [None]:
#TEST CSV
# Score every saved cross-validation fold model on a separate test CSV, print
# the per-fold and mean AUC / accuracy, and write them to `Test_results.csv`.

import joblib
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score

# Number of saved fold models (fold_1_model.pkl ... fold_<N_FOLDS>_model.pkl).
# Must match the number of CV splits used when the models were written.
N_FOLDS = 10

# Probability cut-off used to turn positive-class probabilities into labels.
DECISION_THRESHOLD = 0.5

# Feature list; must be the same columns, in the same order, as used for training.
TEST_FEATURE_COLUMNS = ['age', 'sex', 'T', 'DOI', 'RL', 'ly', 'v01', 'pn01', 'CLAM_score']

# Read Dataset
new_df = pd.read_csv("Test CSV PAth")

X_new = new_df.loc[:, TEST_FEATURE_COLUMNS]
y_true = new_df['label']

# NOTE: these names are also used by the training cell for the CV metrics.
# Running this cell replaces those lists, so re-running the save cell afterwards
# would write test metrics into `auc_accuracy_results.csv`.
auc_scores = []
accuracy_scores = []

# One {'Fold', 'AUC', 'Accuracy'} record per fold model.
results = []

# Each fold test
for fold in range(1, N_FOLDS + 1):
    # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
    # only load model files produced by a trusted run of this notebook.
    loaded_model = joblib.load(f'fold_{fold}_model.pkl')
    y_pred_prob = loaded_model.predict_proba(X_new)[:, 1]

    # AUC
    auc = roc_auc_score(y_true, y_pred_prob)
    auc_scores.append(auc)

    # Pred label
    # NOTE(review): labels here come from thresholding the probabilities,
    # whereas the training cell uses `.predict()`; for an SVC these two rules
    # can disagree, so test accuracy is not strictly comparable to CV accuracy
    # — confirm which rule is intended.
    y_pred = (y_pred_prob >= DECISION_THRESHOLD).astype(int)

    # Accuracy
    accuracy = accuracy_score(y_true, y_pred)
    accuracy_scores.append(accuracy)

    results.append({
        'Fold': fold,
        'AUC': auc,
        'Accuracy': accuracy
    })

# Report from the collected records so the printout cannot drift from N_FOLDS.
for fold_result in results:
    print(f"Fold {fold_result['Fold']} - AUC: {fold_result['AUC']}, Accuracy: {fold_result['Accuracy']}")

print("Mean AUC:", sum(auc_scores) / len(auc_scores))
print("Mean Accuracy:", sum(accuracy_scores) / len(accuracy_scores))

results_df = pd.DataFrame(results)

results_df.to_csv('Test_results.csv', index=False)