In [None]:
##gridsearch cv
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
import numpy as np

# Hold out 20% of the samples as a test set (fixed seed so the split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=1
)

# Candidate values explored by the grid search.
param_grid = {
    'C': [0.1, 1, 10, 100],        # candidate values for C
    'gamma': [0.01, 0.1, 1, 10],   # candidate values for gamma
}

# RBF-kernel SVM wrapped in a 5-fold cross-validated grid search,
# fitted on the training split only.
svmModel = SVC(kernel='rbf')
grid_search = GridSearchCV(svmModel, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Report the winning parameter combination and its cross-validation score.
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

# Score the best estimator found by the search on the held-out test split.
best_model = grid_search.best_estimator_
test_score = best_model.score(X_test, y_test)
print("Test set score with best model:", test_score)


In [None]:
##gridsearch cv 加上繪圖
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split
import numpy as np
import matplotlib.pyplot as plt

# Candidate values explored by the grid search.
param_grid = {'C': [0.1, 1, 10, 100],          # candidate values for C
              'gamma': [0.01, 0.1, 1, 10]}     # candidate values for gamma

# RBF-kernel SVM.
svmModel = SVC(kernel='rbf')

# return_train_score=True is required here: GridSearchCV does not compute
# training scores by default, so cv_results_['mean_train_score'] would
# otherwise be missing and the lookup below would raise KeyError.
grid_search = GridSearchCV(svmModel, param_grid, cv=5, return_train_score=True)

# Hold out 20% of the samples as a test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X_data, Y_data, test_size=0.2, random_state=1)

# Run the cross-validated grid search on the training split.
grid_search.fit(X_train, y_train)

# Per-candidate results of the search.
results = grid_search.cv_results_

# Arrange the scores as a (len(C), len(gamma)) matrix: row i belongs to
# param_grid['C'][i], column j to param_grid['gamma'][j]. This relies on
# cv_results_ listing the candidates with C as the outer loop and gamma as the
# inner loop (the parameter grid iterates keys in sorted order).
grid_shape = (len(param_grid['C']), len(param_grid['gamma']))
mean_train_scores = np.array(results['mean_train_score']).reshape(grid_shape)
mean_test_scores = np.array(results['mean_test_score']).reshape(grid_shape)

# One train curve and one cross-validation ("Test") curve per C value.
fig, ax = plt.subplots(figsize=(10, 6))

for idx, val in enumerate(param_grid['C']):
    ax.plot(param_grid['gamma'], mean_train_scores[idx], '-o', label=f'Train C={val}')

for idx, val in enumerate(param_grid['C']):
    ax.plot(param_grid['gamma'], mean_test_scores[idx], '--s', label=f'Test C={val}')

# The gamma candidates span several decades; a log axis spaces them evenly.
ax.set_xscale('log')
ax.set_xlabel('Gamma', fontsize=14)
ax.set_ylabel('Score', fontsize=14)
ax.set_title('Grid Search Results', fontsize=16)
ax.legend(loc='best', fontsize=12)
plt.show()


In [None]:
##gamma對error作圖
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

def svmModel_fun(C=1.36, kernel='rbf', gamma=1):
    """Return a new, unfitted SVC configured with the given C, kernel and gamma."""
    return SVC(C=C, kernel=kernel, gamma=gamma)

# Hold out 20% of the samples as a test set (fixed seed for reproducibility).
X_train, X_test, Y_train, Y_test = train_test_split(
    X_data, Y_data, test_size=0.2, random_state=1
)

# Gamma values to sweep: from 0.1 up to (but excluding) `change`, in steps of 0.1.
change = 10
divisors = np.arange(0.1, change, 0.1)

# Error (1 - accuracy) on the training and test splits, one entry per gamma.
otrn, otst = [], []
for g in divisors:
    svm = svmModel_fun(gamma=g).fit(X_train, Y_train)
    otrn.append(1 - svm.score(X_train, Y_train))
    otst.append(1 - svm.score(X_test, Y_test))

# Plot both error curves and shade the gap between them.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(divisors, otrn, '--s', color='k', label='train')
ax.plot(divisors, otst, '-o', color='gray', label='test')
ax.fill_between(divisors, otrn, otst, alpha=.3, color='gray')
ax.legend(loc='best')
ax.set_xlabel('Gamma value', fontsize=14)
ax.set_ylabel('Error', fontsize=14)
ax.set_xlim(0, change)
fig.tight_layout()
plt.show()

In [None]:
####grid search CV olddd(未確認圖)
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# Split features and labels: every column except the last is used as a
# feature, and the 'Underclocking' column is the label.
# NOTE(review): this assumes 'Underclocking' is the last column of `data` - confirm.
X_all = data.iloc[:, :-1].values
Y_all = data['Underclocking'].values

# Shuffled, stratified 5-fold splitter with a fixed seed.
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# SVM classifier whose hyperparameters are searched below.
svm_clf = SVC()

# Parameter grid for GridSearchCV.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

grid_search = GridSearchCV(estimator=svm_clf, param_grid=param_grid, cv=stratified_kfold, scoring='accuracy', verbose=2, n_jobs=-1)

# Cross-validated grid search over the full data set.
grid_search.fit(X_all, Y_all)

print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# cv_results_ holds one entry per grid candidate, so every parameter value
# occurs several times (once per combination of the other parameters).
# Plotting the raw per-candidate lists makes the line plots zig-zag back and
# forth over the same x positions and makes the bar chart draw several bars on
# top of each other. Instead, average the CV score over all candidates that
# share a value and plot one point/bar per distinct value.
cv_results = grid_search.cv_results_
mean_test_score = np.asarray(cv_results['mean_test_score'])

fig, axs = plt.subplots(1, 3, figsize=(15, 5))

panels = [
    ('C', 'C Value', 'line'),
    ('gamma', 'Gamma Value', 'line'),
    ('kernel', 'Kernel', 'bar'),
]
for ax, (param_name, label, style) in zip(axs, panels):
    candidate_values = [params[param_name] for params in cv_results['params']]
    # Distinct values in the order they first appear in the grid.
    distinct_values = list(dict.fromkeys(candidate_values))
    averaged = [
        mean_test_score[np.array([v == d for v in candidate_values], dtype=bool)].mean()
        for d in distinct_values
    ]
    # Values are drawn as categories so numeric and string parameters
    # ('scale'/'auto', kernel names) are handled the same way.
    tick_labels = [str(d) for d in distinct_values]
    if style == 'bar':
        ax.bar(tick_labels, averaged)
    else:
        ax.plot(tick_labels, averaged, marker='o')
    ax.set_xlabel(label)
    ax.set_ylabel('Mean Test Score')
    ax.set_title(f'{label} vs Mean Test Score')

plt.tight_layout()
plt.show()


In [None]:
####grid search CV olddd(未確認圖)

from sklearn.model_selection import GridSearchCV, StratifiedKFold, validation_curve
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np  # needed for np.mean / np.std below; was missing from this cell

# SVM model whose hyperparameters are searched.
svm_model = SVC()

# Parameter grid.
param_grid = {'C': [0.1, 1, 10, 100],
              'gamma': [1, 0.1, 0.01, 0.001],
              'kernel': ['linear', 'rbf', 'poly']}

# Grid search with a shuffled, stratified 5-fold splitter (fixed seed).
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
grid_search = GridSearchCV(svm_model, param_grid, cv=kfold, verbose=2)
grid_search.fit(X_train, Y_train)

# Best parameter combination found by the search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Validation curve over C. Note that this uses a default SVC(), i.e. it is
# independent of the grid-search result above.
param_range = [0.1, 1, 10, 100]
train_scores, test_scores = validation_curve(
    SVC(), X_train, Y_train, param_name="C", param_range=param_range,
    cv=kfold, scoring="accuracy", n_jobs=-1)

# Mean and standard deviation across the CV folds (axis 1) for each C value.
train_mean = np.mean(train_scores, axis=1)
train_std = np.std(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
test_std = np.std(test_scores, axis=1)

# Plot the validation curve with a +/- one standard deviation band.
plt.figure(figsize=(10, 6))
plt.title("Validation Curve with SVM")
plt.xlabel("C Value")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
lw = 2

plt.semilogx(param_range, train_mean, label="Training score",
             color="darkorange", lw=lw)

plt.fill_between(param_range, train_mean - train_std,
                 train_mean + train_std, alpha=0.2,
                 color="darkorange", lw=lw)

plt.semilogx(param_range, test_mean, label="Cross-validation score",
             color="navy", lw=lw)

plt.fill_between(param_range, test_mean - test_std,
                 test_mean + test_std, alpha=0.2,
                 color="navy", lw=lw)

plt.legend(loc="best")
plt.show()


In [None]:
#grid-search new(???)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# SVM model whose hyperparameters are searched.
svm_model = SVC()

# Fine-grained parameter grid: C from 9.0 to 11.0 and gamma from 0.50 down to 0.15.
param_grid = {'C': [9.0, 9.1, 9.2, 9.3, 9.4, 9.5, 9.6, 9.7, 9.8, 9.9, 10.0, 10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 11.0],
              'gamma': [0.5, 0.49, 0.48, 0.47, 0.46, 0.45, 0.44, 0.43, 0.42, 0.41, 0.40, 0.39, 0.38, 0.37, 0.36, 0.35, 0.34, 0.33, 0.32, 0.31, 0.30, 0.29, 0.28, 0.27, 0.26, 0.25, 0.24, 0.23, 0.22, 0.21, 0.20, 0.19, 0.18, 0.17, 0.16, 0.15],
              'kernel': ['rbf']}

# 5-fold cross-validated grid search; training scores are kept for the plots.
grid_search = GridSearchCV(svm_model, param_grid, cv=5, verbose=2, return_train_score=True)
grid_search.fit(X_train, Y_train)

# Best parameter combination found by the search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# cv_results_ holds one entry per (C, gamma) candidate, so each C value occurs
# once per gamma value and vice versa. Plotting the raw per-candidate lists
# draws a single line that jumps back and forth over the same x positions.
# Instead, average the scores over all candidates that share a value and plot
# one point per distinct value.
cv_results = grid_search.cv_results_
mean_test_scores = np.asarray(cv_results['mean_test_score'])
mean_train_scores = np.asarray(cv_results['mean_train_score'])

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# (parameter name, axis label prefix, x tick rotation)
panels = [('C', 'C', 0), ('gamma', 'Gamma', 45)]
for ax, (param_name, label, rotation) in zip(axes, panels):
    candidate_values = np.array([params[param_name] for params in cv_results['params']])
    distinct_values = np.unique(candidate_values)  # sorted ascending
    validation_by_value = [mean_test_scores[candidate_values == v].mean() for v in distinct_values]
    training_by_value = [mean_train_scores[candidate_values == v].mean() for v in distinct_values]

    ax.plot(distinct_values, validation_by_value, marker='o', label='Validation Score')
    ax.plot(distinct_values, training_by_value, marker='o', label='Training Score')
    ax.set_xlabel(f'{label} Values')
    ax.set_ylabel('Mean Score')
    ax.set_title(f'{label} Values vs Mean Score')
    ax.set_xticks(distinct_values)
    ax.tick_params(axis='x', rotation=rotation)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()


In [None]:

## Should give a result similar to the 87% obtained above
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import numpy as np

# SVM model whose hyperparameters are searched.
svm_model = SVC()

# Fine-grained parameter grid: C in 9.5..10.5 (step 0.1) and gamma in
# 0.05..0.15 (step 0.01). np.linspace is used instead of np.arange because,
# with a floating-point step, whether arange includes the last point depends
# on rounding error; linspace always yields exactly 11 values per parameter.
param_grid = {'C': np.round(np.linspace(9.5, 10.5, 11), 2).tolist(),
              'gamma': np.round(np.linspace(0.05, 0.15, 11), 2).tolist(),
              'kernel': ['rbf']}

# 5-fold cross-validated grid search; training scores are kept for the plots.
grid_search = GridSearchCV(svm_model, param_grid, cv=5, verbose=2, return_train_score=True)
grid_search.fit(X_train, Y_train)

# Best parameter combination found by the search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# cv_results_ holds one entry per (C, gamma) candidate, so each C value occurs
# once per gamma value and vice versa. Plotting the raw per-candidate lists
# draws a single line that jumps back and forth over the same x positions.
# Instead, average the scores over all candidates that share a value and plot
# one point per distinct value.
cv_results = grid_search.cv_results_
mean_test_scores = np.asarray(cv_results['mean_test_score'])
mean_train_scores = np.asarray(cv_results['mean_train_score'])

fig, axes = plt.subplots(1, 2, figsize=(12, 6))

# (parameter name, axis label prefix)
panels = [('C', 'C'), ('gamma', 'Gamma')]
for ax, (param_name, label) in zip(axes, panels):
    candidate_values = np.array([params[param_name] for params in cv_results['params']])
    distinct_values = np.unique(candidate_values)  # sorted ascending
    validation_by_value = [mean_test_scores[candidate_values == v].mean() for v in distinct_values]
    training_by_value = [mean_train_scores[candidate_values == v].mean() for v in distinct_values]

    ax.plot(distinct_values, validation_by_value, marker='o', label='Validation Score')
    ax.plot(distinct_values, training_by_value, marker='o', label='Training Score')
    ax.set_xlabel(f'{label} Values')
    ax.set_ylabel('Mean Score')
    ax.set_title(f'{label} Values vs Mean Score')
    ax.set_xticks(distinct_values)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()


In [None]:
##StratifiedKFold
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report

# SVM classifier whose hyperparameters are searched.
svm_clf = SVC()

# Parameter grid for GridSearchCV.
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}

# Shuffled, stratified 5-fold splitter with a fixed seed.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

grid_search = GridSearchCV(estimator=svm_clf, param_grid=param_grid, cv=kfold, scoring='accuracy', verbose=1, n_jobs=-1)

# Fit on the training split only. The model is evaluated on X_test / y_test
# below; fitting on the complete data set (X_all / Y_all) would presumably let
# the model see the test samples during training and make the reported test
# accuracy optimistically biased.
# NOTE(review): assumes X_train / y_train come from the same train_test_split
# call that produced X_test / y_test - confirm.
grid_search.fit(X_train, y_train)

# Best parameters and the corresponding mean cross-validation accuracy.
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Best estimator found by the search.
best_svm_clf = grid_search.best_estimator_

# Predict on the held-out test set with the best model.
y_pred = best_svm_clf.predict(X_test)

# Evaluate the predictions.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Best Model Accuracy:", accuracy)
print("Best Model Classification Report:\n", report)