In [1]:
import optuna
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [2]:
train_data2 = pd.read_csv('clean_train_data.csv')
test_data2 = pd.read_csv('clean_test_data.csv')
future_data2 = pd.read_csv('clean_future_test_data.csv')

In [11]:
X_train = train_data2.drop(columns=['id', 'home_team_win'])
y_train = train_data2['home_team_win'].map({1: True, -1: False})
X_test = test_data2.drop(columns=['id'])

In [12]:
def objective(trial, X, y):
    params = {
        'C': trial.suggest_float('C', 1e-10, 1e10, log=True),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),  # RBF、poly、sigmoid核的gamma
    }
    if params['kernel'] == 'poly':
        params['degree'] = trial.suggest_int('degree', 2, 5)  # poly核的次數

    model = SVC(**params)
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    return np.mean(scores)

study = optuna.create_study(direction='maximize')
study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=20)

print("Best_param:", study.best_params)
print("Best_accuracy:", study.best_value)

best_params = study.best_params
final_model = SVC(**best_params, random_state=42)
final_model.fit(X_train, y_train) 

[I 2024-12-13 15:08:09,998] A new study created in memory with name: no-name-027a8f68-bdbb-401f-ad62-38869a1d112e
[I 2024-12-13 15:13:57,623] Trial 0 finished with value: 0.5482954260179745 and parameters: {'C': 0.009057515271107262, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2}. Best is trial 0 with value: 0.5482954260179745.
[I 2024-12-13 15:15:29,577] Trial 1 finished with value: 0.531038198768793 and parameters: {'C': 6.368331078502351e-09, 'kernel': 'poly', 'gamma': 'scale', 'degree': 5}. Best is trial 0 with value: 0.5482954260179745.
[I 2024-12-13 15:27:10,830] Trial 2 finished with value: 0.5465786673230493 and parameters: {'C': 1810736608.445646, 'kernel': 'rbf', 'gamma': 'scale'}. Best is trial 0 with value: 0.5482954260179745.
[I 2024-12-13 15:28:30,295] Trial 3 finished with value: 0.531038198768793 and parameters: {'C': 3.292101015379789e-10, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 0 with value: 0.5482954260179745.
[I 2024-12-13 15:33:42,470] Trial 4 finishe

Best_param: {'C': 6296.748648379015, 'kernel': 'rbf', 'gamma': 'scale'}
Best_accuracy: 0.5561570762567093


In [13]:
y_test_pred = final_model.predict(X_test)
predicted_results = y_test_pred > 0

# 儲存結果
output_df = pd.DataFrame({
    'id': range(len(X_test)),
    'home_team_win': predicted_results
})

print("預測結果範例:")
print(output_df.head())
output_df.to_csv('svm_predictions1.csv', index=False)

預測結果範例:
   id  home_team_win
0   0           True
1   1          False
2   2           True
3   3           True
4   4          False


In [3]:
X_train = train_data2.drop(columns=['id', 'home_team_win', 'season'])
y_train = train_data2['home_team_win'].map({1: True, -1: False})
future_X_test = future_data2.drop(columns=['id', 'season'])

In [4]:
def objective(trial, X, y):
    # 定義超參數的搜索空間
    params = {
        'C': trial.suggest_float('C', 1e-3, 1e3, log=True),
        'kernel': trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),  # RBF、poly、sigmoid核的gamma
    }
    if params['kernel'] == 'poly':
        params['degree'] = trial.suggest_int('degree', 2, 5)  # poly核的次數

    # 初始化 SVM 模型
    model = SVC(**params)
    
    # 使用交叉驗證評估模型性能
    scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    return np.mean(scores)  # 返回平均準確率作為目標

# 創建 Study
study = optuna.create_study(direction='maximize')  # 目標是最大化準確率
study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=15)  # 進行多次試驗

# 輸出最佳結果
print("Best_param:", study.best_params)
print("Best_accuracy:", study.best_value)

# 使用最佳參數重新訓練模型
best_params = study.best_params
final_model = SVC(**best_params, random_state=42)
final_model.fit(X_train, y_train)  # 在整個訓練集上訓練模型

[I 2024-12-13 21:33:46,842] A new study created in memory with name: no-name-f0df4153-5e6b-4905-aa1f-e9c36bb0f874
[I 2024-12-13 21:34:54,107] Trial 0 finished with value: 0.5128778740717065 and parameters: {'C': 0.41028347096191425, 'kernel': 'sigmoid', 'gamma': 'auto'}. Best is trial 0 with value: 0.5128778740717065.
[I 2024-12-13 21:36:49,199] Trial 1 finished with value: 0.531038198768793 and parameters: {'C': 0.007146090948756419, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 1 with value: 0.531038198768793.
[I 2024-12-13 21:38:12,236] Trial 2 finished with value: 0.5570607043621272 and parameters: {'C': 0.0018602657601176663, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 2 with value: 0.5570607043621272.
[I 2024-12-13 21:40:12,886] Trial 3 finished with value: 0.5538994550963736 and parameters: {'C': 1.1460503650667724, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 2 with value: 0.5570607043621272.
[I 2024-12-13 21:41:52,487] Trial 4 finished with value: 0.554169274031

Best_param: {'C': 0.00102168767177583, 'kernel': 'linear', 'gamma': 'auto'}
Best_accuracy: 0.559681458540749


In [5]:
y_test_pred = final_model.predict(future_X_test)
predicted_results = y_test_pred > 0

# 儲存結果
output_df = pd.DataFrame({
    'id': range(len(future_X_test)),
    'home_team_win': predicted_results
})

print("預測結果範例:")
print(output_df.head())
output_df.to_csv('svm_predictions2.csv', index=False)

預測結果範例:
   id  home_team_win
0   0           True
1   1           True
2   2           True
3   3          False
4   4           True
