Sử dụng các thuật toán tối ưu (VD: GridSearch, RandomSearch) để tune mô hình SVM. Đánh giá toàn diện trên nhiều chỉ số khác nhau: Accuracy, TPR, FPR, F1, MCC, Classification Report, Confusion matrix.

In [23]:
#Khai báo thư viện và kết nối drive để lấy file csv
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
# Tải 2 tập train và test lên
train_df = pd.read_csv('/content/drive/MyDrive/VIII/AI/train.csv')
test_df = pd.read_csv('/content/drive/MyDrive/VIII/AI/test.csv')

# Bỏ cột 'target' ở cuối
X_train = train_df.iloc[:, :-1]
y_train = train_df.iloc[:, -1]
X_test = test_df.iloc[:, :-1]
y_test = test_df.iloc[:, -1]

In [12]:
model = SVC()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
print(classification_report(y_test, predictions))

              precision    recall  f1-score   support

           0       0.76      0.73      0.74        84
           1       0.79      0.89      0.83        71
           2       0.88      0.82      0.85        84
           3       0.73      0.77      0.75        79
           4       0.77      0.73      0.75        82

    accuracy                           0.79       400
   macro avg       0.79      0.79      0.79       400
weighted avg       0.79      0.79      0.78       400



In [19]:
# Scale
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Tune SVM với gridsearch
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001], 'kernel': ['rbf']}

grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)

grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.2s
[CV] END .......................C=0.1, gamma=0.1, kernel=rbf; total time=   0.1s
[CV] END ......................C=0.1, gamma=0.01, kernel=rbf; total time=   0.1s
[CV] END ......................C=0.1, gamma=0.01

In [20]:
# Trả về kết quả tốt nhất
print("Best parameters found by grid search:")
print(grid.best_params_)
print("Best cross-validation score:", grid.best_score_)

# Evaluate the best model on the test set
print("Score on test set:", grid.score(X_test_scaled, y_test))

Best parameters found by grid search:
{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
Best cross-validation score: 0.843125
Score on test set: 0.855


In [21]:
#Classification Report
grid_predictions = grid.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, grid_predictions))

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.04      0.07        84
           1       0.18      1.00      0.31        71
           2       1.00      0.02      0.05        84
           3       0.67      0.03      0.05        79
           4       0.00      0.00      0.00        82

    accuracy                           0.20       400
   macro avg       0.57      0.22      0.09       400
weighted avg       0.58      0.20      0.09       400



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [22]:
#Confusion Matrix
y_pred = model.predict(X_test)

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Confusion Matrix:
[[61  4  1  9  9]
 [ 3 63  0  1  4]
 [ 0  3 69  7  5]
 [10  4  4 61  0]
 [ 6  6  4  6 60]]
