In [1]:
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
import pandas as pd

In [2]:
data = pd.read_csv('../train.csv')

In [3]:
x = data.iloc[:, :-1]
y = data.iloc[:, -1]
# print(x.head())
# print(y.head())
print(x.shape, y.shape)

(4800, 26) (4800,)


In [4]:
x_train, x_test, y_train, y_test = model_selection.train_test_split(x, y, test_size=0.2, random_state=51)

In [5]:
parameters = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'degree': [2, 3, 4, 5],
    'gamma': ['scale', 'auto', 0.1, 1],
    'coef0': [0, 1, 2],
    'shrinking': [True, False]
}


In [6]:
svc = svm.SVC()

# Create GridSearchCV object with cross-validation (e.g., 5-fold cross-validation)
grid_search = model_selection.GridSearchCV(estimator=svc, param_grid=parameters, cv=5, scoring='f1_weighted', n_jobs=-1)


In [9]:
# Perform grid search on the training data
grid_search.fit(x, y)

# Print the best parameters and corresponding accuracy score
print("Best Parameters: ", grid_search.best_params_)
print("Best Accuracy Score: {:.2f}".format(grid_search.best_score_))

Best Parameters:  {'C': 10, 'coef0': 0, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf', 'shrinking': True}
Best Accuracy Score: 0.89


In [10]:
# Evaluate the best model on the test data
best_model = grid_search.best_estimator_
test_predictions = best_model.predict(x_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print("Test Accuracy Score: {:.2f}".format(test_accuracy))

Test Accuracy Score: 0.99


In [11]:
test = pd.read_csv('../test.csv')
test_features = test.iloc[:, 1:]
rbf_pred = best_model.predict(test_features)

In [None]:
rbf_labels = pd.DataFrame({'Predicted_Labels': rbf_pred})

rbf_labels['ID'] = test['ID']  


rbf_labels.to_csv('150_mins.csv', index=False)

In [7]:
rbf_accuracy = accuracy_score(y_test, rbf_pred)
rbf_f1 = f1_score(y_test, rbf_pred, average='weighted')
print('Accuracy (RBF Kernel): ', "%.2f" % (rbf_accuracy*100))
print('F1 (RBF Kernel): ', "%.2f" % (rbf_f1*100))

Accuracy (RBF Kernel):  90.00
F1 (RBF Kernel):  89.99


In [9]:
test = pd.read_csv('../test.csv')
test_features = test.iloc[:, 1:]
rbf_pred = rbf.predict(test_features)


In [10]:
rbf_labels = pd.DataFrame({'Predicted_Labels': rbf_pred})

rbf_labels['ID'] = test['ID']  


rbf_labels.to_csv('grid_search_rbf.csv', index=False)
