In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import KFold, GridSearchCV

First let's process the data.

In [20]:
# Parse the whitespace-delimited file 'spam.txt' into `arr`, a list of rows,
# where each row is a list of floats (one entry per field on the line).
with open('spam.txt', 'r') as f:
    arr = []
    for line in f:
        row = [float(x) for x in line.split()]
        # Skip blank lines (e.g. a trailing newline at the end of the file):
        # an empty row would otherwise become an all-NaN row once the list is
        # turned into a DataFrame, and NaNs would break the SVM fit later on.
        if row:
            arr.append(row)

In [21]:
# Wrap the parsed rows in a DataFrame: one row per line of the input file,
# columns labelled with default integer positions.
df = pd.DataFrame(arr)

In [22]:
# Convert to a plain 2-D NumPy array so columns can be sliced positionally below.
ds = df.to_numpy()

In [23]:
# Column-wise split: every column except the last goes into X (the inputs),
# and the last column goes into Y (the targets passed to the classifier).
X, Y = ds[:, :-1], ds[:, -1]

The kernel described in the problem is a polynomial kernel. We'll use scikit-learn's built-in grid search with cross-validation (`GridSearchCV`) to choose its degree, which saves us from hand-coding the cross-validation loop.

In [24]:
# 3-fold cross-validation splitter. The data is shuffled before splitting,
# and the fixed random_state keeps the fold assignment reproducible.
kfold = KFold(
    n_splits=3,
    shuffle=True,
    random_state=42,
)

In [25]:
# Support vector classifier with a polynomial kernel. The degree is left at
# its default here because the grid search below overrides it per candidate.
# NOTE(review): coef0 is left at scikit-learn's documented default of 0.0,
# which makes the kernel homogeneous (no constant term) -- confirm this
# matches the kernel given in the problem statement.
# NOTE(review): X is passed to the SVM without any feature scaling; SVMs are
# scale-sensitive, so standardizing the features may be worth trying.
estimator = SVC(kernel='poly')

In [26]:
# Grid search over polynomial degrees 1 through 5. Each candidate is scored
# by accuracy on the folds produced by `kfold`.
search = GridSearchCV(
    estimator=estimator,
    param_grid={'degree': [1, 2, 3, 4, 5]},
    cv=kfold,
    scoring='accuracy',
)

In [27]:
# Run the search: one SVC fit per (degree, fold) pair, i.e. 5 degrees x 3 folds.
# With GridSearchCV's documented default refit=True, the best candidate is
# then refit on all of X, Y and exposed as search.best_estimator_.
search.fit(X,Y)

Below we report the results of the cross-validation search.

In [28]:
# Full per-candidate results: fit/score timings, per-fold test scores and
# their mean, one array entry per degree in the grid (in grid order).
search.cv_results_

{'mean_fit_time': array([  0.24166036,   0.35994593,   1.07228192,  12.26199198,
        132.41668765]),
 'std_fit_time': array([6.92428072e-03, 4.45976729e-02, 3.64046758e-01, 7.74655200e+00,
        1.09514517e+02]),
 'mean_score_time': array([0.09578729, 0.10144043, 0.1056633 , 0.11254684, 0.11371326]),
 'std_score_time': array([0.00184996, 0.00304956, 0.00406944, 0.00379481, 0.00456879]),
 'param_degree': masked_array(data=[1, 2, 3, 4, 5],
              mask=[False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'degree': 1},
  {'degree': 2},
  {'degree': 3},
  {'degree': 4},
  {'degree': 5}],
 'split0_test_score': array([0.67731421, 0.6440678 , 0.63428944, 0.6303781 , 0.6303781 ]),
 'split1_test_score': array([0.70273794, 0.67535854, 0.66036506, 0.65840939, 0.66101695]),
 'split2_test_score': array([0.72276582, 0.68166993, 0.66666667, 0.67253751, 0.67449446]),
 'mean_test_score': array([0.70093932, 0.66703209, 0.65377372, 0.653775  , 

In [29]:
# Parameter setting that achieved the highest mean cross-validated accuracy.
search.best_params_

{'degree': 1}

In [30]:
# The SVC configured with the best parameters found by the search.
search.best_estimator_

The best polynomial degree appears to be 1, with a mean cross-validated accuracy of about 70%.