In [1]:
import numpy as np

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report, log_loss

# Part 1:

Uses `ionosphere.data` and a SVC to classify as good (1) or bad (0).

In [3]:
iodata = np.loadtxt("ionosphere.data", dtype=object, delimiter=",")
iodata

array([['1', '0', '0.99539', ..., '0.18641', '-0.45300', 'g'],
       ['1', '0', '1', ..., '-0.13738', '-0.02447', 'b'],
       ['1', '0', '1', ..., '0.56045', '-0.38238', 'g'],
       ...,
       ['1', '0', '0.94701', ..., '0.92697', '-0.00577', 'g'],
       ['1', '0', '0.90608', ..., '0.87403', '-0.16243', 'g'],
       ['1', '0', '0.84710', ..., '0.85764', '-0.06151', 'g']],
      dtype=object)

In [4]:
for row in range(iodata.shape[0]):
    if iodata[row, -1] == "g":
        iodata[row, -1] = 1
    else:
        iodata[row, -1] = 0
iodata = iodata.astype(np.float64)
print(iodata.dtype)
iodata

float64


array([[ 1.     ,  0.     ,  0.99539, ...,  0.18641, -0.453  ,  1.     ],
       [ 1.     ,  0.     ,  1.     , ..., -0.13738, -0.02447,  0.     ],
       [ 1.     ,  0.     ,  1.     , ...,  0.56045, -0.38238,  1.     ],
       ...,
       [ 1.     ,  0.     ,  0.94701, ...,  0.92697, -0.00577,  1.     ],
       [ 1.     ,  0.     ,  0.90608, ...,  0.87403, -0.16243,  1.     ],
       [ 1.     ,  0.     ,  0.8471 , ...,  0.85764, -0.06151,  1.     ]])

In [5]:
ioX = iodata[:, :-1]
ioY = iodata[:, -1]
print(ioX)
print(ioY)

[[ 1.       0.       0.99539 ... -0.54487  0.18641 -0.453  ]
 [ 1.       0.       1.      ... -0.06288 -0.13738 -0.02447]
 [ 1.       0.       1.      ... -0.2418   0.56045 -0.38238]
 ...
 [ 1.       0.       0.94701 ...  0.00442  0.92697 -0.00577]
 [ 1.       0.       0.90608 ... -0.03757  0.87403 -0.16243]
 [ 1.       0.       0.8471  ... -0.06678  0.85764 -0.06151]]
[1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.
 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.
 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.
 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0.
 1. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1.
 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1.
 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1.
 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1.
 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0. 1. 0

In [6]:
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
ioX = scaler.fit_transform(ioX)
print(scaler.get_params())
ioX

{'copy': True, 'with_mean': True, 'with_std': True}


array([[ 0.34843328,  0.        ,  0.71237237, ..., -1.05505394,
        -0.3122206 , -0.99959483],
       [ 0.34843328,  0.        ,  0.72164805, ..., -0.11521328,
        -0.93260505, -0.08328554],
       [ 0.34843328,  0.        ,  0.72164805, ..., -0.46409249,
         0.40444328, -0.84859079],
       ...,
       [ 0.34843328,  0.        ,  0.61502805, ...,  0.01601615,
         1.10669878, -0.04330004],
       [ 0.34843328,  0.        ,  0.53267371, ..., -0.06586087,
         1.00526528, -0.37828012],
       [ 0.34843328,  0.        ,  0.41400137, ..., -0.12281796,
         0.9738619 , -0.16248675]])

In [7]:
ioX_train, ioX_test, ioY_train, ioY_test = train_test_split(ioX, ioY, random_state=0, shuffle=True)
print("ioX_train")
print(ioX_train, ioX_train.shape)
print("ioY_train")
print(ioY_train, ioY_train.shape)
print("ioX_test")
print(ioX_test, ioX_test.shape)
print("ioY_test")
print(ioY_test, ioY_test.shape)

ioX_train
[[ 0.34843328  0.         -0.62978403 ...  0.26665853 -0.4356882
   0.40669713]
 [ 0.34843328  0.          0.72164805 ... -0.42199378  0.87022499
  -0.18767547]
 [ 0.34843328  0.          0.71563193 ... -0.44008901 -1.57375903
  -0.78003812]
 ...
 [ 0.34843328  0.          0.68506847 ...  0.32845141  0.45799572
   0.33418868]
 [ 0.34843328  0.          0.61295561 ...  0.40668209 -0.78658603
   0.86524736]
 [-2.86998991  0.         -1.29042961 ...  1.95731486 -0.66938378
  -0.03096227]] (263, 34)
ioY_train
[1. 1. 1. 1. 0. 1. 1. 0. 0. 1. 0. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1.
 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0.
 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0. 0. 1. 1. 0.
 1. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 0.
 0. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 0. 0. 1.
 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1.
 0. 1. 0. 0. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1

In [8]:
gs_params = [
    {"kernel": ["linear"], "C": [0.01, 0.1, 1, 10, 100, 1000]},
    {"kernel": ["poly"], "degree": [2, 3, 4], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]},
    {"kernel": ["rbf"], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]}
]
coarse_search = GridSearchCV(estimator=SVC(), param_grid=gs_params, cv=5, scoring="f1", n_jobs=-1)
coarse_search.fit(ioX_train, ioY_train)
print("Course Grid Search:\n")
print("Best parameters found for SVC on ionosphere.data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting F1 Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = ioY_test, coarse_search.predict(ioX_test)
print(classification_report(y_true, y_pred))

Course Grid Search:

Best parameters found for SVC on ionosphere.data:

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

Best Fitting F1 Score:
0.9456192924163961

Classification Report for these parameters:
              precision    recall  f1-score   support

         0.0       0.95      0.97      0.96        38
         1.0       0.98      0.96      0.97        50

   micro avg       0.97      0.97      0.97        88
   macro avg       0.96      0.97      0.97        88
weighted avg       0.97      0.97      0.97        88





In [9]:
gs_params = [
    {"kernel": ["rbf"], "C": np.arange(0.75, 1.25, 0.01), "gamma": np.arange(0.075, 0.125, 0.001)}
]
fine_search = GridSearchCV(estimator=SVC(), param_grid=gs_params, cv=5, scoring="f1", n_jobs=-1)
coarse_search.fit(ioX_train, ioY_train)
print("Fine Grid Search:\n")
print("Best parameters found for SVC on ionosphere.data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting F1 Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = ioY_test, coarse_search.predict(ioX_test)
print(classification_report(y_true, y_pred))

Fine Grid Search:

Best parameters found for SVC on ionosphere.data:

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

Best Fitting F1 Score:
0.9456192924163961

Classification Report for these parameters:
              precision    recall  f1-score   support

         0.0       0.95      0.97      0.96        38
         1.0       0.98      0.96      0.97        50

   micro avg       0.97      0.97      0.97        88
   macro avg       0.96      0.97      0.97        88
weighted avg       0.97      0.97      0.97        88





# Part 2

Uses `vowel-context.data`

In [10]:
vcdata = np.loadtxt("vowel-context.data", dtype=np.float64)
vcdata

array([[ 0.   ,  0.   ,  0.   , ..., -0.874, -0.814,  0.   ],
       [ 0.   ,  0.   ,  0.   , ..., -0.621, -0.488,  1.   ],
       [ 0.   ,  0.   ,  0.   , ..., -0.809, -0.049,  2.   ],
       ...,
       [ 1.   , 14.   ,  1.   , ..., -0.773, -0.5  ,  8.   ],
       [ 1.   , 14.   ,  1.   , ..., -0.863, -0.39 ,  9.   ],
       [ 1.   , 14.   ,  1.   , ..., -1.046,  0.697, 10.   ]])

In [11]:
vcdata = vcdata[:, 3:]
vcdata

array([[-3.639,  0.418, -0.67 , ..., -0.874, -0.814,  0.   ],
       [-3.327,  0.496, -0.694, ..., -0.621, -0.488,  1.   ],
       [-2.12 ,  0.894, -1.576, ..., -0.809, -0.049,  2.   ],
       ...,
       [-3.98 ,  2.459,  0.068, ..., -0.773, -0.5  ,  8.   ],
       [-4.264,  2.925,  0.065, ..., -0.863, -0.39 ,  9.   ],
       [-3.291,  2.324, -0.679, ..., -1.046,  0.697, 10.   ]])

In [12]:
vcX = vcdata[:, :-1]
vcY = vcdata[:, -1]
print(vcX, vcX.shape)
print(vcY, vcY.shape)

[[-3.639  0.418 -0.67  ...  0.529 -0.874 -0.814]
 [-3.327  0.496 -0.694 ...  0.51  -0.621 -0.488]
 [-2.12   0.894 -1.576 ...  0.676 -0.809 -0.049]
 ...
 [-3.98   2.459  0.068 ...  0.521 -0.773 -0.5  ]
 [-4.264  2.925  0.065 ... -0.14  -0.863 -0.39 ]
 [-3.291  2.324 -0.679 ...  0.115 -1.046  0.697]] (990, 10)
[ 0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.
  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.
  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9.
 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.
  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.
  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.
  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.
  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.
  1.  2.  3.  4.  5.  6.  7.  8.  9. 10.  0.  1.  2.  3.  4.  5.  6.  7.
  8.  9. 10.  0.  1.  2.  3.  4. 

In [13]:
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
vcX = scaler.fit_transform(vcX)
print(scaler.get_params())
vcX

{'copy': True, 'with_mean': True, 'with_std': True}


array([[-0.50113464, -1.24609747, -0.22798328, ...,  0.3358522 ,
        -1.00201915, -1.23022154],
       [-0.14191451, -1.17969631, -0.26171064, ...,  0.30269411,
        -0.55806104, -0.69020068],
       [ 1.24776079, -0.84088014, -1.50119087, ...,  0.59239116,
        -0.88795876,  0.03700533],
       ...,
       [-0.89374382,  0.49139954,  0.80913284, ...,  0.3218909 ,
        -0.82478686, -0.71007875],
       [-1.22072625,  0.8881039 ,  0.80491692, ..., -0.83166181,
        -0.98271662, -0.52786312],
       [-0.10046604,  0.37647445, -0.24063104, ..., -0.38664526,
        -1.30384047,  1.2727586 ]])

In [14]:
vcX_train, vcX_test, vcY_train, vcY_test = train_test_split(vcX, vcY, random_state=0, shuffle=True)
print("vcX_train")
print(vcX_train, vcX_train.shape)
print("vcY_train")
print(vcY_train, vcY_train.shape)
print("vcX_test")
print(vcX_test, vcX_test.shape)
print("vcY_test")
print(vcY_test, vcY_test.shape)

vcX_train
[[ 1.31914428 -2.1212307  -1.39017167 ...  1.68660833  0.1947375
  -0.77302596]
 [-0.28468149  0.31943756 -0.4739119  ... -0.57686802 -1.36701238
   1.37546196]
 [ 0.18046253 -0.52334639 -0.40926781 ... -0.06029979 -0.7510863
  -0.59577985]
 ...
 [ 1.50681377 -0.95921041 -1.23699327 ...  0.62205893  0.05786504
  -0.45497687]
 [-1.85281551 -0.71488819  1.96007877 ... -1.36917201 -0.19131303
   2.12917206]
 [ 0.67784424 -0.59315274 -0.91939403 ...  0.20845531 -0.55630626
  -0.39037314]] (742, 10)
vcY_train
[ 0. 10.  2.  8.  4.  8.  6.  7. 10.  9.  0.  1.  8.  6.  6.  8.  7.  6.
  4. 10.  5.  7.  2.  1.  0.  8.  9.  6.  9.  0.  2.  2.  6.  1.  0.  4.
  2.  6.  5.  1. 10.  6.  2.  0. 10.  7.  5.  8.  3.  3.  4.  2.  4.  9.
 10.  3.  7. 10.  5.  5.  3.  1.  2.  6.  7. 10.  4.  3. 10.  7.  9.  6.
  9.  8.  8.  7.  3.  4.  4. 10.  7.  3.  8.  1.  3.  6.  2.  4.  2. 10.
  4.  6.  3.  3.  9.  6. 10.  3.  5.  6. 10.  7.  8. 10.  9.  4.  5.  0.
  5.  5.  0.  1.  1.  4.  1.  8. 10.  1.  

In [15]:
gs_params = [
    {"kernel": ["linear"], "C": [0.01, 0.1, 1, 10, 100, 1000]},
    {"kernel": ["poly"], "degree": [2, 3, 4], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]},
    {"kernel": ["rbf"], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]}
]
coarse_search = GridSearchCV(estimator=SVC(probability=True), param_grid=gs_params, cv=3, scoring="neg_log_loss", n_jobs=-1)
coarse_search.fit(vcX_train, vcY_train)
print("Course Grid Search:\n")
print("Best parameters found for SVC on vowel-context.data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting Log Loss Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = vcY_test, coarse_search.predict(vcX_test)
print(classification_report(y_true, y_pred))

Course Grid Search:

Best parameters found for SVC on vowel-context.data:

{'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}

Best Fitting Log Loss Score:
-0.22469136800032466

Classification Report for these parameters:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        27
         1.0       1.00      1.00      1.00        20
         2.0       1.00      1.00      1.00        22
         3.0       1.00      1.00      1.00        20
         4.0       0.96      1.00      0.98        25
         5.0       1.00      0.91      0.95        23
         6.0       1.00      1.00      1.00        23
         7.0       1.00      1.00      1.00        24
         8.0       1.00      1.00      1.00        17
         9.0       1.00      1.00      1.00        27
        10.0       0.95      1.00      0.98        20

   micro avg       0.99      0.99      0.99       248
   macro avg       0.99      0.99      0.99       248
weighted avg       0.99      0



In [16]:
gs_params = [
    {"kernel": ["rbf"], "C": np.arange(7.5, 12.5, 0.5), "gamma": np.arange(0.075, 0.125, 0.005)}
]
coarse_search = GridSearchCV(estimator=SVC(probability=True), param_grid=gs_params, cv=3, scoring="neg_log_loss", n_jobs=-1)
coarse_search.fit(vcX_train, vcY_train)
print("Fine Grid Search:\n")
print("Best parameters found for SVC on vowel-context.data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting Log Loss Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = vcY_test, coarse_search.predict(vcX_test)
proba = coarse_search.predict_proba(vcX_test)
print(classification_report(y_true, y_pred))
print()
print("Testing Log Loss Score:")
print(log_loss(y_true, proba))

Fine Grid Search:

Best parameters found for SVC on vowel-context.data:

{'C': 10.5, 'gamma': 0.12000000000000004, 'kernel': 'rbf'}

Best Fitting Log Loss Score:
-0.2163932448525439

Classification Report for these parameters:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        27
         1.0       1.00      1.00      1.00        20
         2.0       1.00      1.00      1.00        22
         3.0       1.00      1.00      1.00        20
         4.0       1.00      1.00      1.00        25
         5.0       1.00      0.96      0.98        23
         6.0       1.00      1.00      1.00        23
         7.0       1.00      1.00      1.00        24
         8.0       1.00      1.00      1.00        17
         9.0       1.00      1.00      1.00        27
        10.0       0.95      1.00      0.98        20

   micro avg       1.00      1.00      1.00       248
   macro avg       1.00      1.00      1.00       248
weighted avg   



# Part 3

Uses `sat.trn` and `sat.tst`

In [17]:
trn = np.loadtxt("sat.trn", dtype=np.float64)
tst = np.loadtxt("sat.tst", dtype=np.float64)
print(trn, trn.shape)
print(tst, tst.shape)

[[ 92. 115. 120. ... 113.  87.   3.]
 [ 84. 102. 106. ... 104.  79.   3.]
 [ 84. 102. 102. ... 104.  79.   3.]
 ...
 [ 68.  75. 108. ... 104.  85.   4.]
 [ 71.  87. 108. ... 104.  85.   4.]
 [ 71.  91. 100. ... 100.  81.   4.]] (4435, 37)
[[ 80. 102. 102. ... 113.  87.   3.]
 [ 76. 102. 102. ... 104.  83.   3.]
 [ 80.  98. 106. ...  96.  75.   4.]
 ...
 [ 56.  68.  91. ...  92.  74.   5.]
 [ 56.  68.  87. ...  92.  70.   5.]
 [ 60.  71.  91. ... 108.  92.   5.]] (2000, 37)


In [18]:
trnX = trn[:, :-1]
trnY = trn[:, -1]
tstX = tst[:, :-1]
tstY = tst[:, -1]

In [19]:
scaler = StandardScaler(copy=True, with_mean=True, with_std=True)
trnX = scaler.fit_transform(trnX)
print(scaler.get_params())
tstX = scaler.transform(tstX)
print("trnX")
print(trnX, trnX.shape)
print("trnY")
print(trnY, trnY.shape)
print("tstX")
print(tstX, tstX.shape)
print("tstY")
print(tstY, tstY.shape)

{'copy': True, 'with_mean': True, 'with_std': True}
trnX
[[ 1.6504898   1.37065618  1.24035744 ...  1.04773951  0.83669871
   0.23370924]
 [ 1.06432744  0.79853645  0.40053984 ...  0.69624747  0.29644556
  -0.18971944]
 [ 1.06432744  0.79853645  0.16059195 ...  0.69624747  0.29644556
  -0.18971944]
 ...
 [-0.10799728 -0.38971222  0.52051378 ...  0.74018398  0.29644556
   0.12785207]
 [ 0.11181361  0.1383983   0.52051378 ...  0.34475544  0.29644556
   0.12785207]
 [ 0.11181361  0.31443514  0.04061801 ...  0.34475544  0.05633305
  -0.08386227]] (4435, 36)
trnY
[3. 3. 3. ... 4. 4. 4.] (4435,)
tstX
[[ 0.77124626  0.79853645  0.16059195 ...  1.04773951  0.83669871
   0.23370924]
 [ 0.47816508  0.79853645  0.16059195 ...  0.87199349  0.29644556
   0.0219949 ]
 [ 0.77124626  0.62249961  0.40053984 ...  0.52050146 -0.18377946
  -0.40143378]
 ...
 [-0.98724082 -0.69777669 -0.49926473 ... -0.0067366  -0.42389197
  -0.45436237]
 [-0.98724082 -0.69777669 -0.73921262 ... -0.0067366  -0.42389197
  -

In [20]:
gs_params = [
    {"kernel": ["linear"], "C": [0.01, 0.1, 1, 10, 100, 1000]},
    {"kernel": ["poly"], "degree": [2, 3, 4], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]},
    {"kernel": ["rbf"], "C": [0.01, 0.1, 1, 10, 100, 1000], "gamma": [1, 0.1, 0.01, 0.001, 0.0001]}
]
coarse_search = GridSearchCV(estimator=SVC(probability=True), param_grid=gs_params, cv=2, scoring="neg_log_loss", n_jobs=-1)
coarse_search.fit(trnX, trnY)
print("Course Grid Search:\n")
print("Best parameters found for SVC on sat data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting Log Loss Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = tstY, coarse_search.predict(tstX)
print(classification_report(y_true, y_pred))

Course Grid Search:

Best parameters found for SVC on sat data:

{'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}

Best Fitting Log Loss Score:
-0.4617627098415815

Classification Report for these parameters:
              precision    recall  f1-score   support

         1.0       0.95      0.99      0.97       461
         2.0       0.93      0.99      0.96       224
         3.0       0.86      0.96      0.90       397
         4.0       0.74      0.52      0.61       211
         5.0       0.93      0.77      0.84       237
         7.0       0.83      0.87      0.85       470

   micro avg       0.88      0.88      0.88      2000
   macro avg       0.87      0.85      0.86      2000
weighted avg       0.88      0.88      0.87      2000



In [21]:
gs_params = [
    {"kernel": ["rbf"], "C": np.arange(0.075, 0.125, 0.005), "gamma": np.arange(0.075, 0.125, 0.005)}
]
coarse_search = GridSearchCV(estimator=SVC(probability=True), param_grid=gs_params, cv=3, scoring="neg_log_loss", n_jobs=-1)
coarse_search.fit(trnX, trnY)
print("Fine Grid Search:\n")
print("Best parameters found for SVC on sat data:")
print()
print(coarse_search.best_params_)
print()
print("Best Fitting Log Loss Score:")
print(coarse_search.best_score_)
print()
print("Classification Report for these parameters:")
y_true, y_pred = tstY, coarse_search.predict(tstX)
proba = coarse_search.predict_proba(tstX)
print(classification_report(y_true, y_pred))
print()
print("Testing Log Loss Score:")
print(log_loss(y_true, proba))



Fine Grid Search:

Best parameters found for SVC on sat data:

{'C': 0.075, 'gamma': 0.12000000000000004, 'kernel': 'rbf'}

Best Fitting Log Loss Score:
-0.46043514812030534

Classification Report for these parameters:
              precision    recall  f1-score   support

         1.0       0.95      0.99      0.97       461
         2.0       0.89      0.99      0.93       224
         3.0       0.85      0.95      0.90       397
         4.0       0.74      0.51      0.61       211
         5.0       0.94      0.75      0.83       237
         7.0       0.83      0.87      0.85       470

   micro avg       0.88      0.88      0.88      2000
   macro avg       0.87      0.84      0.85      2000
weighted avg       0.87      0.88      0.87      2000


Testing Log Loss Score:
0.31276157784502484
