In [59]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVC
from sklearn.discriminant_analysis import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform


In [16]:
wine = load_wine(as_frame=True)
X = wine.data
y = wine.target

print (wine.target_names)

X.head()

['class_0' 'class_1' 'class_2']


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [17]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
svm_clf = make_pipeline(
  StandardScaler(),
  LinearSVC(dual=True, random_state=42)
)

# No need for this, the LinearSVC does this automatically
#svm_clf_multi = OneVsRestClassifier(svm_clf)
cross_val_score(svm_clf, X_train, y_train).mean()


0.9650246305418719

In [33]:
svm_clf.named_steps

{'standardscaler': StandardScaler(),
 'linearsvc': LinearSVC(dual=True, random_state=42)}

In [54]:
param_dist = {
  "linearsvc__C": uniform(1,10) 
}

rnd_search = RandomizedSearchCV(svm_clf, param_dist, n_iter=100, cv=5, random_state=42)
rnd_search.fit(X_train, y_train)




In [55]:
score = pd.DataFrame(rnd_search.cv_results_).sort_values("mean_test_score", ascending=False)
score.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_linearsvc__C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.002292,0.000314,0.000673,0.000106,4.745401,{'linearsvc__C': 4.745401188473625},0.965517,0.965517,1.0,0.964286,0.964286,0.971921,0.01405,1
54,0.001549,0.000168,0.000446,3.8e-05,6.979,{'linearsvc__C': 6.978999788110851},0.965517,0.965517,1.0,0.964286,0.964286,0.971921,0.01405,1
69,0.001683,0.000207,0.000455,3.4e-05,10.868869,{'linearsvc__C': 10.868869366005173},0.965517,0.965517,1.0,0.964286,0.964286,0.971921,0.01405,1
67,0.001635,0.000206,0.000494,0.000131,9.02197,{'linearsvc__C': 9.021969807540398},0.965517,0.965517,1.0,0.964286,0.964286,0.971921,0.01405,1
65,0.001494,0.000111,0.000411,2.1e-05,6.426961,{'linearsvc__C': 6.426960831582485},0.965517,0.965517,1.0,0.964286,0.964286,0.971921,0.01405,1


In [56]:
best_estimator = rnd_search.best_estimator_
best_estimator.fit(X_train, y_train)


In [62]:
y_pred = best_estimator.predict(X_test)
print (classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



1.0