## Import Dataset

In [1]:
import pandas as pd
import numpy as np
# Read the diamonds table from the CSV next to the notebook and preview
# the first five rows.
DATASET_PATH = "diamonds.csv"
data = pd.read_csv(DATASET_PATH)
data.head(5)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


## Label Encoding

In [2]:
from sklearn.preprocessing import LabelEncoder
# Encode each categorical column with its OWN LabelEncoder. Re-fitting a single
# shared encoder would overwrite its fitted classes, so the integer codes of the
# earlier columns (including the target `cut`) could no longer be mapped back
# to their original labels.
# After this cell: encoders[col].classes_ holds the labels for `col`, and
# encoders[col].inverse_transform(codes) restores them.
CATEGORICAL_COLUMNS = ["cut", "color", "clarity"]
encoders = {}
for column in CATEGORICAL_COLUMNS:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward compatibility: `le` previously ended up fitted on `clarity`.
le = encoders["clarity"]

## Multiclass inspection

In [3]:
# Count how many rows fall into each encoded `cut` class.
data["cut"].value_counts()

2    21551
3    13791
4    12082
1     4906
0     1610
Name: cut, dtype: int64

In [4]:
# List the column labels of `data`.
data.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

## Split Train and Test

In [5]:
from sklearn.model_selection import train_test_split

# Every column except the target `cut` is used as a feature.
feature_columns = ['carat', 'color', 'clarity', 'depth', 'table',
                   'price', 'x', 'y', 'z']
x = data[feature_columns]
y = data['cut']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

## Classification Pipelines

In [6]:
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

def make_svm_pipeline(kernel):
    """Build an unfitted scale-then-classify pipeline for one SVC kernel.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``SVC(kernel=...)``.

    Returns
    -------
    Pipeline
        Two steps: ``"scaling"`` (StandardScaler) followed by
        ``"svm_<kernel>"`` (SVC). The classifier step is named after the
        kernel it actually uses, so ``named_steps`` and the pipeline repr
        are not misleading.
    """
    return Pipeline([("scaling", StandardScaler()),
                     ("svm_" + kernel, SVC(kernel=kernel))])


# One candidate pipeline per kernel; they differ only in the SVC kernel.
pipe1 = make_svm_pipeline("linear")
pipe2 = make_svm_pipeline("rbf")
pipe3 = make_svm_pipeline("poly")
pipe4 = make_svm_pipeline("sigmoid")

## Finding the Best Pipeline

In [None]:
pipelines = [pipe1, pipe2, pipe3, pipe4]
# Position in `pipelines` -> kernel name used when reporting results.
labls = dict(enumerate(["linear", "rbf", "poly", "sigmoid"]))

# Train every candidate on the training split before any scoring happens.
for pipe in pipelines:
    pipe.fit(X_train, y_train)

# Report each fitted pipeline's score on the held-out split.
for idx, model in zip(labls, pipelines):
    accuracy = model.score(X_test, y_test)
    print(labls[idx], 'Pipeline test accuracy:', accuracy)

## Grid Parameters

In [None]:
# Candidate values for the grid search: regularisation strength `C` and
# the kernel coefficient setting `gamma`.
C = [0.1, 0.5, 2.0, 3.0]
gamma = ["auto", "scale"]
params = {"C": C, "gamma": gamma}

## Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
# Tune the RBF SVM inside the same scale-then-classify pipeline used for the
# kernel comparison. Searching over a bare SVC would fit on unscaled features
# (e.g. `price` vs `carat`), which is inconsistent with the pipelines above.
svm_search_pipeline = Pipeline([("scaling", StandardScaler()),
                                ("svm", SVC(kernel="rbf"))])

# `params` is keyed by bare SVC argument names; GridSearchCV addresses
# parameters of a pipeline step as "<step name>__<parameter>".
pipeline_params = {"svm__" + name: values for name, values in params.items()}

clf = GridSearchCV(svm_search_pipeline, pipeline_params)
best_model = clf.fit(X_train, y_train)

In [None]:
# Display the estimator selected by the grid search.
best_model.best_estimator_

## Training and Test Accuracy

In [None]:
# Score the fitted grid search on the training and held-out splits.
# The split cell created `X_train` / `X_test` (capital X); the lowercase
# `x_train` / `x_test` were never defined and raised NameError.
print("Train Accuracy",best_model.score(X_train,y_train))
print("Test Accuracy",best_model.score(X_test,y_test))