In [3]:
import pandas as pd
import numpy as np

In [4]:
df = pd.read_csv('Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
df.isnull().sum()

Id               0
SepalLengthCm    0
SepalWidthCm     0
PetalLengthCm    0
PetalWidthCm     0
Species          0
dtype: int64

In [6]:
X = df.drop(columns=['Id','Species'])
y = df['Species']

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3)

In [9]:
from sklearn.svm import SVC

model = SVC()
model.fit(X_train, y_train)

In [10]:
predictions = model.predict(X_test)

In [11]:
from sklearn.metrics import classification_report

cm = classification_report(y_test, predictions)
print(cm)

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        14
Iris-versicolor       0.80      1.00      0.89        12
 Iris-virginica       1.00      0.84      0.91        19

       accuracy                           0.93        45
      macro avg       0.93      0.95      0.93        45
   weighted avg       0.95      0.93      0.93        45



## Scaling before GridSearchCV

In [28]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Hyper parameter tuning of SVM using GridSearchCV

In [14]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'C':[0.1,10,100],
    'gamma':[1,0.1,0.01,0.001],
    'kernel':['rbf','linear','poly']
}

grid = GridSearchCV(model, param_grid, refit=True, verbose=2, cv=5)
grid.fit(X_train_scaled, y_train)

print('Best parameters found:', grid.best_params_)
print('Best cross-validation accuracy:', grid.best_score_)

best_model = grid.best_estimator_

Fitting 5 folds for each of 36 candidates, totalling 180 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time=   0.0s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.0s
[CV] END ........................C=0.1, gamma=1

In [15]:
y_pred = grid.predict(X_test_scaled)
print(classification_report(y_test, y_pred))

                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        14
Iris-versicolor       0.80      1.00      0.89        12
 Iris-virginica       1.00      0.84      0.91        19

       accuracy                           0.93        45
      macro avg       0.93      0.95      0.93        45
   weighted avg       0.95      0.93      0.93        45



## Plotting decision boundaries

In [29]:
df = pd.DataFrame(X_train_scaled)
df = df.drop(columns=[2,3], axis=1)
X_train_scaled = df.to_numpy()

In [30]:
def plot_decision_boundary(model, X, y, title):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 500),
                         np.linspace(y_min, y_max, 500))
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)
    scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm, edgecolors='k')
    plt.xlabel("Petal Length (standardized)")
    plt.ylabel("Petal Width (standardized)")
    plt.title(title)
    plt.legend(handles=scatter.legend_elements()[0], labels=iris.target_names)
    plt.show()

# Plot
plot_decision_boundary(grid.best_estimator_, X_train_scaled, y_train, "SVM Decision Boundary (Training Data)")

ValueError: X has 2 features, but SVC is expecting 4 features as input.