# Linear Support Vector Machines: Hard vs Soft Margin

## Objective
Understand how SVMs classify data by maximizing the margin and how
the soft-margin formulation handles noisy and non-separable data.


In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.svm import SVC

In [None]:
# Build a small two-feature, two-class toy dataset. Both features are
# informative, none are redundant, and each class forms a single cluster;
# the fixed random_state keeps the sample reproducible between runs.
X, y = make_classification(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=2.0,
    random_state=42,
)

# Relabel the classes from {0, 1} to {-1, +1}, the sign convention used in
# the SVM formulation.
y = np.where(y != 0, 1, -1)

In [None]:
# Scatter the samples in feature space, coloured by class label, to inspect
# the data before fitting a classifier.
plt.scatter(*X.T[:2], c=y, cmap="bwr")
plt.title("Linearly Separable Data")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()


In [None]:
# A very large C makes margin violations extremely expensive, so the
# soft-margin solver behaves approximately like a hard-margin SVM.
hard_svm = SVC(C=1e6, kernel="linear")
hard_svm.fit(X, y)


In [None]:
def plot_svm_decision_boundary(model, X, y):
    """Plot 2-D samples with a linear model's decision boundary and margins.

    The solid line is the decision boundary ``w . x + b = 0``; the two dashed
    lines are the margin boundaries ``w . x + b = +1`` and ``w . x + b = -1``,
    where ``w = model.coef_[0]`` and ``b = model.intercept_[0]``.

    Args:
        model: Fitted linear classifier exposing ``coef_`` and ``intercept_``
            (for example ``SVC(kernel="linear")``).
        X: Array whose first two columns are the features to plot.
        y: Per-sample values used to colour the scatter points.
    """
    w = model.coef_[0]
    b = model.intercept_[0]

    plt.scatter(X[:, 0], X[:, 1], c=y, cmap="bwr")

    if w[1] == 0:
        # The boundary is vertical (x0 = -b / w0), so it cannot be written
        # as x1 = f(x0); draw vertical lines instead of dividing by zero.
        x_boundary = -b / w[0]
        x_shift = 1 / w[0]
        plt.axvline(x_boundary, color="k", linestyle="-",
                    label="Decision boundary")
        plt.axvline(x_boundary + x_shift, color="k", linestyle="--")
        plt.axvline(x_boundary - x_shift, color="k", linestyle="--")
    else:
        x0 = np.linspace(X[:, 0].min() - 1, X[:, 0].max() + 1, 200)
        x1 = -(w[0] * x0 + b) / w[1]

        # Solving w . x + b = +/-1 for x1 shifts the boundary vertically by
        # 1 / w[1]. The geometric margin 1 / ||w|| is a perpendicular
        # distance; using it as a vertical shift is only right when the
        # boundary is horizontal and otherwise draws the margins too narrow.
        x1_shift = 1 / w[1]

        plt.plot(x0, x1, "k-", label="Decision boundary")
        plt.plot(x0, x1 + x1_shift, "k--")
        plt.plot(x0, x1 - x1_shift, "k--")

    plt.legend()
    plt.show()


In [None]:
# Draw the fitted near-hard-margin classifier on top of the training data.
plot_svm_decision_boundary(model=hard_svm, X=X, y=y)
