In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### 1. Core Idea

A Support Vector Machine (SVM) is a supervised learning algorithm used for **classification and regression**.
Unlike many algorithms that look for any separating boundary, SVM finds the **optimal boundary** that separates classes with the **maximum margin**.

The **margin** is the distance between the decision boundary and the closest data points from each class.
These closest points are called **support vectors**. They fully determine the position of the boundary.

---

### 2. Hard Margin vs Soft Margin

Real-world data is rarely perfectly separable, so in addition to the strict hard-margin formulation SVM offers a more tolerant soft-margin one:

**Hard Margin**

* Assumes perfect separation.
* No points are allowed inside the margin or on the wrong side.
* Very sensitive to noise.

**Soft Margin**

* Allows some misclassification and margin violations.
* More robust and widely used.

The trade-off is controlled by the **C parameter**:

* **High C**
  The model strongly penalizes errors. It tries to classify every point correctly, which can lead to overfitting.

* **Low C**
  The model allows more misclassification but keeps a wider margin, improving generalization.

---

### 3. The Kernel Trick

Many datasets are not linearly separable in their original feature space.
SVM solves this by mapping data into a **higher-dimensional space** where separation becomes easier.

Instead of explicitly creating new features, SVM uses the **kernel trick**, which computes inner products in high-dimensional space efficiently without performing the actual transformation. This makes nonlinear classification practical.

---

### 4. Common Kernels

Different kernels model different data patterns:

* **Linear Kernel**
  Best when the data is approximately linearly separable.

* **Polynomial Kernel**
  Useful for data with curved boundaries.

* **RBF (Radial Basis Function)**
  The most commonly used nonlinear kernel. It creates flexible decision boundaries by giving higher influence to nearby points.

---

### 5. Advantages and Limitations

**Advantages**

* Works well in high-dimensional feature spaces.
* Effective for complex nonlinear relationships using kernels.
* Depends only on support vectors, making it robust to many irrelevant points.

**Limitations**

* Computationally expensive for large datasets.
* Sensitive to noise when C is too high.
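* Requires careful tuning of hyperparameters: C, the kernel, and (for the RBF and polynomial kernels) gamma, which sets how far the influence of a single training point reaches.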

---

### Summary

SVM is a powerful algorithm for **small to medium-sized datasets with complex boundaries**. With the right kernel and parameter tuning, it can achieve very high performance. However, training a kernel SVM scales at least quadratically with the number of samples, so it quickly becomes impractical for very large Kaggle datasets.

---



In [None]:
import matplotlib.pyplot as plt
from scipy import stats

# use seaborn plotting defaults
import seaborn as sns; sns.set()
from matplotlib.axes._axes import _log as matplotlib_axes_logger
from mpl_toolkits import mplot3d
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from matplotlib.colors import ListedColormap

In [None]:
from sklearn.datasets import make_blobs
# Generate a small two-cluster toy dataset and plot it coloured by label.
X, y = make_blobs(
    n_samples=50,
    centers=2,
    cluster_std=0.60,
    random_state=0,
)
plt.scatter(X[:, 0], X[:, 1], s=50, c=y, cmap='winter')

In [None]:
# Fit a linear-kernel SVC (C=1) on the blob data.
model = SVC(C=1, kernel='linear')
model.fit(X, y)

In [None]:
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision boundary and margins of a fitted 2D SVC.

    Parameters
    ----------
    model : fitted classifier
        Must provide ``decision_function``; ``support_vectors_`` is read
        when ``plot_support`` is true.
    ax : matplotlib Axes, optional
        Axes to draw on; defaults to the current axes (``plt.gca()``).
        Its current x/y limits define the evaluation grid and are
        re-applied at the end so the overlay does not change the view.
    plot_support : bool, default True
        Whether to circle the support vectors.
    """
    if ax is None:
        ax = plt.gca()
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Evaluate the decision function on a 30x30 grid spanning the axes.
    grid_x = np.linspace(xlim[0], xlim[1], 30)
    grid_y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(grid_y, grid_x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # Level 0 is the decision boundary (solid); levels -1 and +1 are the
    # margins (dashed).
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    if plot_support:
        # Hollow markers need an explicit edge colour: with matplotlib's
        # default scatter edge colour ('face'), facecolors='none' alone
        # leaves the circles invisible.
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none', edgecolors='k')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

In [None]:
# Overlay the fitted decision function on the training points.
plt.scatter(X[:, 0], X[:, 1], s=50, c=y, cmap='winter')
plot_svc_decision_function(model);

In [None]:
def plot_svm(N=10, ax=None):
    """Fit a linear SVC on the first N blob points and plot the result.

    Generates 200 two-cluster points (``random_state=0``), keeps the first
    ``N``, fits a linear SVC with a very large ``C`` (1E10, i.e. errors are
    penalised very heavily), and draws the points plus the decision
    function on fixed axis limits.

    Parameters
    ----------
    N : int, default 10
        Number of leading samples to train on.
    ax : matplotlib Axes, optional
        Axes to draw on; defaults to the current axes (``plt.gca()``).
    """
    X, y = make_blobs(n_samples=200, centers=2,
                      random_state=0, cluster_std=0.60)
    X = X[:N]
    y = y[:N]
    model = SVC(kernel='linear', C=1E10)
    model.fit(X, y)

    # Explicit None check (as in plot_svc_decision_function) instead of
    # `ax or ...`, which would depend on the truthiness of the Axes object.
    if ax is None:
        ax = plt.gca()
    ax.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='winter')
    # Fixed limits so panels for different N are directly comparable.
    ax.set_xlim(-1, 4)
    ax.set_ylim(-1, 6)
    plot_svc_decision_function(model, ax)

# Side-by-side panels: the same model fitted on 60 and on 120 points.
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
for N, axi in zip([60, 120], ax):
    plot_svm(N, axi)
    axi.set_title('N = {}'.format(N))


### Working with an Almost Linearly Separable Dataset

In [None]:
# Same two-cluster setup, but with a larger spread (cluster_std=1.2).
X, y = make_blobs(
    n_samples=100,
    centers=2,
    cluster_std=1.2,
    random_state=0,
)
plt.scatter(X[:, 0], X[:, 1], s=50, c=y, cmap='winter');

In [None]:
# Regenerate the blobs with cluster_std=0.8 and compare a large and a small C.
X, y = make_blobs(n_samples=100, centers=2,
                  random_state=0, cluster_std=0.8)

fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

for axi, C in zip(ax, [100.0, 0.01]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='winter')
    # The support vectors are drawn once, just below, so skip the helper's
    # own markers instead of plotting them twice.
    plot_svc_decision_function(model, axi, plot_support=False)
    # Hollow markers need an explicit edge colour to be visible.
    axi.scatter(model.support_vectors_[:, 0],
                model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none', edgecolors='k')
    # '{:g}' keeps small values readable; '{:.1f}' rendered C=0.01 as "C = 0.0".
    axi.set_title('C = {0:g}'.format(C), size=14)

In [None]:
from sklearn.datasets import make_circles
# Noisy two-class circles dataset. The seed is fixed so the sample is the
# same on every run of the notebook.
X, y = make_circles(100, factor=.1, noise=.1, random_state=0)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='bwr');

In [None]:
# Hold out 20% of the points for testing; the fixed seed keeps the split
# (and therefore the reported accuracies) the same across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

In [None]:
# Baseline: linear-kernel SVC on the circles data.
classifier = SVC(kernel="linear")
# Labels are passed as-is, the same way the rbf and poly fits do it; the
# extra .ravel() was redundant.
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score
# Accuracy of the latest predictions on the held-out test set
# (the linear-kernel classifier when cells are run in order).
accuracy_score(y_test, y_pred)

In [None]:
# Two-colour map shared by the shaded regions and the scatter points.
zero_one_colourmap = ListedColormap(('blue', 'red'))
def plot_decision_boundary(X, y, clf):
    """Shade the regions predicted by a 2D classifier and overlay the data.

    Parameters
    ----------
    X : 2D array
        Feature matrix; columns 0 and 1 are used as the plot axes.
    y : array
        Labels, one per row of ``X``; each unique value gets its own
        scatter series and legend entry.
    clf : fitted classifier
        Must provide ``predict`` for an array of 2D points.

    Returns
    -------
    The result of ``plt.show()``.
    """
    # Dense grid (step 0.01) covering the data range padded by 1 on each side.
    x1_axis = np.arange(X[:, 0].min() - 1, X[:, 0].max() + 1, 0.01)
    x2_axis = np.arange(X[:, 1].min() - 1, X[:, 1].max() + 1, 0.01)
    X1, X2 = np.meshgrid(x1_axis, x2_axis)

    # Predict a label for every grid point and fill the regions.
    grid_points = np.array([X1.ravel(), X2.ravel()]).T
    grid_pred = clf.predict(grid_points).reshape(X1.shape)
    plt.contourf(X1, X2, grid_pred, alpha=0.75, cmap=zero_one_colourmap)
    plt.xlim(X1.min(), X1.max())
    plt.ylim(X2.min(), X2.max())

    for i, j in enumerate(np.unique(y)):
        # `color=` rather than `c=`: a single RGBA tuple passed as `c` is
        # ambiguous with a sequence of values to colour-map and makes
        # matplotlib emit a warning.
        plt.scatter(X[y == j, 0], X[y == j, 1],
                    color=zero_one_colourmap(i), label=j)
    plt.title('SVM Decision Boundary')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.legend()
    return plt.show()

In [None]:
# Decision regions of the linear-kernel classifier, drawn over the full
# dataset (training and test points together).
plot_decision_boundary(X, y, classifier)

In [None]:
def plot_3d_plot(X, y, ax=None):
    """Scatter 2D points in 3D using a radial feature as the height.

    The third coordinate is ``r = exp(-sum(X**2, axis=1))``, so it is 1 at
    the origin and decreases with distance from it.

    Parameters
    ----------
    X : 2D array
        Points; columns 0 and 1 are the horizontal axes.
    y : array
        Per-point values used to colour the markers.
    ax : 3D Axes, optional
        Axes to draw on; when omitted a new 3D subplot is created.

    Returns
    -------
    The axes that were drawn on.
    """
    r = np.exp(-(X ** 2).sum(1))
    if ax is None:
        ax = plt.subplot(projection='3d')
    ax.scatter3D(X[:, 0], X[:, 1], r, c=y, s=100, cmap='bwr')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    # The vertical axis shows the radial feature r, not the class label y.
    ax.set_zlabel('r')
    return ax

In [None]:
# Lift the circles data into 3D, with r = exp(-(x1^2 + x2^2)) as the
# third coordinate.
plot_3d_plot(X,y)

In [None]:

# Same split as before, now with an RBF kernel; fit() returns the estimator.
rbf_classifier = SVC(kernel="rbf").fit(X_train, y_train)
y_pred = rbf_classifier.predict(X_test)

In [None]:
# Accuracy of the latest predictions on the held-out test set
# (the RBF-kernel classifier when cells are run in order).
accuracy_score(y_test, y_pred)

In [None]:
# Decision regions of the RBF-kernel classifier over the full dataset.
plot_decision_boundary(X, y, rbf_classifier)

In [None]:
# Same split again, with a degree-2 polynomial kernel; fit() returns the estimator.
poly_classifier = SVC(kernel="poly", degree=2).fit(X_train, y_train)
y_pred = poly_classifier.predict(X_test)

In [None]:
# Accuracy of the latest predictions on the held-out test set
# (the polynomial-kernel classifier when cells are run in order).
accuracy_score(y_test, y_pred)

In [None]:
# Decision regions of the degree-2 polynomial classifier over the full dataset.
plot_decision_boundary(X, y, poly_classifier)