# Linear_SVM_Soft_Margin_using_GD.py
# Soft-margin linear SVM trained with (sub)gradient descent on the iris dataset.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
class LinearSVMUsingSoftMargin:
    """Soft-margin linear SVM trained with (sub)gradient descent.

    Minimizes the primal objective 0.5*||beta||^2 + C * sum(hinge loss)
    for labels in {-1, +1}.
    """

    def __init__(self, C=1.0):
        # C: penalty on margin violations (larger C => harder margin)
        self._support_vectors = None
        self.C = C
        self.beta = None  # weight vector, shape (d,)
        self.b = None     # intercept term
        self.X = None     # training data, kept only for plotting
        self.y = None     # training labels, kept only for plotting
        # n is the number of data points
        self.n = 0
        # d is the number of dimensions
        self.d = 0
        # per-epoch objective values recorded by fit() (was computed but
        # discarded in the original implementation)
        self.loss_array = []

    def __decision_function(self, X):
        """Raw decision value f(x) = X.beta + b for each row of X."""
        return X.dot(self.beta) + self.b

    def __cost(self, margin):
        """Primal objective: 0.5*||beta||^2 + C * sum of hinge losses."""
        return (1 / 2) * self.beta.dot(self.beta) + self.C * np.sum(np.maximum(0, 1 - margin))

    def __margin(self, X, y):
        """Functional margin y * f(x); values < 1 violate the soft margin."""
        return y * self.__decision_function(X)

    def fit(self, X, y, lr=1e-3, epochs=500):
        """Train by subgradient descent.

        Parameters
        ----------
        X : array of shape (n, d) — feature matrix.
        y : array of shape (n,) — labels, expected in {-1, +1}.
        lr : learning rate for the subgradient steps.
        epochs : number of full-batch passes.

        Records the per-epoch objective in ``self.loss_array`` and
        returns ``self`` (sklearn-style, backward compatible).
        """
        # Initialize beta and b
        self.n, self.d = X.shape
        self.beta = np.random.randn(self.d)
        self.b = 0
        # Required only for plotting
        self.X = X
        self.y = y
        self.loss_array = []
        for _ in range(epochs):
            margin = self.__margin(X, y)
            # points with margin < 1 contribute a hinge-loss subgradient
            misclassified_pts_idx = np.where(margin < 1)[0]
            d_beta = self.beta - self.C * y[misclassified_pts_idx].dot(X[misclassified_pts_idx])
            self.beta = self.beta - lr * d_beta
            d_b = - self.C * np.sum(y[misclassified_pts_idx])
            self.b = self.b - lr * d_b
            # keep the training curve instead of throwing it away
            self.loss_array.append(self.__cost(margin))
        # support vectors: points on or inside the margin after training
        self._support_vectors = np.where(self.__margin(X, y) <= 1)[0]
        return self

    def predict(self, X):
        """Predict labels via sign of the decision function (0 exactly on it)."""
        return np.sign(self.__decision_function(X))

    def score(self, X, y):
        """Mean accuracy of predict(X) against y."""
        P = self.predict(X)
        return np.mean(y == P)

    def plot_decision_boundary(self):
        """Plot 2-D data, decision boundary, +/-1 margins, and support vectors."""
        plt.scatter(self.X[:, 0], self.X[:, 1], c=self.y, s=50, cmap=plt.cm.Paired, alpha=.7)
        ax = plt.gca()
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        # create grid to evaluate model
        xx = np.linspace(xlim[0], xlim[1], 30)
        yy = np.linspace(ylim[0], ylim[1], 30)
        YY, XX = np.meshgrid(yy, xx)
        xy = np.vstack([XX.ravel(), YY.ravel()]).T
        Z = self.__decision_function(xy).reshape(XX.shape)
        # plot decision boundary and margins
        ax.contour(XX, YY, Z, colors=['r', 'b', 'r'], levels=[-1, 0, 1], alpha=0.5,
                   linestyles=['--', '-', '--'], linewidths=[2.0, 2.0, 2.0])
        # highlight the support vectors
        ax.scatter(self.X[:, 0][self._support_vectors], self.X[:, 1][self._support_vectors], s=100,
                   linewidth=1, facecolors='none', edgecolors='k')
        plt.show()
def load_data(cols):
    """Return (X, y) from the last 100 iris rows (two species only).

    ``cols`` selects feature columns by name; an empty list keeps all
    four features. Species names are integer-encoded into y.
    """
    df = sns.load_dataset("iris").tail(100)
    labels = preprocessing.LabelEncoder().fit_transform(df["species"])
    features = df.drop(["species"], axis=1)
    if cols:
        features = features[cols]
    return features.values, labels
if __name__ == '__main__':
    # train on petal measurements; targets must be in (-1, +1)
    feature_cols = ["petal_length", "petal_width"]
    X, y = load_data(feature_cols)
    y[y == 0] = -1
    # standardize the features before training
    X = StandardScaler().fit_transform(X)
    # fit and evaluate our custom soft-margin implementation
    svm = LinearSVMUsingSoftMargin(C=15.0)
    svm.fit(X, y)
    print("train score:", svm.score(X, y))
    svm.plot_decision_boundary()