In [2]:
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Build a small 2-D toy set of n points. Each x2 is its x1 shifted by a
# distance drawn from [2, 4) in a randomly chosen direction, so no point
# lies close to the line x1 == x2.
np.random.seed(2)  # fixed seed: the draws below are repeatable
n = 10
x1 = np.random.uniform(-5, 5, n)
shift = np.random.uniform(2, 4, n)
direction = np.random.choice([-1, 1], n)
x2 = x1 + shift * direction

# Label 0 / red where x1 > x2, label 1 / blue otherwise.
colors = ['r' if first > second else 'b' for first, second in zip(x1, x2)]
y = [0 if first > second else 1 for first, second in zip(x1, x2)]
print(y)

[0, 0, 0, 1, 1, 1, 1, 0, 0, 0]


In [42]:
# Scatter the toy points (x2 on the horizontal axis, x1 on the vertical one),
# overlay three straight lines and write the figure to support.pdf.
# NOTE(review): the line endpoints below are pasted-in literals; they look like
# the values printed by the LinearSVC cell -- confirm and refresh them whenever
# that cell is re-run, otherwise the figure and the fitted model drift apart.
plt.clf()
plt.xlim(-5, 2)
plt.ylim(-8, 3)
plt.scatter(x2, x1, c=colors)
# Keep this call after xlim/ylim and before the plot calls: the statement
# order is preserved from the original cell on purpose.
plt.axis('scaled')

# First line: drawn in the default colour.
boundary_xs, boundary_ys = [-5, 2], [-11.19433011, 4.57531497]
plt.plot(boundary_xs, boundary_ys)

# Remaining two lines: drawn in black.
black_lines = (
    ([-5, 2], [-6.77427117384, 8.99546558026]),
    ([-5, 10], [-15.6041871921, 18.1905676917]),
)
for xs, ys in black_lines:
    plt.plot(xs, ys, c='k')

plt.savefig('support.pdf', bbox_inches='tight')

In [18]:
# Score of each vectorisation scheme for a growing number of features,
# drawn as one curve per scheme and saved to vec_model.pdf.
# NOTE(review): the scores are hand-entered literals; the metric is not
# stated here (presumably classification accuracy) -- confirm.
n_features = [100, 500, 1000, 2500, 5000, 10000, 15000, 20000]
tfidf = [0.73444, 0.83364, 0.86092, 0.88272, 0.88588, 0.88964, 0.8932, 0.8944]
bvec = [0.72532, 0.83132, 0.85664, 0.87316, 0.84644, 0.8566, 0.86228, 0.86644]
fvec = [0.73324, 0.83312, 0.85816, 0.87436, 0.84312, 0.854, 0.86184, 0.86464]

# (scores, matplotlib format string, legend label) -- listed in legend order.
curves = (
    (bvec, 'r-o', 'Binarna vektorizacija'),
    (fvec, 'g-o', 'Frekvencijska vektorizacija'),
    (tfidf, 'b-o', 'TF-IDF vektorizacija'),
)
for scores, fmt, caption in curves:
    plt.plot(n_features, scores, fmt, label=caption)

plt.legend()
plt.savefig('vec_model.pdf')
plt.clf()  # leave an empty figure behind for the next plotting cell

In [32]:
# Bar chart with one bar per entry of `grams`, each annotated with its score,
# saved to grams.pdf.
# NOTE(review): the labels look like n-gram (min, max) ranges and the scores
# are hand-entered literals -- confirm where they come from.
grams = ['(1, 1)', '(1, 2)', '(1, 3)', '(1, 4)', '(2, 2)', '(2, 3)', '(3, 3)']
results = [0.8944, 0.89892, 0.89908, 0.89884, 0.83892, 0.83464, 0.71508]
y_pos = np.arange(len(grams))  # bar positions along the horizontal axis

rects = plt.bar(y_pos, results, align='center', alpha=0.5)
plt.xticks(y_pos, grams)

# Write each score directly above its bar, centred on the bar.
for rect, score in zip(rects, results):
    bar_centre = rect.get_x() + rect.get_width()/2.
    plt.text(bar_centre, rect.get_height(), score, ha='center', va='bottom')

plt.savefig('grams.pdf')
plt.clf()  # leave an empty figure behind for the next plotting cell

In [33]:
# Fit a linear SVM on the toy set and print the endpoints of three parallel
# lines: the decision boundary and the lines through training points 2 and 4.
from sklearn.svm import LinearSVC

# Pin the solver's random state so that re-running this cell prints the same
# numbers every time. The figure cell that draws support.pdf hard-codes
# endpoints of this kind, so they must be re-copied whenever these change.
svm = LinearSVC(random_state=0)

# Feature columns are ordered (x2, x1).
train = np.vstack((x2, x1)).T
svm.fit(train, y)

w = svm.coef_        # indexed below as w[0, 0] and w[0, 1]
b = svm.intercept_   # array, so the first pair of printed values are arrays too

# Decision boundary w[0,0]*u + w[0,1]*v + b = 0, rewritten as v = slope*u + c.
slope = -w[0, 0] / w[0, 1]


def line_endpoints(offset, u_start, u_end):
    """Return the values of ``slope * u + offset`` at ``u_start`` and ``u_end``.

    ``offset`` is the intercept of a line parallel to the decision boundary;
    the result is the pair of ordinates needed to draw that line between the
    two abscissae.
    """
    return slope * u_start + offset, slope * u_end + offset


# The decision boundary itself, between u = -5 and u = 2.
y1, y2 = line_endpoints(-b / w[0, 1], -5, 2)
print(y1)
print(y2)

# Parallel line through training point 2.
# NOTE(review): points 2 and 4 are picked by index; presumably they are the
# points closest to the boundary on either side -- confirm.
c1 = x1[2] - slope * x2[2]
y1, y2 = line_endpoints(c1, -5, 2)
print(y1)
print(y2)

# Parallel line through training point 4 (evaluated up to u = 10, not 2).
c1 = x1[4] - slope * x2[4]
y1, y2 = line_endpoints(c1, -5, 10)
print(y1)
print(y2)

print(train)

[-11.19513243]
[ 4.57575318]
-6.77480087383
8.99608473861
-15.6041871921
18.1905676917
[[-3.88231864 -0.64005098]
 [-7.79902187 -4.74073768]
 [-1.77253511  0.49662478]
 [ 2.38038017 -0.64677607]
 [ 1.57255775 -0.79632198]
 [ 1.87401851 -1.69665179]
 [ 0.75443693 -2.95351366]
 [-1.79576401  1.19270966]
 [-5.69657623 -2.00345326]
 [-4.4910182  -2.33172725]]


In [3]:
# Compare a linear SVM with an RBF-kernel SVM on data that is labelled by a
# circle: draw both decision regions side by side and save them to kernel.pdf.
# Note that this cell rebinds x1, x2 and y used by earlier cells.
np.random.seed(1)
x1 = np.random.randn(100)
x2 = np.random.randn(100)
X = np.vstack((x1, x2)).T
h = .02  # step of the evaluation grid

# Label 0 for points inside or on the unit circle, 1 for points outside it.
squared_radius = X[:, 0] * X[:, 0] + X[:, 1] * X[:, 1]
y = [0 if r2 <= 1 else 1 for r2 in squared_radius]

from sklearn import svm
linear = svm.LinearSVC().fit(X, y)
radial = svm.SVC(kernel='rbf').fit(X, y)

# Evaluation grid covering the data with a margin of 1 on every side.
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
grid_points = np.c_[xx.ravel(), yy.ravel()]

titles = ['Linearna jezgra', 'Radijalna jezgra']

for panel, (clf, title) in enumerate(zip((linear, radial), titles), start=1):
    plt.subplot(1, 2, panel)
    plt.subplots_adjust(wspace=0.4, hspace=0.4)

    # Predicted class at every grid point, shaped like the grid and drawn
    # as filled colour regions.
    Z = clf.predict(grid_points).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)

    # Training points on top, coloured by their true label.
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.coolwarm)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
    plt.title(title)

plt.savefig('kernel.pdf')