In [1]:
from warnings import catch_warnings, simplefilter
with catch_warnings():
    simplefilter('ignore')
    import numpy as np
    from scipy.sparse import lil_matrix
    from sklearn.svm import SVC
    from sklearn.metrics import classification_report
    from sklearn.metrics import accuracy_score
    from tensorflow.keras.datasets import imdb

In [2]:
def make_sparse(x, n_samples, n_features):
    """Build a binary bag-of-words matrix from sequences of column indices.

    Args:
        x: Indexable collection where ``x[i]`` is an iterable of integer
            column indices for sample ``i`` (e.g. the word ids of a review).
        n_samples: Number of leading entries of ``x`` to encode; also the
            number of rows of the result. Must not exceed ``len(x)``.
        n_features: Number of columns of the result. Every index in ``x``
            must be a valid column index for this width.

    Returns:
        A ``lil_matrix`` of shape ``(n_samples, n_features)`` whose entry
        ``[i, j]`` is 1 when ``j`` occurs in ``x[i]`` and 0 otherwise.
    """
    sparse = lil_matrix((n_samples, n_features))
    for row in range(n_samples):
        # A repeated index would just overwrite the same cell with 1 again,
        # so collapse duplicates first to avoid redundant sparse writes.
        for col in set(x[row]):
            sparse[row, col] = 1
    return sparse

In [3]:
# Vocabulary size: passed to imdb.load_data as num_words and used as the
# column count of the bag-of-words matrices built below.
max_features = 10000
print('Loading data...')
with catch_warnings():
    simplefilter('ignore')
    (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
    # Take the row counts from the loaded data instead of hard-coding them,
    # so the matrices and the printed counts always match what was loaded.
    n_train, n_test = len(x_train), len(x_test)
    x_train = make_sparse(x_train, n_train, max_features)
    x_test = make_sparse(x_test, n_test, max_features)
print(n_train, 'train sequences')
print(n_test, 'test sequences')

Loading data...
25000 train sequences
25000 test sequences


In [4]:
print('Build model...')
# probability=True enables probability estimates, which scikit-learn fits
# with an internally shuffled calibration step. Fixing random_state makes
# those estimates reproducible across runs; predict() is not affected.
clf = SVC(kernel='rbf', probability=True, random_state=0)

Build model...


In [5]:
print('Train...')
# fit() hands back the fitted estimator, so training on the train matrix and
# predicting labels for the test matrix can be written as one chain.
predictions = clf.fit(x_train, y_train).predict(x_test)
print("SVM")
# Per-class precision/recall/F1, followed by overall accuracy on the test set.
print(classification_report(y_test, predictions))
print('Test accuracy:', accuracy_score(y_test, predictions))

Train...
SVM
              precision    recall  f1-score   support

           0       0.89      0.88      0.88     12500
           1       0.88      0.89      0.89     12500

    accuracy                           0.89     25000
   macro avg       0.89      0.89      0.89     25000
weighted avg       0.89      0.89      0.89     25000

Test accuracy: 0.88536
