In [1]:
import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler

In [4]:
X_train = pd.read_csv('../data/liar_dataset/text_seq_data/word_seq_train.csv')
X_valid = pd.read_csv('../data/liar_dataset/text_seq_data/word_seq_valid.csv')
X_test = pd.read_csv('../data/liar_dataset/text_seq_data/word_seq_test.csv')

In [5]:
y_train = pd.read_csv('../data/liar_dataset/label_seq_data/train_label.csv')
y_valid = pd.read_csv('../data/liar_dataset/label_seq_data/valid_label.csv')
y_test = pd.read_csv('../data/liar_dataset/label_seq_data/test_label.csv')

In [6]:
X_train = MinMaxScaler().fit_transform(np.array(X_train))
X_valid = MinMaxScaler().fit_transform(np.array(X_valid))
X_test = MinMaxScaler().fit_transform(np.array(X_test))
y_train = np.ravel(y_train)
y_valid = np.ravel(y_valid)
y_test = np.ravel(y_test)

## Multiclass classification

best: rbf
* valid : 0.25
* test  : 0.20


In [7]:
clf = svm.SVC(kernel='linear')
clf.fit(X_train,y_train)
y_pred_valid = clf.predict(X_valid)
y_pred_test = clf.predict(X_test)

print('Kernel: linear')
print('Validation')
print(classification_report(y_valid,y_pred_valid))
print()
print('Testing')
print(classification_report(y_test,y_pred_test))


Kernel: linear
Validation
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        98
          -2       0.24      0.57      0.34       241
          -1       0.25      0.00      0.01       220
           1       0.25      0.48      0.33       233
           2       0.22      0.15      0.18       233
           3       0.00      0.00      0.00       158

    accuracy                           0.24      1183
   macro avg       0.16      0.20      0.14      1183
weighted avg       0.19      0.24      0.17      1183


Testing
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        83
          -2       0.20      0.54      0.29       222
          -1       0.25      0.01      0.01       195
           1       0.20      0.35      0.26       246
           2       0.19      0.12      0.15       222
           3       0.00      0.00      0.00       199

    accuracy                           0.2

  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
clf = svm.SVC(kernel='poly')
clf.fit(X_train,y_train)
y_pred_valid_poly = clf.predict(X_valid)
y_pred_test_poly = clf.predict(X_test)

print('Kernel: poly')
print('Validation')
print(classification_report(y_valid,y_pred_valid_poly))
print()
print('Testing')
print(classification_report(y_test,y_pred_test_poly))
print()

Kernel: poly
Validation
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        98
          -2       0.21      0.59      0.31       241
          -1       0.17      0.02      0.04       220
           1       0.24      0.33      0.28       233
           2       0.23      0.13      0.17       233
           3       0.12      0.03      0.04       158

    accuracy                           0.22      1183
   macro avg       0.16      0.18      0.14      1183
weighted avg       0.19      0.22      0.17      1183


Testing
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        83
          -2       0.19      0.58      0.29       222
          -1       0.11      0.02      0.03       195
           1       0.22      0.28      0.25       246
           2       0.23      0.14      0.17       222
           3       0.10      0.02      0.03       199

    accuracy                           0.20 

  _warn_prf(average, modifier, msg_start, len(result))


In [8]:
clf = svm.SVC(kernel='rbf')
clf.fit(X_train,y_train)
y_pred_valid_rbf = clf.predict(X_valid)
y_pred_test_rbf = clf.predict(X_test)

print('Kernel: rbf')
print('Validation')
print(classification_report(y_valid,y_pred_valid_rbf))
print()
print('Testing')
print(classification_report(y_test,y_pred_test_rbf))

Kernel: rbf
Validation
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        98
          -2       0.25      0.45      0.32       241
          -1       0.00      0.00      0.00       220
           1       0.25      0.52      0.33       233
           2       0.24      0.27      0.25       233
           3       0.00      0.00      0.00       158

    accuracy                           0.25      1183
   macro avg       0.12      0.21      0.15      1183
weighted avg       0.15      0.25      0.18      1183


Testing
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        83
          -2       0.20      0.41      0.27       222
          -1       0.00      0.00      0.00       195
           1       0.20      0.38      0.26       246
           2       0.20      0.23      0.21       222
           3       0.00      0.00      0.00       199

    accuracy                           0.20  

  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
clf = svm.SVC(kernel='sigmoid')
clf.fit(X_train,y_train)
y_pred_valid_s = clf.predict(X_valid)
y_pred_test_s = clf.predict(X_test)

print('Kernel: sigmoid')
print('Validation')
print(classification_report(y_valid,y_pred_valid_s))
print()
print('Testing')
print(classification_report(y_test,y_pred_test_s))


Kernel: sigmoid
Validation
              precision    recall  f1-score   support

          -3       0.08      0.14      0.10        98
          -2       0.20      0.24      0.22       241
          -1       0.17      0.00      0.01       220
           1       0.25      0.10      0.14       233
           2       0.20      0.09      0.12       233
           3       0.15      0.48      0.23       158

    accuracy                           0.16      1183
   macro avg       0.17      0.18      0.14      1183
weighted avg       0.19      0.16      0.14      1183


Testing
              precision    recall  f1-score   support

          -3       0.08      0.18      0.11        83
          -2       0.18      0.23      0.20       222
          -1       0.00      0.00      0.00       195
           1       0.27      0.09      0.14       246
           2       0.21      0.09      0.13       222
           3       0.16      0.42      0.24       199

    accuracy                           0.

In [11]:
x_train_precomp = np.dot(X_train,X_train.T)
x_test_precomp = np.dot(X_test,X_train.T)
x_valid_precomp = np.dot(X_valid,X_train.T)
clf = svm.SVC(kernel='precomputed')
clf.fit(x_train_precomp,y_train)

y_pred_valid_p = clf.predict(x_valid_precomp)
y_pred_test_p = clf.predict(x_test_precomp)

print('Kernel: precomputed')
print('Validation')
print(classification_report(y_valid,y_pred_valid_p))
print()
print('Testing')
print(classification_report(y_test,y_pred_test_p))


Kernel: precomputed
Validation
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        98
          -2       0.24      0.57      0.34       241
          -1       0.25      0.00      0.01       220
           1       0.25      0.48      0.33       233
           2       0.22      0.15      0.18       233
           3       0.00      0.00      0.00       158

    accuracy                           0.24      1183
   macro avg       0.16      0.20      0.14      1183
weighted avg       0.19      0.24      0.17      1183


Testing
              precision    recall  f1-score   support

          -3       0.00      0.00      0.00        83
          -2       0.20      0.54      0.29       222
          -1       0.25      0.01      0.01       195
           1       0.20      0.35      0.26       246
           2       0.19      0.12      0.15       222
           3       0.00      0.00      0.00       199

    accuracy                         

  _warn_prf(average, modifier, msg_start, len(result))


**Warnings arise as there are some values which exist in y_test/valid but not in the corresponding predicted version.**

In [21]:
print(set(y_valid) - set(y_pred_valid_p))

{-3}


## Binomial classification

best: rbf
*   valid : 0.54 
*   test  : 0.57



In [12]:
y_valid2 = [(1 if y_valid[i]>0 else 0) for i in range(len(y_valid))]
y_test2 = [(1 if y_test[i]>0 else 0) for i in range(len(y_test))]
y_train2 = [(1 if y_train[i]>0 else 0) for i in range(len(y_train))]

In [13]:
clf2 = svm.SVC(kernel='linear')
clf2.fit(X_train,y_train2)
y_pred_valid2 = clf2.predict(X_valid)
y_pred_test2 = clf2.predict(X_test)

print('Kernel: linear')
print('Validation')
print(classification_report(y_valid2,y_pred_valid2))
print()
print('Testing')
print(classification_report(y_test2,y_pred_test2))


Kernel: linear
Validation
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       559
           1       0.53      1.00      0.69       624

    accuracy                           0.53      1183
   macro avg       0.26      0.50      0.34      1183
weighted avg       0.28      0.53      0.36      1183


Testing
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       500
           1       0.57      1.00      0.73       667

    accuracy                           0.57      1167
   macro avg       0.29      0.50      0.36      1167
weighted avg       0.33      0.57      0.42      1167



  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
clf2 = svm.SVC(kernel='poly')
clf2.fit(X_train,y_train2)
y_pred_valid_poly2 = clf2.predict(X_valid)
y_pred_test_poly2 = clf2.predict(X_test)

print('Kernel: poly')
print('Validation')
print(classification_report(y_valid2,y_pred_valid_poly2))
print()
print('Testing')
print(classification_report(y_test2,y_pred_test_poly2))
print()

Kernel: poly
Validation
              precision    recall  f1-score   support

           0       0.19      0.01      0.01       559
           1       0.52      0.97      0.68       624

    accuracy                           0.52      1183
   macro avg       0.36      0.49      0.35      1183
weighted avg       0.37      0.52      0.37      1183


Testing
              precision    recall  f1-score   support

           0       0.38      0.01      0.01       500
           1       0.57      0.99      0.73       667

    accuracy                           0.57      1167
   macro avg       0.47      0.50      0.37      1167
weighted avg       0.49      0.57      0.42      1167




In [16]:
clf2 = svm.SVC(kernel='rbf')
clf2.fit(X_train,y_train2)
y_pred_valid_rbf2 = clf2.predict(X_valid)
y_pred_test_rbf2 = clf2.predict(X_test)

print('Kernel: rbf')
print('Validation')
print(classification_report(y_valid2,y_pred_valid_rbf2))
print()
print('Testing')
print(classification_report(y_test2,y_pred_test_rbf2))

Kernel: rbf
Validation
              precision    recall  f1-score   support

           0       0.62      0.06      0.12       559
           1       0.54      0.96      0.69       624

    accuracy                           0.54      1183
   macro avg       0.58      0.51      0.40      1183
weighted avg       0.58      0.54      0.42      1183


Testing
              precision    recall  f1-score   support

           0       0.45      0.06      0.10       500
           1       0.57      0.95      0.71       667

    accuracy                           0.57      1167
   macro avg       0.51      0.50      0.41      1167
weighted avg       0.52      0.57      0.45      1167



In [17]:
clf2 = svm.SVC(kernel='sigmoid')
clf2.fit(X_train,y_train2)
y_pred_valid_s2 = clf2.predict(X_valid)
y_pred_test_s2 = clf2.predict(X_test)

print('Kernel: sigmoid')
print('Validation')
print(classification_report(y_valid2,y_pred_valid_s2))
print()
print('Testing')
print(classification_report(y_test2,y_pred_test_s2))


Kernel: sigmoid
Validation
              precision    recall  f1-score   support

           0       0.49      0.42      0.45       559
           1       0.54      0.61      0.57       624

    accuracy                           0.52      1183
   macro avg       0.52      0.52      0.51      1183
weighted avg       0.52      0.52      0.52      1183


Testing
              precision    recall  f1-score   support

           0       0.44      0.42      0.43       500
           1       0.58      0.61      0.59       667

    accuracy                           0.53      1167
   macro avg       0.51      0.51      0.51      1167
weighted avg       0.52      0.53      0.52      1167



In [18]:
x_train_precomp = np.dot(X_train,X_train.T)
x_test_precomp = np.dot(X_test,X_train.T)
x_valid_precomp = np.dot(X_valid,X_train.T)
clf2 = svm.SVC(kernel='precomputed')
clf2.fit(x_train_precomp,y_train2)

y_pred_valid_p2 = clf2.predict(x_valid_precomp)
y_pred_test_p2 = clf2.predict(x_test_precomp)

print('Kernel: precomputed')
print('Validation')
print(classification_report(y_valid2,y_pred_valid_p2))
print()
print('Testing')
print(classification_report(y_test2,y_pred_test_p2))


Kernel: precomputed
Validation
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       559
           1       0.53      1.00      0.69       624

    accuracy                           0.53      1183
   macro avg       0.26      0.50      0.34      1183
weighted avg       0.28      0.53      0.36      1183


Testing
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       500
           1       0.57      1.00      0.73       667

    accuracy                           0.57      1167
   macro avg       0.29      0.50      0.36      1167
weighted avg       0.33      0.57      0.42      1167



  _warn_prf(average, modifier, msg_start, len(result))
