In [1]:
import numpy as np
import sklearn
import scipy
import matplotlib.pyplot as plt
from load_data import *
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.datasets import make_classification
import time
from pathlib import Path

## SNIPS

In [2]:
def train_svm(config, base_dir='embeddings/snips_word2vec_encodings'):
    """Fit a scaled SVM on the saved encodings for one embedding config.

    Loads the train and validation arrays from ``<base_dir>/<config>/``,
    fits ``StandardScaler`` followed by ``SVC(gamma='auto')`` on the training
    split, and prints accuracy and weighted F1 for both splits.

    Parameters
    ----------
    config : str
        Name of the sub-directory (one per embedding model) that holds the
        ``X_*.npy`` / ``y_*.npy`` files.
    base_dir : str or Path, optional
        Directory containing the per-config sub-directories. Defaults to the
        SNIPS encodings directory, so ``train_svm(config)`` behaves as before.

    Returns
    -------
    dict
        Keys ``accuracy_train``, ``accuracy_val``, ``f1_score_train`` and
        ``f1_score_val`` mapped to the printed values.

    Raises
    ------
    FileNotFoundError
        If any of the four required ``.npy`` files is missing.
    """
    # Imported here explicitly: a plain `import sklearn` does not necessarily
    # load the `sklearn.metrics` submodule, so `sklearn.metrics.f1_score`
    # only worked as a side effect of other imports.
    from sklearn.metrics import accuracy_score, f1_score

    save_dir = Path(base_dir) / config
    X_train = np.load(save_dir / 'X_train.npy')
    y_train = np.load(save_dir / 'y_train.npy')
    X_valid = np.load(save_dir / 'X_valid.npy')
    y_valid = np.load(save_dir / 'y_valid.npy')
    # The test split is not loaded: this function only reports
    # train/validation metrics.

    clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    clf.fit(X_train, y_train)

    # Predict once per split and reuse the predictions for both metrics;
    # `clf.score` would run a second full prediction pass for each split.
    pred_train = clf.predict(X_train)
    pred_valid = clf.predict(X_valid)

    metrics = {
        'accuracy_train': accuracy_score(y_train, pred_train),
        'accuracy_val': accuracy_score(y_valid, pred_valid),
        'f1_score_train': f1_score(y_train, pred_train, average='weighted'),
        'f1_score_val': f1_score(y_valid, pred_valid, average='weighted'),
    }

    print(config)
    print("accuracy_train: ", metrics['accuracy_train'])
    print("accuracy_val: ", metrics['accuracy_val'])
    print("f1_score_train: ", metrics['f1_score_train'])
    print("f1_score_val: ", metrics['f1_score_val'])
    print()

    return metrics

In [4]:
# Best-effort sweep over every pretrained embedding config: a config that
# fails (for example because its encodings were never generated) is skipped,
# but the reason is reported instead of being silently discarded.
for config in GENSIM_PRETRAINED_MODELS:
    try:
        train_svm(config)
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still stop the loop when the user interrupts the kernel.
        print("Skipping", config, "-", repr(exc))
        print()

glove-wiki-gigaword-200
accuracy_train:  0.9937375897357569
accuracy_val:  0.9782608695652174
f1_score_train:  0.993741109896011
f1_score_val:  0.9783023344397888

glove-wiki-gigaword-300
accuracy_train:  0.994959523445853
accuracy_val:  0.9797101449275363
f1_score_train:  0.9949600239618936
f1_score_val:  0.9796505892649955

word2vec-google-news-300
accuracy_train:  0.995112265159615
accuracy_val:  0.9753623188405797
f1_score_train:  0.9951126262934119
f1_score_val:  0.9754839805389101

glove-twitter-25
accuracy_train:  0.9483733007484344
accuracy_val:  0.908695652173913
f1_score_train:  0.9484247784383169
f1_score_val:  0.9090702859729031

glove-twitter-50
accuracy_train:  0.9823583320604857
accuracy_val:  0.9652173913043478
f1_score_train:  0.982348029663638
f1_score_val:  0.9650858641342506

glove-twitter-100
accuracy_train:  0.9921338017412555
accuracy_val:  0.9710144927536232
f1_score_train:  0.9921381618515891
f1_score_val:  0.9710012140117622

glove-twitter-200
accuracy_train: 

## ATIS (mini)

In [5]:
def train_svm(config, base_dir='embeddings/atis_word2vec_encodings'):
    """Fit a scaled SVM on the saved encodings for one embedding config.

    Loads the train and validation arrays from ``<base_dir>/<config>/``,
    fits ``StandardScaler`` followed by ``SVC(gamma='auto')`` on the training
    split, and prints accuracy and weighted F1 for both splits.

    Parameters
    ----------
    config : str
        Name of the sub-directory (one per embedding model) that holds the
        ``X_*.npy`` / ``y_*.npy`` files.
    base_dir : str or Path, optional
        Directory containing the per-config sub-directories. Defaults to the
        ATIS encodings directory, so ``train_svm(config)`` behaves as before.

    Returns
    -------
    dict
        Keys ``accuracy_train``, ``accuracy_val``, ``f1_score_train`` and
        ``f1_score_val`` mapped to the printed values.

    Raises
    ------
    FileNotFoundError
        If any of the four required ``.npy`` files is missing.
    """
    # Imported here explicitly: a plain `import sklearn` does not necessarily
    # load the `sklearn.metrics` submodule, so `sklearn.metrics.f1_score`
    # only worked as a side effect of other imports.
    from sklearn.metrics import accuracy_score, f1_score

    save_dir = Path(base_dir) / config
    X_train = np.load(save_dir / 'X_train.npy')
    y_train = np.load(save_dir / 'y_train.npy')
    X_valid = np.load(save_dir / 'X_valid.npy')
    y_valid = np.load(save_dir / 'y_valid.npy')
    # The test split is not loaded: this function only reports
    # train/validation metrics.

    clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
    clf.fit(X_train, y_train)

    # Predict once per split and reuse the predictions for both metrics;
    # `clf.score` would run a second full prediction pass for each split.
    pred_train = clf.predict(X_train)
    pred_valid = clf.predict(X_valid)

    metrics = {
        'accuracy_train': accuracy_score(y_train, pred_train),
        'accuracy_val': accuracy_score(y_valid, pred_valid),
        'f1_score_train': f1_score(y_train, pred_train, average='weighted'),
        'f1_score_val': f1_score(y_valid, pred_valid, average='weighted'),
    }

    print(config)
    print("accuracy_train: ", metrics['accuracy_train'])
    print("accuracy_val: ", metrics['accuracy_val'])
    print("f1_score_train: ", metrics['f1_score_train'])
    print("f1_score_val: ", metrics['f1_score_val'])
    print()

    return metrics

In [6]:
# Best-effort sweep over every pretrained embedding config: a config that
# fails (for example because its encodings were never generated) is skipped,
# but the reason is reported instead of being silently discarded.
for config in GENSIM_PRETRAINED_MODELS:
    try:
        train_svm(config)
    except Exception as exc:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit still stop the loop when the user interrupts the kernel.
        print("Skipping", config, "-", repr(exc))
        print()

glove-wiki-gigaword-200
accuracy_train:  0.9801829268292683
accuracy_val:  0.9462809917355371
f1_score_train:  0.978293492985534
f1_score_val:  0.9397114379873001

glove-wiki-gigaword-300
accuracy_train:  0.9821428571428571
accuracy_val:  0.9421487603305785
f1_score_train:  0.9803982698646521
f1_score_val:  0.9356311365402274

word2vec-google-news-300
accuracy_train:  0.985191637630662
accuracy_val:  0.9586776859504132
f1_score_train:  0.9837651217744696
f1_score_val:  0.9531581023747367

glove-twitter-25
accuracy_train:  0.9383710801393729
accuracy_val:  0.8966942148760331
f1_score_train:  0.9299545300083046
f1_score_val:  0.8813251279643436

glove-twitter-50
accuracy_train:  0.9636324041811847
accuracy_val:  0.9173553719008265
f1_score_train:  0.9587985209882368
f1_score_val:  0.9019482516834882

glove-twitter-100
accuracy_train:  0.9734320557491289
accuracy_val:  0.9297520661157025
f1_score_train:  0.9701177967912332
f1_score_val:  0.9196243628779515

glove-twitter-200
accuracy_trai