### Code for generating the 20 Newsgroups dataset

In [1]:
import time

import numpy as np
from sklearn.base import clone
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [2]:
# Training split: raw post texts and their integer class targets.
news = fetch_20newsgroups(subset='train')
Text_train, Y_train = news.data, news.target

# Held-out test split, loaded the same way.
news_test = fetch_20newsgroups(subset='test')
Text_test, Y_test = news_test.data, news_test.target

In [3]:
# Bag-of-words vectorizer limited to 5000 features with English stop words
# removed. The vocabulary is learned from the training texts only.
processor = CountVectorizer(stop_words='english', max_features=5000).fit(Text_train)

# Vectorize both splits with that same fitted vocabulary (train first, then test).
Sparse_train, Sparse_test = (
    processor.transform(texts) for texts in (Text_train, Text_test)
)

In [4]:
# Convert the sparse count matrices to dense ndarrays for the models below.
# toarray() produces the ndarray directly; the previous np.array(m.todense())
# first built an np.matrix and then copied it, briefly holding two full dense
# copies of each matrix in memory.
X_train = Sparse_train.toarray()
X_test = Sparse_test.toarray()

In [5]:
# Expose the class names from the training split and the fitted vectorizer's
# vocabulary alongside the feature matrices.
labels, vocabulary = news.target_names, processor.vocabulary_

#### The dataset is now available in the following variables:

`X_train`, `X_test`, `Y_train`, `Y_test`, `labels`, `vocabulary`

In [6]:
# Estimators to compare, keyed by the display name used in the printed report.
# Both use their default hyper-parameters.
models = {
    'LogisticRegression': LogisticRegression(),
    'GaussianNB': GaussianNB(),
}

In [7]:
# Experiment: append L random tanh "neuron" features to the bag-of-words
# matrix and record, for every model and every L, the fit time, the scoring
# time and the test-set score.
#
# Results:
#   classifiers[L][name] -> estimator fitted with L extra features
#   evaluations[L][name] -> {'training_time', 'scoring_time', 'score'}
classifiers = {}
evaluations = {}

# Local seeded generator so the random weights/biases are identical on every
# run and the global np.random state is left untouched.
SEED = 0
rng = np.random.RandomState(SEED)

Ls = [0, 10, 100, 1000]
for L in Ls:
    classifiers_ = {}
    evaluations_ = {}

    # Random, untrained projection: weights scaled by 1/sqrt(n_features).
    # For L == 0 these arrays have zero columns, so nothing is appended and
    # the models see the original features only.
    n_features = X_train.shape[1]
    W = rng.randn(n_features, L) / np.sqrt(n_features)
    bias = rng.randn(1, L)

    X_new_train = np.hstack((X_train,
                             np.tanh(X_train.dot(W) + bias)))
    X_new_test = np.hstack((X_test,
                            np.tanh(X_test.dot(W) + bias)))

    for name, model in models.items():
        print('\nEvaluating model {} with {:d} neurons'.format(name, L))

        # fit() returns the estimator it was called on, so fitting `model`
        # directly would make every classifiers[L][name] the same object,
        # refitted for the last L. Fit a fresh unfitted copy instead.
        classifier = clone(model)

        start = time.time()
        classifier.fit(X_new_train, Y_train)
        end = time.time()
        training_time = end - start
        print("Training time %.3fs"%(training_time))

        start = time.time()
        score = classifier.score(X_new_test, Y_test)
        end = time.time()
        scoring_time = end - start

        classifiers_[name] = classifier
        evaluations_[name] = {
            'training_time': training_time,
            # Previously this key was filled with training_time by mistake.
            'scoring_time': scoring_time,
            'score': score,
        }

        print("Test set score for {}: {:.2f} with {:d} neurons".format(name, score, L))
        print("Scoring time {:.3f}s with {:d} neurons".format(scoring_time, L))

    classifiers[L] = classifiers_
    evaluations[L] = evaluations_


Evaluating model LogisticRegression with 0 neurons
Training time 19.253s
Test set score for LogisticRegression: 0.77 with 0 neurons
Scoring time 0.100s with 0 neurons

Evaluating model GaussianNB with 0 neurons
Training time 0.948s
Test set score for GaussianNB: 0.60 with 0 neurons
Scoring time 7.573s with 0 neurons

Evaluating model LogisticRegression with 10 neurons
Training time 20.450s
Test set score for LogisticRegression: 0.77 with 10 neurons
Scoring time 0.091s with 10 neurons

Evaluating model GaussianNB with 10 neurons
Training time 0.932s
Test set score for GaussianNB: 0.60 with 10 neurons
Scoring time 7.623s with 10 neurons

Evaluating model LogisticRegression with 100 neurons
Training time 31.818s
Test set score for LogisticRegression: 0.77 with 100 neurons
Scoring time 0.095s with 100 neurons

Evaluating model GaussianNB with 100 neurons
Training time 0.965s
Test set score for GaussianNB: 0.60 with 100 neurons
Scoring time 7.832s with 100 neurons

Evaluating model Logisti