In [1]:
import tensorflow as tf
import autokeras as ak

In [2]:
# Restrict TensorFlow to a single GPU (the last one reported) when at least
# one is available. On a CPU-only machine the list is empty, so indexing it
# would raise IndexError; in that case leave the device configuration alone.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    tf.config.experimental.set_visible_devices(gpus[-1], 'GPU')

### Load the 20 Newsgroups dataset

In [3]:
import numpy as np
from sklearn.datasets import fetch_20newsgroups


# Keep only two closely related newsgroups, which makes this a binary task.
categories = ['rec.autos', 'rec.motorcycles']

# Fetch the train and test splits with identical categories and shuffle seed.
news_train, news_test = (
    fetch_20newsgroups(subset=split, shuffle=True, random_state=42, categories=categories)
    for split in ('train', 'test')
)

# Convert the raw documents and their integer targets to NumPy arrays.
doc_train = np.array(news_train.data)
label_train = np.array(news_train.target)
doc_test = np.array(news_test.data)
label_test = np.array(news_test.target)

In [4]:
# Report the distinct labels found in the training targets.
unique_labels = np.unique(label_train)
print("Unique labels {}. \nNumber of unique labels: {}.\n\n"
      .format(unique_labels, len(unique_labels)))

# Report how many documents each split contains.
n_train_docs = len(doc_train)
n_test_docs = len(doc_test)
print("The number of documents for training: {}.".format(n_train_docs))
print("The number of documents for testing: {}.".format(n_test_docs))

# Display the type and raw text of the first training document.
type(doc_train[0]), doc_train[0]

Unique labels [0 1]. 
Number of unique labels: 2.


The number of documents for training: 1192.
The number of documents for testing: 794.


(numpy.str_,

### Run the TextClassifier

In [10]:
# Create the text classifier. max_trials=3 makes the search try three
# different models; overwrite=True discards the history of any previous search.
clf = ak.TextClassifier(max_trials=3, overwrite=True)

# Run the search on the training documents and their labels.
clf.fit(x=doc_train, y=label_train, verbose=2)

Trial 3 Complete [00h 14m 41s]
val_loss: 0.6953225135803223

Best val_loss So Far: 0.03908782824873924
Total elapsed time: 00h 15m 18s
INFO:tensorflow:Oracle triggered exit
Epoch 1/13
38/38 - 1s - loss: 0.6981 - accuracy: 0.5092
Epoch 2/13
38/38 - 1s - loss: 0.6098 - accuracy: 0.7206
Epoch 3/13
38/38 - 1s - loss: 0.2573 - accuracy: 0.9178
Epoch 4/13
38/38 - 1s - loss: 0.0717 - accuracy: 0.9773
Epoch 5/13
38/38 - 1s - loss: 0.0178 - accuracy: 0.9941
Epoch 6/13
38/38 - 1s - loss: 0.0064 - accuracy: 0.9992
Epoch 7/13
38/38 - 1s - loss: 0.0037 - accuracy: 0.9992
Epoch 8/13
38/38 - 1s - loss: 0.0031 - accuracy: 0.9992
Epoch 9/13
38/38 - 1s - loss: 0.0023 - accuracy: 0.9992
Epoch 10/13
38/38 - 1s - loss: 0.0017 - accuracy: 0.9992
Epoch 11/13
38/38 - 1s - loss: 0.0017 - accuracy: 0.9992
Epoch 12/13
38/38 - 1s - loss: 0.0018 - accuracy: 0.9992
Epoch 13/13
38/38 - 1s - loss: 0.0023 - accuracy: 0.9992
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are a

### Retrieve best model

In [11]:
# Export the best model found by the search so it can be inspected directly.
best_model = clf.export_model()

In [12]:
# Print the layer-by-layer architecture and parameter counts of the exported model.
best_model.summary()

Model: "functional_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None,)]                 0         
_________________________________________________________________
tf_op_layer_ExpandDims (Tens (None, 1)                 0         
_________________________________________________________________
text_vectorization (TextVect (None, 512)               0         
_________________________________________________________________
embedding (Embedding)        (None, 512, 64)           320064    
_________________________________________________________________
dropout (Dropout)            (None, 512, 64)           0         
_________________________________________________________________
conv1d (Conv1D)              (None, 508, 256)          82176     
_________________________________________________________________
global_max_pooling1d (Global (None, 256)              


### Predict with the best model.


In [13]:
# Predict a label for every document in the test split.
predicted_y = clf.predict(doc_test)
# NOTE(review): this appears to print one row per test document, which floods
# the output; consider printing a slice such as predicted_y[:10] instead.
print(predicted_y)

[[0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [1.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [0.]
 [1.]
 [0.]
 [0.]
 [0.]
 [1.]
 [1.]
 [1.

### Evaluate the best model on the test data.

In [14]:

# Score the classifier on the test split. The result is unpacked as the loss
# followed by the accuracy; verbose=0 keeps the evaluation itself silent.
test_loss, test_acc = clf.evaluate(doc_test, label_test, verbose=0)
print('Test accuracy: ', test_acc)


Test accuracy:  0.9609571695327759


## Reference
[TextClassifier](/text_classifier),
[AutoModel](/auto_model/#automodel-class),
[TextBlock](/block/#textblock-class),
[TextToIntSequence](/preprocessor/#texttointsequence-class),
[Embedding](/block/#embedding-class),
[ConvBlock](/block/#convblock-class),
[TextInput](/node/#textinput-class),
[ClassificationHead](/head/#classificationhead-class).