### Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import autokeras as ak
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

### Load Amazon Review Dataset

In [2]:
# Load the raw review file. The context manager guarantees the file handle is
# closed even if reading fails (the previous bare open().read() leaked it).
with open('amz_review_data/amz_reviews') as reviews_file:
    data = reviews_file.read()

# Each non-empty line is parsed as "<label-token> <review text ...>":
#   - the label is the LAST character of the first whitespace-separated token
#   - the text is the remaining tokens re-joined with single spaces
# NOTE(review): presumably the first token looks like "__label__2" — confirm
# against the data file.
labels, texts = [], []
for line in data.split("\n"):
    content = line.split()
    if not content:
        # Blank lines (e.g. the one produced by a trailing newline) have no
        # label token; indexing content[0] on them would raise IndexError.
        continue
    labels.append(content[0][-1])
    texts.append(" ".join(content[1:]))

# Create the dataframe from texts and labels in one step.
data_set = pd.DataFrame({'text': texts, 'label': labels})

In [3]:
# Preview the first rows to sanity-check the parsed 'text' and 'label' columns.
data_set.head()

Unnamed: 0,text,label
0,Stuning even for the non-gamer: This sound tra...,2
1,The best soundtrack ever to anything.: I'm rea...,2
2,Amazing!: This soundtrack is my favorite music...,2
3,Excellent Soundtrack: I truly like this soundt...,2
4,"Remember, Pull Your Jaw Off The Floor After He...",2


### Train Test Split

In [4]:
# Fixed seed so the split (and therefore every downstream metric) is identical
# on each Restart & Run All.
SEED = 42

# Distinct label values; sorted so the order is stable across runs
# (iteration order of a set of strings varies between Python processes).
classes = sorted(set(labels))

# Hold out 20% of the rows for evaluation and convert each part to a NumPy
# array, which is the form handed to the classifier below.
x_train, x_test, y_train, y_test = (
    np.array(part)
    for part in train_test_split(
        data_set['text'], data_set['label'], test_size=0.2, random_state=SEED
    )
)

print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(8000,) (8000,) (2000,) (2000,)


### Fit and predict

In [5]:
# Initialize the AutoML text classifier.
#   overwrite=True -> discard any previous search results for this project
#   max_trials=1   -> try a single candidate model (keeps the demo fast)
#   seed=42        -> make the architecture search reproducible across runs
autoclf = ak.TextClassifier(
    overwrite=True, max_trials=1, seed=42
)

# Fit the classifier with training data (two epochs per trial).
autoclf.fit(x_train, y_train, epochs=2)

# Predict labels for the held-out texts using the best model found.
predicted_y = autoclf.predict(x_test)

Trial 1 Complete [00h 00m 25s]
val_loss: 0.32315245270729065

Best val_loss So Far: 0.32315245270729065
Total elapsed time: 00h 00m 25s
INFO:tensorflow:Oracle triggered exit
Epoch 1/2
Epoch 2/2
INFO:tensorflow:Assets written to: ./text_classifier/best_model/assets


### Compute Metrics

In [6]:
# Flatten the confusion matrix into its four cells. Unpacking into exactly four
# names only works for a two-class problem.
# NOTE(review): which label counts as "positive" follows scikit-learn's label
# ordering — presumably the label that sorts last; confirm.
cm = confusion_matrix(y_test, predicted_y)
tn, fp, fn, tp = cm.ravel()

# Precision: share of predicted positives that are correct.
precision = tp / (tp + fp)
# Recall: share of actual positives that were found.
recall = tp / (tp + fn)

print("Precision: %.2f and Recall: %.2f" % (precision, recall))

Precision: 0.87 and Recall: 0.87
