In [None]:
import pathlib
from tensorflow import keras
import tensorflow as tf

def vectorize(vectorizer, text, label):
    """Convert a ``(text, label)`` pair into a ``(features, label)`` pair.

    Args:
        vectorizer: Callable applied to the text once a trailing axis has
            been added (intended to be a Keras ``TextVectorization`` layer).
        text: Raw text tensor (presumably a batch of strings coming from
            ``text_dataset_from_directory`` -- confirm against the caller).
        label: Label belonging to ``text``; returned unchanged.

    Returns:
        Tuple ``(vectorizer(expanded_text), label)``.
    """
    # Add a trailing axis before handing the text to the vectorizer.
    expanded = tf.expand_dims(text, axis=-1)
    features = vectorizer(expanded)
    return features, label

dataset_url = "https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz"

# Download the archive (served from the local Keras cache on later runs) and
# unpack it next to the downloaded file.
downloaded_path = pathlib.Path(
    keras.utils.get_file(
        origin=dataset_url,
        extract=True,
        cache_subdir='datasets/stack_overflow'))

# Depending on the Keras version, get_file() returns either the archive file
# (the extracted folders then live beside it) or the extraction directory
# itself. Probe for the 'train' folder instead of blindly taking `.parent`,
# so that both layouts resolve to the directory holding train/ and test/.
if (downloaded_path / 'train').is_dir():
    data_dir = downloaded_path
else:
    data_dir = downloaded_path.parent

train_dir = pathlib.Path(data_dir, 'train')
test_dir = pathlib.Path(data_dir, 'test')

In [None]:
BATCH_SIZE = 32
SEED = 42

# Both subsets must be built with the same split fraction and seed so that
# the training and validation samples do not overlap.
split_kwargs = dict(
    validation_split=0.2,
    batch_size=BATCH_SIZE,
    seed=SEED,
)

# 80% of the files under train_dir are used for training ...
train = keras.utils.text_dataset_from_directory(
    train_dir, subset='training', **split_kwargs)

# ... and the remaining 20% for validation.
val = keras.utils.text_dataset_from_directory(
    train_dir, subset='validation', **split_kwargs)

In [None]:
# The label announces the *number* of classes, so print the count; the class
# names themselves are listed on a separate line.
class_names = train.class_names
print(f'Numero di classi: {len(class_names)}')
print(f'Classi: {class_names}')

In [None]:
# Maximum vocabulary size kept by the vectorization layer.
VOCAB_SIZE = 1000

# 'multi_hot' is the current name of the encoding formerly called 'binary'
# (a deprecated alias that newer Keras releases no longer accept): every
# sample is encoded as a 0/1 vector with one entry per vocabulary token.
# The layer still has to learn its vocabulary via `adapt` before it is used.
vec_layer = keras.layers.TextVectorization(
    max_tokens=VOCAB_SIZE,
    output_mode='multi_hot')

In [None]:
# Build the vocabulary from the training text only (labels are dropped), so
# that nothing from the validation split leaks into the vectorizer.
vec_layer.adapt(train.map(lambda text, label: text))


def vectorize_batch(text, label):
    """Apply `vectorize` with the notebook's `vec_layer` bound as vectorizer."""
    return vectorize(vec_layer, text, label)


# Dataset.map takes a single mapping function; its second positional
# parameter is `num_parallel_calls`, so the layer has to be bound inside the
# mapped function rather than passed as a separate argument.
train_ds = train.map(vectorize_batch)
val_ds = val.map(vectorize_batch)

In [None]:
# Derive the output width from the data instead of hard-coding it, so the
# model keeps matching the dataset if the class folders change.
num_classes = len(train.class_names)

# Simple feed-forward classifier over the vectorized text.
model = keras.models.Sequential()
# The input width is tied to VOCAB_SIZE, the max_tokens of the vectorizer.
model.add(keras.Input(shape=(VOCAB_SIZE,)))
model.add(
    keras.layers.Dense(
        64,
        activation='relu',
        name='dense_1'))
# One softmax unit per class.
model.add(
    keras.layers.Dense(
        num_classes,
        activation='softmax',
        name='dense_2'))

# Labels are integer class indices, hence the *sparse* categorical loss.
model.compile(optimizer='adam',
            loss=keras.losses.SparseCategoricalCrossentropy(),
            metrics=['acc'])

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10)