In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [2]:
# Download the Stack Overflow question archive and unpack it next to the
# notebook. HTTPS is used so the tarball cannot be tampered with in transit
# before it is extracted. With cache_dir='.' and an empty cache_subdir the
# files land in the working directory, which is where the later cells look
# for the relative 'train' and 'test' folders.
url = "https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz"
tf.keras.utils.get_file(
    "stack_overflow_16k.tar.gz",
    origin=url,
    untar=True,
    cache_dir='.',
    cache_subdir='',
)

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz


'.\\stack_overflow_16k.tar.gz'

In [4]:
# Number of examples per batch, shared by all three splits.
batch_size = 32

# Training and validation subsets are both carved out of the 'train' folder.
# The two calls pass the same validation_split and seed; the recorded output
# shows 6400 files used for training and 1600 for validation.
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=42,
)
raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=42,
)

# The test data is read in full from its own 'test' folder.
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'test',
    batch_size=batch_size,
)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.
Found 8000 files belonging to 4 classes.
Using 1600 files for validation.
Found 8000 files belonging to 4 classes.


In [5]:
# Show the class names attached to the training dataset. The recorded output
# lists four: csharp, java, javascript, python.
print(raw_train_ds.class_names)

['csharp', 'java', 'javascript', 'python']


In [6]:
# Peek at the first five (text, label) pairs of a single training batch.
for text_batch, label_batch in raw_train_ds.take(1):
    # Convert each tensor to NumPy once instead of once per printed line.
    texts = text_batch.numpy()
    labels = label_batch.numpy()
    for sample_idx in range(5):
        # Text, label, then an empty line -- one item per output line.
        print(texts[sample_idx], labels[sample_idx], "", sep="\n")

b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default constructor, w

In [10]:
# Hyperparameters reused by the vectorizer and by the model's embedding layer.
max_features = 5000     # passed to the vectorizer as max_tokens
embedding_dim = 128     # width of each embedding vector in the model
sequence_length = 500   # passed to the vectorizer as output_sequence_length

# Text -> integer-id layer; its vocabulary is learned later via adapt().
vectorize_layer = TextVectorization(
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=sequence_length,
)

In [11]:
# adapt() is fed text only, so drop the label from every (text, label) pair
# of the training split before handing it to the vectorizer.
text_ds = raw_train_ds.map(lambda text, _label: text)
vectorize_layer.adapt(text_ds)

In [12]:
def vectorize_text(text, label):
    """Convert raw text to integer token ids, leaving the label untouched.

    Args:
        text: Tensor of raw text, as yielded by the raw datasets.
        label: Label paired with ``text``; passed through unchanged.

    Returns:
        A ``(token_ids, label)`` tuple, where ``token_ids`` is the result of
        applying ``vectorize_layer`` to ``text`` with a trailing axis added.
    """
    # Append a trailing axis before handing the text to the vectorizer.
    text_column = tf.expand_dims(text, axis=-1)
    token_ids = vectorize_layer(text_column)
    return token_ids, label

# Apply the same text -> token-id mapping to every split.
train_ds, val_ds, test_ds = [
    raw_split.map(vectorize_text)
    for raw_split in (raw_train_ds, raw_val_ds, raw_test_ds)
]

In [13]:
# Let tf.data choose the prefetch buffer size.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Cache each vectorized split, then prefetch batches from it.
train_ds, val_ds, test_ds = (
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
)

In [14]:
# Size the output layer from the dataset's own class list instead of a
# hard-coded 4, so the model stays correct if the label set changes.
num_classes = len(raw_train_ds.class_names)

# Classifier: embed the token ids, average the embeddings over the sequence,
# and project to one raw score per class. Dropout is applied before and
# after the pooling step.
# NOTE(review): the embedding table has max_features + 1 rows while the
# vectorizer is capped at max_tokens=max_features; the extra row looks like
# harmless headroom -- confirm before changing.
model = tf.keras.Sequential([
  layers.Embedding(max_features + 1, embedding_dim),
  layers.Dropout(0.2),
  layers.GlobalAveragePooling1D(),
  layers.Dropout(0.2),
  # No activation here: the loss is configured with from_logits=True.
  layers.Dense(num_classes)])

In [15]:
# Integer class labels + un-activated final Dense layer -> sparse categorical
# cross-entropy computed directly on logits.
model.compile(
    optimizer='adam',
    metrics=['accuracy'],
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
)

In [None]:
# Train for five epochs, evaluating on the validation split after each one;
# the returned object is kept in `history`.
history = model.fit(train_ds, validation_data=val_ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5