In [32]:
import tensorflow as tf
import os
from tensorflow.keras import layers, losses, preprocessing
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [2]:
# Root folder of the dataset (the listing in the next cell shows `train` and `test`).
# Set the SO_RESPONSE_DIR environment variable to point at a copy elsewhere;
# when it is unset, the original local path is used, so existing runs are unchanged.
dataset_dir = os.environ.get(
    'SO_RESPONSE_DIR',
    '/home/fabian/aistudy/tfstudy/ml-basics-with-keras/data/so_response',
)

In [3]:
# Show what the dataset root contains (the split folders).
os.listdir(dataset_dir)

['train', 'test']

In [4]:
# Path of the training split; the listing shows one sub-folder per class.
train_dir = os.path.join(dataset_dir, 'train')
os.listdir(train_dir)

['python', 'java', 'javascript', 'csharp']

In [5]:
# Path of the held-out test split; it has the same class sub-folders as the training split.
test_dir = os.path.join(dataset_dir, 'test')
os.listdir(test_dir)

['python', 'java', 'javascript', 'csharp']

In [8]:
# Training subset: validation_split=0.2 with subset='training' keeps 80% of the
# files under the training folder (6400 of 8000 per the loader's report).
# `train_dir` is used instead of repeating the absolute path, so this cell keeps
# pointing at the right place if `dataset_dir` changes.
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size = 32,
    validation_split = 0.2,
    subset = 'training',
    seed = 42
)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [9]:
# Number of examples per batch, shared by the train / validation / test loaders.
batch_size = 32

In [11]:
# Training subset (80% of the files in `train_dir`), batched with the shared
# `batch_size`. The seed must match the validation loader's seed so the two
# subsets do not overlap.
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=42,
)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [12]:
# Validation subset: the remaining 20% of the files in `train_dir`.
# Same split fraction and seed as the training loader, with subset='validation'.
raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=42,
)

Found 8000 files belonging to 4 classes.
Using 1600 files for validation.


In [13]:
# Held-out test set: every file under `test_dir`, with no validation split.
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    test_dir,
    batch_size=batch_size,
)

Found 8000 files belonging to 4 classes.


In [14]:
# Class names discovered by the loader; the integer labels printed in the next
# cell are positions in this list.
print(raw_train_ds.class_names)

['csharp', 'java', 'javascript', 'python']


In [15]:
# Preview up to five (text, label) pairs from the first training batch.
for text_batch, label_batch in raw_train_ds.take(1):
    # Convert each tensor to NumPy once instead of on every iteration.
    texts = text_batch.numpy()
    labels = label_batch.numpy()
    # Slicing (rather than indexing range(5)) avoids an IndexError if the
    # batch happens to hold fewer than five examples.
    for text, label in zip(texts[:5], labels[:5]):
        print(text)
        print(label)
        print()

b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default constructor, w

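## Vectorization
In array programming, vectorization is the process of converting an algorithm from operating on a single value at a time to operating on a set of values at once. In this notebook, *text* vectorization means something different: the `TextVectorization` layer below converts each raw string into a fixed-length sequence of integer token indices that the model can consume.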

In [16]:
# Hyperparameters shared by the vectorization layer and the model.
max_features = 5000     # vocabulary cap (TextVectorization `max_tokens`)
embedding_dim = 128     # size of each embedding vector
sequence_length = 500   # every vectorized text has this many token ids

In [19]:
# Preprocessing layer that maps strings to integer token ids.
# The vocabulary is limited to `max_features` tokens and every output has
# exactly `sequence_length` ids.
vectorize_layer = TextVectorization(
    output_mode='int',
    max_tokens=max_features,
    output_sequence_length=sequence_length,
)

In [20]:
# Build the vocabulary from the training text only: strip the labels, then
# let the layer scan the resulting text-only dataset.
text_ds = raw_train_ds.map(lambda text, _label: text)
vectorize_layer.adapt(text_ds)

In [25]:
def vectorize_text(text, label):
    """Turn raw text into integer token sequences, passing the label through.

    Args:
        text: String tensor as yielded by the raw datasets (presumably a
            batch of shape ``(batch,)`` — TODO confirm).
        label: Label tensor paired with ``text``; returned unchanged.

    Returns:
        A ``(vectorized_text, label)`` tuple, where ``vectorized_text`` is the
        output of the module-level ``vectorize_layer``.
    """
    # Append a trailing axis before handing the strings to the layer.
    text_column = tf.expand_dims(text, axis=-1)
    return vectorize_layer(text_column), label

In [26]:
# Apply the same text -> token-id mapping to all three splits.
train_ds, val_ds, test_ds = (
    raw_ds.map(vectorize_text)
    for raw_ds in (raw_train_ds, raw_val_ds, raw_test_ds)
)

In [27]:
# Passed as `buffer_size` to `prefetch` in the next cell so tf.data picks the
# prefetch buffer size itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [29]:
# Cache each vectorized split after its first pass and prefetch upcoming
# batches while the current one is being consumed.
train_ds = (
    train_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
val_ds = (
    val_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)
test_ds = (
    test_ds
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [33]:
# Bag-of-embeddings classifier: embed each token id, average the embeddings
# over the sequence, then project to one logit per class.
model = tf.keras.Sequential([
    # NOTE(review): the vectorizer caps the vocabulary at `max_features`;
    # the +1 reserves one extra index — confirm the intent.
    layers.Embedding(input_dim=max_features + 1, output_dim=embedding_dim),
    layers.Dropout(rate=0.2),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(rate=0.2),
    # 4 outputs: one per class reported by the dataset loader. No activation,
    # so the layer emits raw logits.
    layers.Dense(units=4),
])

`Dropout`: During the training phase, a randomly chosen subset of units (i.e. neurons) is ignored; a different random subset is chosen at each training step.

In [34]:
# Labels are integer class indices and the final Dense layer emits raw logits,
# hence the sparse categorical loss with from_logits=True.
model.compile(
    optimizer='adam',
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [35]:
# Train for 5 epochs, evaluating on the validation split after each one;
# `history` keeps the value returned by `fit`.
history = model.fit(train_ds, validation_data=val_ds, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [36]:
# Score the trained model on the held-out test split. `evaluate` returns the
# loss followed by the metrics passed to `compile` (here: accuracy).
loss, accuracy = model.evaluate(test_ds)

for metric_label, metric_value in (("Loss: ", loss), ("Accuracy: ", accuracy)):
    print(metric_label, metric_value)

Loss:  0.9777625203132629
Accuracy:  0.7098749876022339
