In [None]:
!pip install -q -U tensorflow
!pip install -q -U tensorflow-text

In [None]:
import collections
import pathlib
import re
import string

import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras import utils
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

import tensorflow_datasets as tfds
import tensorflow_text as tf_text

In [None]:
# Download the Stack Overflow archive and unpack it (untar=True).
data_url = 'https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz'
dataset = utils.get_file(
    fname='stack_overflow_16k.tar.gz',
    origin=data_url,
    untar=True,
    cache_dir='stack_overflow',
    cache_subdir='',
)
# get_file returns a path; its parent directory is used as the dataset root.
dataset_dir = pathlib.Path(dataset).parent

In [None]:
# Inspect the download location: list the entries directly under dataset_dir.
list(dataset_dir.iterdir())

[PosixPath('/tmp/.keras/test'),
 PosixPath('/tmp/.keras/README.md'),
 PosixPath('/tmp/.keras/train'),
 PosixPath('/tmp/.keras/stack_overflow_16k.tar.gz.tar.gz')]

In [None]:
# Path of the 'train' directory inside the dataset root.
train_dir = dataset_dir/'train'

In [None]:
# List the entries under train_dir (the recorded output shows one sub-directory per language).
list(train_dir.iterdir())


[PosixPath('/tmp/.keras/train/javascript'),
 PosixPath('/tmp/.keras/train/java'),
 PosixPath('/tmp/.keras/train/python'),
 PosixPath('/tmp/.keras/train/csharp')]

In [None]:
# Print the raw text of one example file from the 'python' class directory.
sample_file = train_dir/'python/1725.txt'
print(sample_file.read_text())

django import i couldnt create main file into my project file c:userssushant dhore&gt;cd c:userssushant dhoredesktopmyside..c:userssushant dhoredesktopmyside&gt; py -3.7.3 manage.py startapp main.unable to create process using 'c:userssushant dhoreappdatalocalprogramsblankblank37blank.exe -3.7.3 manage.py startapp main'



In [None]:
# Batch size and seed reused by the dataset-loading calls in this notebook.
batch_size = 32
seed = 42

# Training subset of the files under train_dir; validation_split=0.2 holds out
# the remainder for the 'validation' subset.
raw_train_ds = preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed,
)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.


In [None]:
# Preview the first 10 (question, label) pairs of a single training batch.
for text_batch, label_batch in raw_train_ds.take(1):
  # Convert each tensor to NumPy once rather than on every loop iteration.
  question_array = text_batch.numpy()
  label_array = label_batch.numpy()
  for i in range(10):
    print('Question:', question_array[i])
    print('Label: ', label_array[i])

Question: b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default cons

In [None]:
# Display the label tensor left bound by the preview loop (one integer class id per example).
label_batch

<tf.Tensor: shape=(32,), dtype=int32, numpy=
array([1, 3, 1, 0, 1, 0, 0, 3, 0, 1, 0, 0, 3, 0, 1, 3, 3, 2, 3, 3, 1, 3,
       3, 3, 3, 2, 1, 0, 0, 2, 3, 3], dtype=int32)>

In [None]:
# Print which class name each integer label stands for: the position of a name
# in class_names is paired with that name.
for i, label in enumerate(raw_train_ds.class_names):
  print("Label", i, "corresponds to", label)

Label 0 corresponds to csharp
Label 1 corresponds to java
Label 2 corresponds to javascript
Label 3 corresponds to python


In [None]:
# Validation subset: same directory, split fraction, batch size and seed as the
# training call, but selecting subset='validation'.
raw_val_ds = preprocessing.text_dataset_from_directory(
    train_dir,
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed,
)

Found 8000 files belonging to 4 classes.
Using 1600 files for validation.


In [None]:
# Path of the 'test' directory inside the dataset root.
test_dir = dataset_dir/'test'

In [None]:
# Load every file under test_dir; no validation split is requested here.
raw_test_ds = preprocessing.text_dataset_from_directory(test_dir, batch_size=batch_size)

Found 8000 files belonging to 4 classes.


In [None]:
# Cap on the number of tokens kept by the TextVectorization layers.
VOCAB_SIZE = 10000

# Vectorizer configured with the 'binary' output mode.
binary_vectorize_layer = TextVectorization(max_tokens=VOCAB_SIZE, output_mode='binary')

In [None]:
# Fixed length of the integer sequences produced by the 'int' vectorizer.
MAX_SEQUENCE_LENGTH = 250

# Vectorizer configured with the 'int' output mode and a fixed output length.
int_vectorize_layer = TextVectorization(
    output_mode='int',
    max_tokens=VOCAB_SIZE,
    output_sequence_length=MAX_SEQUENCE_LENGTH,
)

In [None]:
# Build a text-only view of the training set (labels dropped) and adapt both
# vectorizers on it.
train_text = raw_train_ds.map(lambda text, labels: text)
binary_vectorize_layer.adapt(train_text)
# Fixed: the layer is defined as `int_vectorize_layer`. The previous call used
# the undefined name `int_vectorization_layer`, which raised NameError and left
# the 'int' layer un-adapted.
int_vectorize_layer.adapt(train_text)

In [None]:
def binary_vectorize_text(text, label):
  """Vectorize `text` with `binary_vectorize_layer`, passing `label` through.

  A trailing axis is added to `text` before it is handed to the layer.

  Returns:
    A `(vectorized_text, label)` tuple.
  """
  expanded = tf.expand_dims(text, -1)
  vectorized = binary_vectorize_layer(expanded)
  return vectorized, label

In [None]:

def int_vectorize_text(text, label):
  """Vectorize `text` with `int_vectorize_layer`, passing `label` through.

  A trailing axis is added to `text` before it is handed to the layer.

  Returns:
    A `(vectorized_text, label)` tuple.
  """
  expanded = tf.expand_dims(text, -1)
  vectorized = int_vectorize_layer(expanded)
  return vectorized, label

In [None]:
# Take one batch from the training set and keep a single example from it.
text_batch, label_batch = next(iter(raw_train_ds))
# NOTE: despite the "first_" prefix, the example kept is the one at index 2.
first_question = text_batch[2]
first_label = label_batch[2]
print('Question: ', first_question)
print('Label: ', first_label)

Question:  tf.Tensor(b'"exception of type \'system.outofmemoryexception\' was thrown while reading data from text file i m trying to read a text file data with ""|"" separated, i m using below code. i m able to read the data from text file after reading when trying to put datatable data into dataview i m getting exception of type \'system.outofmemoryexception\' was thrown, ..can any one suggest me how to avoid this exception...string filepath = system.configuration.configurationmanager.appsettings[""data""];  ..if (filepath != """").            {.                datatable dt = new datatable(""file"");.                string[] columns1 = null;.                var lines = file.readalllines(filepath);.                int count = lines.length;.                //here taking columns and adding to table.                 if (lines.count() &gt; 0).                {.                    columns1 = lines[0].split(new char[] { \'|\' });.                    foreach (var column in columns1).         

In [None]:
# Vectorize the sample with the 'binary' layer; element [0] of the returned
# (text, label) pair is the vectorized text.
binary_example = binary_vectorize_text(first_question, first_label)[0]
print("'binary' vectorized question:", binary_example)

'binary' vectorized question: tf.Tensor([[1. 1. 1. ... 0. 0. 0.]], shape=(1, 10000), dtype=float32)


In [None]:
# Vectorize the sample with the 'int' layer; element [0] of the returned
# (text, label) pair is the vectorized text.
int_example = int_vectorize_text(first_question, first_label)[0]
print("'int' vectorized question:", int_example)

'int' vectorized question: tf.Tensor(
[[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
  1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]], shape=(1, 250), dtype=int64)


In [None]:
# Look up a couple of token ids in the vocabulary of the 'int' vectorizer.
# Fixed: the index used for the lookup now matches the id shown in the label
# (previously "1289" was printed next to entry 128, and "313" next to entry 31).
# The vocabulary is fetched once and reused.
vocabulary = int_vectorize_layer.get_vocabulary()
for token_id in (1289, 313):
  # Guard the lookup so a vocabulary shorter than the id reports the problem
  # in the output instead of aborting the cell with an IndexError.
  if token_id < len(vocabulary):
    token = vocabulary[token_id]
  else:
    token = '<index out of range: vocabulary has {} entries>'.format(len(vocabulary))
  print("{} ---> ".format(token_id), token)
print("Vocabulary size: {}".format(len(vocabulary)))

IndexError: ignored

In [None]:
# Apply each vectorizing function to the three raw splits (train, val, test).
binary_train_ds, binary_val_ds, binary_test_ds = (
    split.map(binary_vectorize_text)
    for split in (raw_train_ds, raw_val_ds, raw_test_ds)
)

int_train_ds, int_val_ds, int_test_ds = (
    split.map(int_vectorize_text)
    for split in (raw_train_ds, raw_val_ds, raw_test_ds)
)

In [None]:
# Sentinel passed as the prefetch buffer size in configure_dataset.
AUTOTUNE = tf.data.AUTOTUNE

def configure_dataset(dataset):
  """Return `dataset` with caching applied, then prefetching with an AUTOTUNE buffer size."""
  cached = dataset.cache()
  return cached.prefetch(buffer_size=AUTOTUNE)

In [None]:
# Wrap every vectorized split with configure_dataset, rebinding the same names.
binary_train_ds, binary_val_ds, binary_test_ds = map(
    configure_dataset, (binary_train_ds, binary_val_ds, binary_test_ds))

int_train_ds, int_val_ds, int_test_ds = map(
    configure_dataset, (int_train_ds, int_val_ds, int_test_ds))

In [None]:
# Model for the 'binary' vectors: a single Dense layer with 4 output units.
binary_model = tf.keras.Sequential()
binary_model.add(layers.Dense(4))

# from_logits=True: the Dense layer has no activation, so the loss receives raw scores.
binary_model.compile(
    optimizer='adam',
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

history = binary_model.fit(
    binary_train_ds,
    validation_data=binary_val_ds,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
def create_model(vocab_size, num_labels):
  """Build a Sequential model: Embedding -> Conv1D -> GlobalMaxPooling1D -> Dense.

  Args:
    vocab_size: input dimension given to the Embedding layer.
    num_labels: number of units of the final Dense layer.

  Returns:
    The uncompiled `tf.keras.Sequential` model.
  """
  stack = [
      layers.Embedding(vocab_size, 64, mask_zero=True),
      layers.Conv1D(64, 5, padding='valid', activation='relu', strides=2),
      layers.GlobalMaxPooling1D(),
      layers.Dense(num_labels),
  ]
  return tf.keras.Sequential(stack)

In [None]:
# Build the model for the 'int' sequences; VOCAB_SIZE + 1 is passed as the
# Embedding input dimension and 4 as the number of output units.
int_model = create_model(num_labels=4, vocab_size=VOCAB_SIZE + 1)
int_model.compile(
    optimizer='adam',
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [None]:
# Train the 'int' model for 5 epochs, using int_val_ds as validation data.
# NOTE(review): this rebinds `history`, replacing the value assigned by the
# earlier binary_model.fit call.
history = int_model.fit(int_train_ds, validation_data = int_val_ds, epochs = 5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() emitted a stray "None" line after the table.
binary_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 4)                 40004     
Total params: 40,004
Trainable params: 40,004
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
# summary() prints the table itself and returns None, so it is called directly;
# wrapping it in print() emitted a stray "None" line after the table.
int_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, None, 64)          640064    
_________________________________________________________________
conv1d (Conv1D)              (None, None, 64)          20544     
_________________________________________________________________
global_max_pooling1d (Global (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 4)                 260       
Total params: 660,868
Trainable params: 660,868
Non-trainable params: 0
_________________________________________________________________
None
