<a href="https://colab.research.google.com/github/gonzalezjulvez/Projects/blob/main/Formacion/Clasificacion_de_texto_Stackoverflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Importamos librerias

import matplotlib.pyplot as plt
import os
import re
import shutil
import string
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import preprocessing
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

## Obtenemos los datos


In [2]:
# Fetch the Stack Overflow archive over HTTPS (it is unpacked after download,
# so the transfer should be integrity-protected) and extract it into the
# current working directory; later cells read the relative 'train' and 'test'
# directories.
url = 'https://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz'

datasets = tf.keras.utils.get_file(
    'stack_overflow_16k',
    url,
    untar=True,
    cache_dir='.',
    cache_subdir='')

Downloading data from http://storage.googleapis.com/download.tensorflow.org/data/stack_overflow_16k.tar.gz


In [4]:
# Shared loading parameters. batch_size is passed to every loader below so the
# batch size is changed in exactly one place.
batch_size = 32
seed = 42

# The training and validation subsets are carved out of the same 'train'
# directory; both calls use the same validation_split and seed so the two
# subsets are complementary.
raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='training',
    seed=seed)
raw_val_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'train',
    batch_size=batch_size,
    validation_split=0.2,
    subset='validation',
    seed=seed)

# The held-out test set lives in its own directory and is not split.
raw_test_ds = tf.keras.preprocessing.text_dataset_from_directory(
    'test',
    batch_size=batch_size)

Found 8000 files belonging to 4 classes.
Using 6400 files for training.
Found 8000 files belonging to 4 classes.
Using 1600 files for validation.
Found 8000 files belonging to 4 classes.


In [5]:
# Show the class names discovered from the sub-directories of 'train'; the
# position of each name in this list is the integer label used by the datasets.
print(raw_train_ds.class_names)

['csharp', 'java', 'javascript', 'python']


In [6]:
# Peek at the first three (text, label) pairs of a single training batch.
for text_batch, label_batch in raw_train_ds.take(1):
  # Convert each tensor to NumPy once instead of once per printed line.
  texts = text_batch.numpy()
  labels = label_batch.numpy()
  for idx in range(3):
    print(f"Review {texts[idx]}")
    print(f"Label {labels[idx]}")

Review b'"my tester is going to the wrong constructor i am new to programming so if i ask a question that can be easily fixed, please forgive me. my program has a tester class with a main. when i send that to my regularpolygon class, it sends it to the wrong constructor. i have two constructors. 1 without perameters..public regularpolygon().    {.       mynumsides = 5;.       mysidelength = 30;.    }//end default constructor...and my second, with perameters. ..public regularpolygon(int numsides, double sidelength).    {.        mynumsides = numsides;.        mysidelength = sidelength;.    }// end constructor...in my tester class i have these two lines:..regularpolygon shape = new regularpolygon(numsides, sidelength);.        shape.menu();...numsides and sidelength were declared and initialized earlier in the testing class...so what i want to happen, is the tester class sends numsides and sidelength to the second constructor and use it in that class. but it only uses the default constru

## Prepare data for training

In [8]:
# Text-vectorization hyperparameters.
max_features = 10000     # passed to TextVectorization as max_tokens
sequence_length = 250    # passed to TextVectorization as output_sequence_length
embedding_dim = 128      # output dimension of the embedding layer built later

# Layer that turns raw strings into fixed-length sequences of integer token ids.
vectorize_layer = TextVectorization(
    max_tokens=max_features,
    output_mode='int',
    output_sequence_length=sequence_length,
)

# Build the vocabulary from the training texts only: drop the labels, then adapt.
text_ds = raw_train_ds.map(lambda text, _label: text)
vectorize_layer.adapt(text_ds)

In [10]:
# Vectorize the data

def vectorizer_text(texts, label):
  """Map a (texts, label) pair to (token ids, label).

  A trailing axis is added to `texts` before it is passed through
  `vectorize_layer`; `label` is returned unchanged.
  """
  expanded = tf.expand_dims(texts, -1)
  token_ids = vectorize_layer(expanded)
  return token_ids, label

# Apply the same vectorization to every split.
train_ds = raw_train_ds.map(vectorizer_text)
val_ds = raw_val_ds.map(vectorizer_text)
test_ds = raw_test_ds.map(vectorizer_text)

In [11]:
# Configure the datasets for performance

AUTOTUNE = tf.data.experimental.AUTOTUNE

# Cache each vectorized split after its first pass and prefetch upcoming
# batches, letting tf.data choose the prefetch buffer size.
train_ds, val_ds, test_ds = [
    split.cache().prefetch(buffer_size=AUTOTUNE)
    for split in (train_ds, val_ds, test_ds)
]

## Construimos el modelo

In [12]:
# Build the model: embedding -> dropout -> average over the sequence axis ->
# dropout -> one output unit per class (4 classes). The final Dense layer has
# no activation, so the model outputs raw logits.
model = tf.keras.Sequential([
    layers.Embedding(input_dim=max_features + 1, output_dim=embedding_dim),
    layers.Dropout(rate=0.2),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(rate=0.2),
    layers.Dense(units=4),
])

# Labels are integer class ids and the outputs are logits, hence the sparse
# categorical cross-entropy with from_logits=True.
model.compile(
    optimizer='adam',
    loss=losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

In [13]:
# Train the model for 10 epochs, validating on val_ds after each one.

history = model.fit(
    x=train_ds,
    validation_data=val_ds,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [14]:
# Evaluate the model on the vectorized test set; the cell displays the
# returned [loss, accuracy] pair.

model.evaluate(x=test_ds)



[0.594524621963501, 0.7881249785423279]

In [19]:
# Peek at the first three (text, label) pairs of a single raw test batch.
for text_batch, label_batch in raw_test_ds.take(1):
  # Convert each tensor to NumPy once instead of once per printed line.
  texts = text_batch.numpy()
  labels = label_batch.numpy()
  for idx in range(3):
    print(f"Review {texts[idx]}")
    print(f"Label {labels[idx]}")

Review b'"send screen output to /dev/null in blank i am returning a variable from a function in blank...when i try to retrieve the variable in another function, it prints out the print statements of the original function for a second time...here is my code:..user_name = \'my_user\'.kms_cleint = \'client_info\'.aws_account = \'company-account\'.def create_kms_key(user_name, kms_client):   .    print(""****************************************************************"").    print(""         create kms key for %s                                   "" % user_name).    print(""****************************************************************"").    kms_key_id = 5.    return kms_key_id..def store_secret(user_name, kms_client, secrets_client, aws_account):.    print(""****************************************************************"").    print(""         store secret for %s for aws account: %s                "" % (user_name, aws_account)).    print(""********************************************

In [22]:
# Inference-ready model that accepts raw strings: text vectorization, then the
# trained classifier, then an activation that converts its logits into
# probabilities. The classifier has 4 mutually exclusive classes, so softmax
# (scores that sum to 1 across classes) is used instead of an independent
# per-class sigmoid. Softmax keeps the ordering of the logits, so the
# predicted class is unchanged.
export_model = tf.keras.Sequential([
    vectorize_layer,
    model,
    layers.Activation('softmax'),
])

In [23]:
# export_model ends in an activation layer, so it emits probabilities rather
# than raw logits; the loss must therefore be built with from_logits=False,
# otherwise the reported loss is computed on the wrong scale.
export_model.compile(
    loss=losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer='adam',
    metrics=['accuracy'])

In [24]:
# Evaluate the exported model directly on raw (unvectorized) test strings and
# report only the accuracy.
loss, accuracy = export_model.evaluate(x=raw_test_ds)
print(accuracy)

0.7881249785423279
