#text classification dataset

In [1]:
from tensorflow.keras.datasets import reuters
from tensorflow.keras import models, layers
import numpy as np

# 1. Carga el dataset Reuters (clasifica noticias en 46 categorías)
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10000)

# 2. Vectorización one-hot de las secuencias
def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, seq in enumerate(sequences):
        results[i, seq] = 1.
    return results

x_train = vectorize_sequences(train_data)
x_test  = vectorize_sequences(test_data)

# 3. One-hot de las etiquetas
def to_one_hot(labels, dimension=46):
    results = np.zeros((len(labels), dimension))
    for i, lab in enumerate(labels):
        results[i, lab] = 1.
    return results

y_train = to_one_hot(train_labels)
y_test  = to_one_hot(test_labels)

# 4. Construye un modelo simple de clasificación
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax')
])

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# 5. Entrena
history = model.fit(x_train, y_train,
                    epochs=20,
                    batch_size=512,
                    validation_split=0.2)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 76ms/step - accuracy: 0.3325 - loss: 3.1983 - val_accuracy: 0.6149 - val_loss: 1.9524
Epoch 2/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.6514 - loss: 1.7352 - val_accuracy: 0.6244 - val_loss: 1.5130
Epoch 3/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step - accuracy: 0.7146 - loss: 1.2691 - val_accuracy: 0.7145 - val_loss: 1.2959
Epoch 4/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 78ms/step - accuracy: 0.7637 - loss: 1.0717 - val_accuracy: 0.7101 - val_loss: 1.2128
Epoch 5/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 88ms/step - accuracy: 0.7998 - loss: 0.9023 - val_accuracy: 0.7412 - val_loss: 1.1094
Epoch 6/20
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 51ms/step - accuracy: 0.8361 - loss: 0.7612 - val_accuracy: 0.7813 - val_loss: 1.0233
Epoch 7/20
[1m15/15[0m [32m━━━━

In [2]:
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix

# 1. Evalúa loss y accuracy en el conjunto de prueba
test_loss, test_acc = model.evaluate(x_test, y_test, batch_size=512, verbose=1)
print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_acc:.4f}")

# 2. Obtén las predicciones (clase con mayor probabilidad)
y_prob = model.predict(x_test, batch_size=512, verbose=1)   # shape (num_samples, 46)
y_pred = np.argmax(y_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

# 3. Reporte de precisión, recall y F1-score por categoría
print("\nClassification Report:")
print(classification_report(y_true, y_pred, digits=4))


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.7728 - loss: 1.1139
Test loss: 1.1548
Test accuracy: 0.7676
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step

Classification Report:
              precision    recall  f1-score   support

           0     0.9000    0.7500    0.8182        12
           1     0.6538    0.8095    0.7234       105
           2     0.7143    0.5000    0.5882        20
           3     0.9322    0.9139    0.9230       813
           4     0.7253    0.9135    0.8086       474
           5     0.0000    0.0000    0.0000         5
           6     0.8182    0.6429    0.7200        14
           7     1.0000    0.3333    0.5000         3
           8     0.7917    0.5000    0.6129        38
           9     0.8500    0.6800    0.7556        25
          10     0.9200    0.7667    0.8364        30
          11     0.7143    0.6627    0.6875        83
          12     1.0000    0.1538    0.2667        13
 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
# Supongamos que este es tu modelo ya entrenado
model.save("Text_classification.h5")



In [4]:
from google.colab import files
files.download("Text_classification.h5")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>