# Multiclass classification

In [1]:
from tensorflow.keras.datasets import reuters
# Fetch the Reuters dataset with num_words=10000 (per the Keras docs this caps
# the vocabulary at the most frequent words -- confirm against the API reference).
train_split, test_split = reuters.load_data(num_words=10000)
train_data, train_labels = train_split
test_data, test_labels = test_split

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [2]:
from tensorflow import keras
from tensorflow.keras import layers

In [3]:
# Number of samples in the training split and in the test split.
len(train_data),len(test_data)

(8982, 2246)

In [4]:
# Shape of the training label array (one label per training sample).
train_labels.shape

(8982,)

In [5]:
# Peek at one raw training sample: a list of integer indices.
train_data[1]

[1,
 3267,
 699,
 3434,
 2295,
 56,
 2,
 7511,
 9,
 56,
 3906,
 1073,
 81,
 5,
 1198,
 57,
 366,
 737,
 132,
 20,
 4093,
 7,
 2,
 49,
 2295,
 2,
 1037,
 3267,
 699,
 3434,
 8,
 7,
 10,
 241,
 16,
 855,
 129,
 231,
 783,
 5,
 4,
 587,
 2295,
 2,
 2,
 775,
 7,
 48,
 34,
 191,
 44,
 35,
 1795,
 505,
 17,
 12]

In [6]:
import numpy as np
def dataset_info(data, labels):
    """Print a short summary of a sequence dataset.

    Args:
        data: Sized, indexable collection of sequences (each element must
            support ``len``).
        labels: Indexable collection of labels aligned with ``data``.

    Prints the sample count, the number of distinct labels, the first
    sample/label pair and the max / min / mean sequence length. When
    ``data`` is empty only the counts are printed, because the remaining
    statistics are undefined.
    """
    # Measure every sequence once and reuse the result for max / min / mean.
    sequence_lengths = [len(sequence) for sequence in data]

    print("Dataset Information:")
    print("-" * 40)
    print(f"Number of samples: {len(data)}")
    print(f"Number of unique labels: {len(np.unique(labels))}")

    if not sequence_lengths:
        # data[0], max() and min() would all raise on an empty dataset.
        print("Dataset is empty; no per-sample statistics available.")
        print("-" * 40)
        return

    # The sample is printed as-is (integer sequence), so label it as encoded.
    print(f"First sample (encoded): {data[0]}")
    print(f"First label: {labels[0]}")
    print(f"Max sequence length: {max(sequence_lengths)}")
    print(f"Min sequence length: {min(sequence_lengths)}")
    print(f"Average sequence length: {np.mean(sequence_lengths):.2f}")
    print("-" * 40)

# Summarise the training split first, then the test split.
for heading, (samples, targets) in (
    ("Training Data Info:", (train_data, train_labels)),
    ("Test Data Info:", (test_data, test_labels)),
):
    print(heading)
    dataset_info(samples, targets)

Training Data Info:
Dataset Information:
----------------------------------------
Number of samples: 8982
Number of unique labels: 46
First sample (decoded): [1, 2, 2, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]
First label: 3
Max sequence length: 2376
Min sequence length: 13
Average sequence length: 145.54
----------------------------------------
Test Data Info:
Dataset Information:
----------------------------------------
Number of samples: 2246
Number of unique labels: 46
First sample (decoded): [1, 4, 1378, 2025, 9, 697, 4622, 111, 8, 25, 109, 29, 3650, 11, 150, 244, 364, 33, 30, 30, 1398, 333, 6, 2, 159, 9, 1084, 363, 13, 2, 71, 9, 2, 71, 117, 4, 225, 78, 206, 10, 9, 1214, 8, 4, 27

In [7]:
# Decode one training sample back into text.
word_index = reuters.get_word_index()
# Invert the word -> index mapping so integers can be looked up.
reverse_word_index = {index: word for word, index in word_index.items()}
# Indices are shifted by 3 before lookup; anything without a matching word
# becomes "?". (Presumably this mirrors the default index offset of
# reuters.load_data -- confirm against the Keras docs.)
decoded_newswire = " ".join(
    reverse_word_index.get(token - 3, "?") for token in train_data[5]
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters_word_index.json
[1m550378/550378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [8]:
decoded_newswire

"? the u s agriculture department estimated canada's 1986 87 wheat crop at 31 85 mln tonnes vs 31 85 mln tonnes last month it estimated 1985 86 output at 24 25 mln tonnes vs 24 25 mln last month canadian 1986 87 coarse grain production is projected at 27 62 mln tonnes vs 27 62 mln tonnes last month production in 1985 86 is estimated at 24 95 mln tonnes vs 24 95 mln last month canadian wheat exports in 1986 87 are forecast at 19 00 mln tonnes vs 18 00 mln tonnes last month exports in 1985 86 are estimated at 17 71 mln tonnes vs 17 72 mln last month reuter 3"

In [9]:
# The sequences have different lengths, so they must be encoded into fixed-size vectors.
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    """Multi-hot encode integer sequences into a fixed-width float matrix.

    Args:
        sequences: Sized iterable of integer-index sequences; the sequences
            may have different lengths.
        dimension: Number of columns in the output. Every index in every
            sequence must be a valid column index for this width.

    Returns:
        A float ndarray of shape ``(len(sequences), dimension)`` in which
        ``result[i, j]`` is 1.0 when ``j`` occurs in ``sequences[i]`` and
        0.0 otherwise.

    Raises:
        IndexError: If a sequence contains an index outside the column range.
    """
    results = np.zeros((len(sequences), dimension))

    for i, sequence in enumerate(sequences):
        # One fancy-indexed assignment per row replaces the per-token Python
        # loop. Repeated indices are harmless (the cell is simply set to 1
        # again) and an empty sequence leaves the row all zeros.
        results[i, list(sequence)] = 1.
    return results




In [10]:
# Encode both splits with the same helper (training split first).
x_train, x_test = vectorize_sequences(train_data), vectorize_sequences(test_data)

In [11]:
import numpy as np

def to_one_hot(labels, dimension=46):
    """One-hot encode integer labels.

    Args:
        labels: Sized iterable of integer class indices.
        dimension: Number of columns (classes) in the output.

    Returns:
        A float ndarray of shape ``(len(labels), dimension)`` with a 1.0 in
        the column named by each label and 0.0 everywhere else.
    """
    encoded = np.zeros((len(labels), dimension))

    # Iterating a 2-D array yields row views, so writing into `row`
    # updates `encoded` in place.
    for row, label in zip(encoded, labels):
        row[label] = 1.

    return encoded

# One-hot encode the label arrays of both splits (training first, then test).
y_train, y_test = map(to_one_hot, (train_labels, test_labels))


In [12]:
from tensorflow.keras.utils import to_categorical
# Rebind y_train / y_test using the Keras helper; this replaces the arrays
# that were assigned from to_one_hot earlier in the notebook.
y_train = to_categorical(train_labels)
y_test = to_categorical(test_labels)

In [13]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Stack the network layer by layer: two 64-unit ReLU layers followed by a
# 46-unit softmax output layer.
model = keras.Sequential()
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(46, activation="softmax"))

In [14]:
# Display the model object's repr.
model

<Sequential name=sequential, built=False>

In [15]:
# Configure training: categorical cross-entropy loss, RMSprop optimizer,
# and accuracy as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

In [16]:
# Hold out the first 1000 training samples for validation; train on the rest.
x_val, partial_x_train = x_train[:1000], x_train[1000:]
y_val, partial_y_train = y_train[:1000], y_train[1000:]

In [18]:
# Train on the reduced training set while monitoring the held-out samples.
# NOTE(review): the saved output for this cell starts at ~0.85 training
# accuracy in epoch 1, which suggests it was executed on already-trained
# weights -- rebuild and recompile the model before re-running for a clean curve.
history = model.fit(
    x=partial_x_train,
    y=partial_y_train,
    batch_size=512,
    epochs=10,
    validation_data=(x_val, y_val),
)

Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.8508 - loss: 0.6730 - val_accuracy: 0.7970 - val_loss: 0.9606
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 83ms/step - accuracy: 0.8818 - loss: 0.5498 - val_accuracy: 0.8090 - val_loss: 0.9159
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.8957 - loss: 0.4806 - val_accuracy: 0.7990 - val_loss: 0.8987
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step - accuracy: 0.9133 - loss: 0.4087 - val_accuracy: 0.8120 - val_loss: 0.8821
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.9204 - loss: 0.3461 - val_accuracy: 0.8200 - val_loss: 0.8629
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.9326 - loss: 0.3000 - val_accuracy: 0.8060 - val_loss: 0.8799
Epoch 7/10
[1m16/16[0m [32m━━━━

In [19]:
# Start again from a freshly initialised network with the same architecture:
# two 64-unit ReLU layers and a 46-way softmax output (one unit per class).
model = keras.Sequential()
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dense(46, activation="softmax"))

# Same training configuration as before.
model.compile(
    loss="categorical_crossentropy",
    optimizer="rmsprop",
    metrics=["accuracy"],
)

# Fit on the full training set (no validation hold-out) for 9 epochs.
model.fit(x=x_train, y=y_train, batch_size=512, epochs=9)

# Score the trained model on the test split and report the result.
results = model.evaluate(x_test, y_test)
print("Test loss, Test accuracy:", results)

Epoch 1/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step - accuracy: 0.3769 - loss: 3.0257
Epoch 2/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 44ms/step - accuracy: 0.6860 - loss: 1.5133
Epoch 3/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.7344 - loss: 1.1922
Epoch 4/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - accuracy: 0.7989 - loss: 0.9270
Epoch 5/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 72ms/step - accuracy: 0.8342 - loss: 0.7689
Epoch 6/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 42ms/step - accuracy: 0.8641 - loss: 0.6390
Epoch 7/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.8873 - loss: 0.5281
Epoch 8/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.9132 - loss: 0.4244
Epoch 9/9
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [28]:
import copy
# Random-guess baseline: compare the test labels against a shuffled copy of
# themselves and measure how often the two agree by chance.
test_labels_copy = copy.copy(test_labels)
# Use a seeded, local generator so the baseline is reproducible on
# Restart & Run All and does not disturb NumPy's global random state.
baseline_rng = np.random.default_rng(0)
baseline_rng.shuffle(test_labels_copy)
hits_array = np.array(test_labels) == np.array(test_labels_copy)
hits_array.mean()


0.1923419412288513