In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.metrics import classification_report

In [2]:

from sklearn.datasets import fetch_20newsgroups

# The four newsgroups used in this notebook. The list is defined once so the
# train and test splits are always requested with the same categories.
NEWSGROUP_CATEGORIES = ["comp.graphics", "comp.windows.x", "rec.autos", "sci.space"]

documents_train = fetch_20newsgroups(
    subset = "train",
    categories = NEWSGROUP_CATEGORIES
)

documents_test = fetch_20newsgroups(
    subset = "test",
    categories = NEWSGROUP_CATEGORIES
)

# Sanity check: one raw post, its integer label, and the label names.
print(documents_train.data[0])
print(documents_train.target[0])
print(documents_train.target_names)


From: orourke@sophia.smith.edu (Joseph O'Rourke)
Subject: Re: Delaunay Triangulation
Organization: Smith College, Northampton, MA, US
Lines: 22

In article <lsk1v9INN93c@caspian.usc.edu> zyeh@caspian.usc.edu (zhenghao yeh) writes:
>
>Does anybody know what Delaunay Triangulation is?
>Is there any reference to it? 
>Is it useful for creating 3-D objects? If yes, what's the advantage?

There is a vast literature on Delaunay triangulations, literally
hundreds of papers.  A program is even provided with every copy of 
Mathematica nowadays.  You might look at this if you are interested in 
using it for creating 3D objects:

@article{Boissonnat5,
  author = "J.D. Boissonnat",
  title = "Geometric Structures for Three-Dimensional Shape Representation",
  journal = "ACM Transactions on Graphics",
  month = "October",
  year = {1984},
  volume = {3},
  number = {4},
  pages = {266-286}
}


0
['comp.graphics', 'comp.windows.x', 'rec.autos', 'sci.space']


In [3]:
from sklearn.feature_extraction.text import CountVectorizer

# Bag-of-words count features: the vocabulary is learned from the training
# posts only, and the test posts are encoded with that same vocabulary.
# NOTE(review): X_train / X_test are reassigned by the TextVectorization cell
# below before any model uses them, so these count matrices are never consumed.
vectorizer = CountVectorizer()

X_train = vectorizer.fit_transform(documents_train.data)
X_test = vectorizer.transform(documents_test.data)


In [4]:
# Integer-encode each post: words are mapped to ids from a vocabulary capped
# at 2000 entries, and every post comes out as a sequence of exactly 20 ids.
# (The one-hot expansion of these ids happens in a later cell.)
text_vectorizer = layers.TextVectorization(
    output_mode = "int",
    max_tokens = 2000,
    output_sequence_length = 20,  # token ids kept per document
)

# Build the vocabulary from the training posts only.
text_vectorizer.adapt(documents_train.data)

In [5]:
# Encode both splits with the adapted vectorizer (train first, then test).
X_train, X_test = (
    text_vectorizer(split.data) for split in (documents_train, documents_test)
)


In [6]:
# Confirm the encoded training set is (n_documents, sequence_length).
print(f"{X_train.shape}")

(2364, 20)


In [7]:
# One-hot encode the token ids so each post becomes a
# (sequence_length, vocabulary_size) matrix for the LSTM.
#
# The ids are recomputed from the raw text instead of from the current value
# of X_train, so running this cell a second time does not try to one-hot an
# already one-hot tensor. The depth is read from the vectorizer rather than
# repeating its max_tokens value by hand.
vocab_size = text_vectorizer.vocabulary_size()

X_train = tf.one_hot(text_vectorizer(documents_train.data), depth = vocab_size)
X_test = tf.one_hot(text_vectorizer(documents_test.data), depth = vocab_size)
X_train.shape

TensorShape([2364, 20, 2000])

In [9]:
# LSTM classifier over the one-hot encoded sequences.
# The input shape is declared with an explicit keras.Input instead of passing
# input_shape to the first layer, which Keras warns against for Sequential
# models.
n_classes = len(documents_train.target_names)

mdl = keras.Sequential()
mdl.add(keras.Input(shape = tuple(X_train.shape[1:])))
mdl.add(layers.LSTM(128))
mdl.add(layers.Dense(128, activation = "relu"))
# One softmax unit per newsgroup category.
mdl.add(layers.Dense(n_classes, activation = "softmax"))

  super().__init__(**kwargs)


In [18]:
# Integer class labels -> sparse categorical cross-entropy.
mdl.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

# Keep the History object so the per-epoch metrics can be inspected later;
# assigning it also keeps its repr out of the cell output.
# NOTE(review): the recorded log starts at ~98% accuracy in epoch 1, which
# suggests this cell was run on an already fitted model. Rebuild `mdl` before
# re-running to get a training curve from scratch.
history_onehot = mdl.fit(X_train, documents_train.target, epochs = 5)

Epoch 1/5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 43ms/step - accuracy: 0.9846 - loss: 0.0489
Epoch 2/5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 41ms/step - accuracy: 0.9809 - loss: 0.0580
Epoch 3/5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - accuracy: 0.9932 - loss: 0.0225
Epoch 4/5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 42ms/step - accuracy: 0.9958 - loss: 0.0130
Epoch 5/5
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 43ms/step - accuracy: 0.9969 - loss: 0.0087


<keras.src.callbacks.history.History at 0x1a1526dfa10>

In [19]:
# Held-out loss and accuracy for the one-hot LSTM; returns [loss, accuracy].
mdl.evaluate(x = X_test, y = documents_test.target)

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.7783 - loss: 1.7205


[1.6270205974578857, 0.7846251726150513]

## Embedding vector technique

sparse matrix ->> dense matrix: instead of a one-hot vector per token, each token id is mapped to a learned dense vector.

In [26]:
# Second encoding, used by the embedding model below: a vocabulary capped at
# 10000 entries and 200 token ids per post. The ids stay as integers here.
text_vectorizer = layers.TextVectorization(
    output_mode = "int",
    max_tokens = 10000,
    output_sequence_length = 200,  # token ids kept per document
)
text_vectorizer.adapt(documents_train.data)

# Re-encode both splits; this replaces the earlier X_train / X_test.
X_train, X_test = (
    text_vectorizer(split.data) for split in (documents_train, documents_test)
)
print(X_train.shape)

(2364, 200)


In [31]:
# Embedding + LSTM classifier over the integer-encoded sequences.
n_classes = len(documents_train.target_names)

mdl = keras.Sequential()

# Map every token id to a learned dense 128-dimensional vector. input_dim is
# read from the vectorizer so it always covers every id the vectorizer can
# emit, instead of repeating its max_tokens value by hand.
mdl.add(layers.Embedding(input_dim = text_vectorizer.vocabulary_size(), output_dim = 128))

mdl.add(layers.LSTM(128))

mdl.add(layers.Dense(128, activation = "relu"))
# One softmax unit per newsgroup category.
mdl.add(layers.Dense(n_classes, activation = "softmax"))
mdl.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

# Keep the History object for later inspection of the per-epoch metrics;
# assigning it also keeps its repr out of the cell output.
history_embedding = mdl.fit(X_train, documents_train.target, epochs = 20)

Epoch 1/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 130ms/step - accuracy: 0.2892 - loss: 1.3823
Epoch 2/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 124ms/step - accuracy: 0.3690 - loss: 1.3633
Epoch 3/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 133ms/step - accuracy: 0.3858 - loss: 1.2532
Epoch 4/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 132ms/step - accuracy: 0.4495 - loss: 1.0865
Epoch 5/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 127ms/step - accuracy: 0.5067 - loss: 0.9973
Epoch 6/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 132ms/step - accuracy: 0.5417 - loss: 0.9505
Epoch 7/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 130ms/step - accuracy: 0.5852 - loss: 0.8641
Epoch 8/20
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 133ms/step - accuracy: 0.6649 - loss: 0.6258
Epoch 9/20
[1m74/74[0m [32m━━━━

<keras.src.callbacks.history.History at 0x1a152c23860>

In [32]:
# Held-out loss and accuracy for the embedding LSTM; returns [loss, accuracy].
mdl.evaluate(x = X_test, y = documents_test.target)

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 39ms/step - accuracy: 0.7666 - loss: 1.0921


[1.0445990562438965, 0.7750952839851379]

In [33]:
# Class-probability predictions for the test posts. y_pred was not defined by
# any earlier cell, so this cell failed with NameError on a fresh kernel.
y_pred = mdl.predict(X_test)

# argmax over the class axis turns the probabilities into predicted labels.
print(classification_report(documents_test.target, np.argmax(y_pred, axis = 1)))

              precision    recall  f1-score   support

           0       0.73      0.69      0.71       389
           1       0.73      0.78      0.76       395
           2       0.79      0.81      0.80       396
           3       0.88      0.84      0.85       394

    accuracy                           0.78      1574
   macro avg       0.78      0.78      0.78      1574
weighted avg       0.78      0.78      0.78      1574

