In [1]:
import os
import re

import nltk
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from nltk.corpus import stopwords
# nltk.download('stopwords')
from nltk.stem.porter import PorterStemmer
from tensorflow import keras
from tensorflow.keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import hashing_trick
from tensorflow.keras.preprocessing.text import one_hot


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the labelled titles, shuffle them reproducibly, split into train/test,
# and turn every title into a fixed-length vector of integer word indices.
seed = 42            # fixed seed so the shuffle (and therefore the split) is repeatable
split = 0.8          # fraction of the rows that goes into the training set
vocab_size = 10000   # number of hash buckets words are mapped into
sent_length = 10     # every title is padded/truncated to this many indices

df = pd.read_csv("./datasets/train.csv")

# Shuffle all rows, then drop every row that has a missing value in any column.
df = df.sample(frac=1, random_state=seed).dropna()

# Compute the boundary once so both slices are guaranteed to use the same index.
n_train = int(split * df.shape[0])
train = df[:n_train]
test = df[n_train:]

x_train, y_train = train["title"], train["label"]
x_test, y_test = test["title"], test["label"]

# one_hot() hashes words with Python's built-in hash(), which is salted per
# process: the same word gets a different index after a kernel restart, so
# weights restored from a checkpoint would see differently encoded inputs.
# hashing_trick() with the md5 hash yields the same index in every session.
x_train_enc = [hashing_trick(words, vocab_size, hash_function="md5") for words in x_train]
x_test_enc = [hashing_trick(words, vocab_size, hash_function="md5") for words in x_test]

# 'pre' padding left-pads short titles with zeros up to sent_length.
x_train_enc_padded = pad_sequences(x_train_enc, padding='pre', maxlen=sent_length)
x_test_enc_padded = pad_sequences(x_test_enc, padding='pre', maxlen=sent_length)

In [3]:
# Sanity-check the padded training matrix: one row per training title,
# `sent_length` columns.
x_train_enc_padded.shape

(14628, 10)

In [4]:
def create_model(input_dim=10, num_classes=10, dropout_rate=0.2):
    """Build and compile a small feed-forward classifier.

    The network is Dense(512, relu) -> Dropout -> Dense(num_classes). The last
    layer has no activation, so it emits raw logits; the loss is configured
    with ``from_logits=True`` to match.

    Calling ``create_model()`` with no arguments reproduces the original
    hard-coded architecture, so weights saved from a default model can still
    be loaded into another default model.

    Args:
        input_dim: Number of features per sample (length of each input vector).
        num_classes: Number of output logits.
        dropout_rate: Fraction of hidden units dropped during training.

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer, sparse
        categorical cross-entropy loss, sparse categorical accuracy metric).
    """
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(512, activation="relu", input_shape=(input_dim,)),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(num_classes),
    ])
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
    )
    return model

In [5]:
# Build a fresh, compiled model; it has not been trained at this point.
model = create_model()

In [6]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               5632      
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 10)                5130      
                                                                 
Total params: 10762 (42.04 KB)
Trainable params: 10762 (42.04 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
# File prefix the weights are written to, and the folder that contains it.
checkpoint_path = "training_1/checkpoints"
checkpoint_dir = os.path.split(checkpoint_path)[0]

# Callback that stores only the model weights (not the full model) and
# reports every save on stdout.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)


In [8]:
# Train for 20 epochs. The checkpoint callback writes the weights to disk
# during training, and the held-out split is passed as validation data.
model.fit(
    x_train_enc_padded,
    y_train,
    epochs=20,
    validation_data=(x_test_enc_padded, y_test),
    callbacks=[cp_callback],
)


Epoch 1/20
Epoch 1: saving model to training_1/checkpoints
Epoch 2/20
Epoch 2: saving model to training_1/checkpoints
Epoch 3/20
Epoch 3: saving model to training_1/checkpoints
Epoch 4/20
Epoch 4: saving model to training_1/checkpoints
Epoch 5/20
Epoch 5: saving model to training_1/checkpoints
Epoch 6/20
Epoch 6: saving model to training_1/checkpoints
Epoch 7/20
Epoch 7: saving model to training_1/checkpoints
Epoch 8/20
Epoch 8: saving model to training_1/checkpoints
Epoch 9/20
Epoch 9: saving model to training_1/checkpoints
Epoch 10/20
Epoch 10: saving model to training_1/checkpoints
Epoch 11/20
Epoch 11: saving model to training_1/checkpoints
Epoch 12/20
Epoch 12: saving model to training_1/checkpoints
Epoch 13/20
Epoch 13: saving model to training_1/checkpoints
Epoch 14/20
Epoch 14: saving model to training_1/checkpoints
Epoch 15/20
Epoch 15: saving model to training_1/checkpoints
Epoch 16/20
Epoch 16: saving model to training_1/checkpoints
Epoch 17/20
Epoch 17: saving model to trai

<keras.src.callbacks.History at 0x281755ba0>

In [9]:
# Evaluate the model that has just been fitted on the held-out split.
# The message previously said "Untrained model", which mislabelled this result.
loss, acc = model.evaluate(x_test_enc_padded, y_test, verbose=2)
print(f"Trained model, accuracy: {100*acc:5.2f}%")

115/115 - 0s - loss: 0.4393 - sparse_categorical_accuracy: 0.8048 - 75ms/epoch - 653us/step
Untrained model, accuracy: 80.48%


In [10]:
# List the files in the checkpoint directory (the parent folder of
# `checkpoint_path`) to confirm the weights were written.
os.listdir(checkpoint_dir)

['checkpoints.data-00000-of-00001', 'checkpoint', 'checkpoints.index']

In [11]:
# Build a second model from the same factory; it starts with freshly
# initialised weights, independent of the trained `model`.
model_loaded = create_model()

In [12]:
# Baseline: score the freshly built model before any weights are restored.
loss_loaded, acc_loaded = model_loaded.evaluate(
    x=x_test_enc_padded,
    y=y_test,
)
print(f"Untrained model, accuracy: {100*acc_loaded:5.2f}%")


Untrained model, accuracy:  0.41%


In [13]:
# Restore the weights written by the checkpoint callback into the new model.
model_loaded.load_weights(checkpoint_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x282a6a020>

In [14]:
# Score the second model again now that the checkpointed weights are loaded.
loss_loaded_trained, acc_loaded_trained = model_loaded.evaluate(
    x=x_test_enc_padded,
    y=y_test,
)
print(f"Trained model, accuracy: {100*acc_loaded_trained:5.2f}%")

Trained model, accuracy: 80.48%
