In [None]:
import tensorflow as tf

# Query the physical GPU list once and reuse it for both reports below.
gpus = tf.config.list_physical_devices('GPU')

# Check if GPU is available and configured properly
print("Num GPUs Available: ", len(gpus))
# `tf.test.is_gpu_available()` is deprecated in favour of
# `tf.config.list_physical_devices('GPU')`; a non-empty list means a GPU is usable.
print(bool(gpus))

# Check the name of the GPU device
print(tf.test.gpu_device_name())

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


Num GPUs Available:  1
True
/device:GPU:0


In [None]:
!pip install transformers

In [None]:
import numpy as np
import tensorflow as tf
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, LSTM, Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pandas as pd

In [None]:
def _read_ag_news_split(source):
    """Read one header-less AG News CSV and add the derived ``text`` column."""
    frame = pd.read_csv(source, header=None, names=["label", "title", "description"])

    # Empty / NA-like fields are parsed as NaN; treat them as empty text so the
    # concatenation always yields a string instead of propagating NaN.
    title = frame["title"].fillna("")
    description = frame["description"].fillna("")
    frame["text"] = title + " " + description

    # Shift labels down by one (presumably 1-based class ids -> 0-based, as
    # needed by sparse categorical losses; verify against the dataset).
    frame["label"] = frame["label"] - 1
    return frame


def load_ag_news_dataset(train_url=None, test_url=None):
    """Load the AG News train and test splits as pandas DataFrames.

    Args:
        train_url: Path or URL of the training CSV. Defaults to the copy
            hosted in the ``mhjabreel/CharCnn_Keras`` GitHub repository.
        test_url: Path or URL of the test CSV. Defaults to the copy hosted in
            the same repository.

    Returns:
        ``(train_df, test_df)``. Each frame has the columns ``label``
        (original label minus one), ``title``, ``description`` and ``text``
        (title and description joined by a single space; missing parts are
        treated as empty strings).
    """
    if train_url is None:
        train_url = "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/train.csv"
    if test_url is None:
        test_url = "https://raw.githubusercontent.com/mhjabreel/CharCnn_Keras/master/data/ag_news_csv/test.csv"

    train_df = _read_ag_news_split(train_url)
    test_df = _read_ag_news_split(test_url)
    return train_df, test_df

In [None]:
def preprocess_data(num_words=10000, oov_token='<OOV>', max_length=200,
                    padding_type='post', truncating_type='post'):
    """Tokenize the AG News texts and return padded sequences plus labels.

    The tokenizer vocabulary is fitted on the training texts only and then
    applied to both splits.

    Args:
        num_words: ``num_words`` passed to the Keras ``Tokenizer``.
        oov_token: Token the tokenizer substitutes for out-of-vocabulary words.
        max_length: ``maxlen`` passed to ``pad_sequences`` for every sequence.
        padding_type: ``padding`` argument for ``pad_sequences``.
        truncating_type: ``truncating`` argument for ``pad_sequences``.

    Returns:
        ``(train_padded, train_labels, test_padded, test_labels)`` where the
        padded values are whatever ``pad_sequences`` returns and the labels
        are NumPy arrays built from each frame's ``label`` column.
    """
    train_df, test_df = load_ag_news_dataset()

    train_texts = train_df["text"].tolist()
    test_texts = test_df["text"].tolist()

    # Fit on the training split only so no test vocabulary leaks into the model.
    tokenizer = Tokenizer(num_words=num_words, oov_token=oov_token)
    tokenizer.fit_on_texts(train_texts)

    def _encode(texts):
        """Convert raw texts to fixed-length integer sequences."""
        sequences = tokenizer.texts_to_sequences(texts)
        return pad_sequences(sequences, maxlen=max_length,
                             padding=padding_type, truncating=truncating_type)

    train_padded = _encode(train_texts)
    test_padded = _encode(test_texts)

    train_labels = np.array(train_df["label"].tolist())
    test_labels = np.array(test_df["label"].tolist())

    return train_padded, train_labels, test_padded, test_labels

In [None]:
def create_cnn_lstm_model():
    """Build and compile the Conv1D + LSTM text classifier.

    Returns:
        A compiled ``Sequential`` model: embedding -> 1-D convolution ->
        max pooling -> dropout -> LSTM -> dense head with a 4-way softmax.
    """
    layer_stack = [
        Embedding(10000, 128, input_length=200),
        Conv1D(64, 5, activation='relu'),
        MaxPooling1D(pool_size=4),
        # Regularise the pooled features here; the LSTM itself runs without dropout.
        Dropout(0.2),
        LSTM(64, dropout=0.0, recurrent_dropout=0.0),
        Dense(32, activation='relu'),
        Dropout(0.5),
        Dense(4, activation='softmax'),
    ]
    network = Sequential(layer_stack)

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
from tensorflow.keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense, Dropout

def create_cnn_model():
    """Build and compile the purely convolutional text classifier.

    Returns:
        A compiled ``Sequential`` model: embedding -> 1-D convolution ->
        max pooling -> flatten -> dense head with a 4-way softmax.
    """
    layer_stack = [
        Embedding(10000, 128, input_length=200),
        Conv1D(64, 5, activation='relu'),
        MaxPooling1D(pool_size=4),
        # Collapse the pooled feature maps into one vector for the dense head.
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(4, activation='softmax'),
    ]
    network = Sequential(layer_stack)

    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [None]:
# Tokenize and pad both AG News splits.
(train_padded, train_labels,
 test_padded, test_labels) = preprocess_data()

# Hold out 10% of the training data for validation; the seed keeps the split fixed.
X_train, X_val, y_train, y_val = train_test_split(
    train_padded,
    train_labels,
    random_state=42,
    test_size=0.1,
)

In [None]:
# Fit the CNN-LSTM classifier, monitoring the held-out validation split each epoch.
cnn_lstm_model = create_cnn_lstm_model()
cnn_lstm_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=32,
    verbose=1,
)

In [None]:
# Fit the CNN classifier with the same data and training schedule as the CNN-LSTM.
cnn_model = create_cnn_model()
cnn_model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=5,
    batch_size=32,
    verbose=1,
)

In [None]:
# Evaluate models on test set
def _macro_metrics(y_true, class_probs):
    """Return (accuracy, macro F1, macro precision, macro recall).

    ``class_probs`` holds one row of per-class scores per sample; the
    predicted class is the arg-max of each row.
    """
    y_pred = np.argmax(class_probs, axis=1)
    return (
        accuracy_score(y_true, y_pred),
        f1_score(y_true, y_pred, average='macro'),
        precision_score(y_true, y_pred, average='macro'),
        recall_score(y_true, y_pred, average='macro'),
    )


# Both models are scored on the same held-out test split so the rows of the
# comparison table are directly comparable. The inputs are passed exactly as
# they were during training (no extra trailing axis).
cnn_lstm_preds = cnn_lstm_model.predict(test_padded)
cnn_preds = cnn_model.predict(test_padded)

cnn_lstm_acc, cnn_lstm_f1, cnn_lstm_prec, cnn_lstm_rec = _macro_metrics(test_labels, cnn_lstm_preds)
cnn_acc, cnn_f1, cnn_prec, cnn_rec = _macro_metrics(test_labels, cnn_preds)

# Create a table to compare the models
data = {'Model': ['CNN-LSTM', 'CNN'],
        'Accuracy': [cnn_lstm_acc, cnn_acc],
        'F1 Score': [cnn_lstm_f1, cnn_f1],
        'Precision': [cnn_lstm_prec, cnn_prec],
        'Recall': [cnn_lstm_rec, cnn_rec]}

df = pd.DataFrame(data)
print(df)



      Model  Accuracy  F1 Score  Precision    Recall
0  CNN-LSTM  0.910526  0.910131   0.910221  0.910526
1       CNN  0.910250  0.910038   0.910107  0.910017
