In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical

In [None]:
# Load the news-article table from CSV, decoding the file as Latin-1.
data = pd.read_csv(
    filepath_or_buffer='/content/news_articles.csv',
    encoding='latin1',
)

In [None]:
# Discard the columns the model does not use (only 'Heading' and 'NewsType'
# are read further down). Rebinding instead of dropping in place, together
# with errors='ignore', keeps this cell idempotent: re-running it after the
# columns are already gone no longer raises KeyError.
data = data.drop(columns=['Date', 'Article'], errors='ignore')

In [None]:
# Preview the first five rows to confirm which columns remain.
data.head(n=5)

Unnamed: 0,Heading,NewsType
0,sindh govt decides to cut public transport far...,business
1,asia stocks up in new year trad,business
2,hong kong stocks open 0.66 percent lower,business
3,asian stocks sink euro near nine year,business
4,us oil prices slip below 50 a barr,business


In [None]:
# Split the frame into model input and target:
#   X - the headline text ('Heading' column)
#   y - the category label ('NewsType' column)
X, y = data['Heading'], data['NewsType']


In [None]:
# Turn the string labels into integer class ids, then into one-hot rows.
# The fitted encoder is kept because its `classes_` attribute is used later
# to size the model's output layer.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit(y).transform(y)
y_categorical = to_categorical(y_encoded)

In [None]:
# Hold out 20% of the rows for testing. Stratifying on the integer labels
# keeps the NewsType proportions the same in the train and test sets, and
# the fixed random_state makes the shuffle reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [None]:
# Vocabulary cap: passed to the Tokenizer as `num_words` and used as the
# Embedding layer's `input_dim`.
max_words = 10_000
# Fixed sequence length every headline is padded/truncated to.
max_length = 200


In [None]:
# Build the vocabulary from the training headlines only, then convert both
# splits to lists of integer token ids with that same fitted tokenizer.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)


In [None]:
# Bring every token-id sequence to exactly `max_length` entries.
# padding/truncating are spelled out here but are the library defaults
# ('pre'): short sequences get leading zeros, long ones lose leading tokens.
X_train_pad = pad_sequences(
    X_train_seq, maxlen=max_length, padding='pre', truncating='pre'
)
X_test_pad = pad_sequences(
    X_test_seq, maxlen=max_length, padding='pre', truncating='pre'
)


In [None]:
# Stacked-LSTM text classifier over the padded token-id sequences.
#
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument: Keras 3 deprecates `input_length` and
# warns on every use, while an Input layer works on both Keras 2 and Keras 3.
model = Sequential()
model.add(Input(shape=(max_length,), dtype='int32'))
# Map each token id (< max_words) to a 128-dimensional vector.
model.add(Embedding(input_dim=max_words, output_dim=128))
# First LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.5))
# Second LSTM returns only its final output vector.
model.add(LSTM(64))
model.add(Dropout(0.5))
# One softmax unit per class known to the fitted label encoder.
model.add(Dense(len(label_encoder.classes_), activation='softmax'))



In [None]:
# Configure training: Adam optimizer, categorical cross-entropy to match the
# one-hot targets and softmax output, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 5 epochs in mini-batches of 32, with 20% of the training data
# set aside for validation (validation_split=0.2).
# The returned History object is bound to `history` so the per-epoch
# loss/accuracy values stay available for inspection or plotting, and so the
# cell no longer ends by echoing a bare object repr.
history = model.fit(
    X_train_pad,
    y_train,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)


Epoch 1/5
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 568ms/step - accuracy: 0.5699 - loss: 0.6630 - val_accuracy: 0.9095 - val_loss: 0.3250
Epoch 2/5
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 597ms/step - accuracy: 0.9706 - loss: 0.1317 - val_accuracy: 0.9420 - val_loss: 0.1546
Epoch 3/5
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 566ms/step - accuracy: 0.9999 - loss: 0.0048 - val_accuracy: 0.9397 - val_loss: 0.1502
Epoch 4/5
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 571ms/step - accuracy: 1.0000 - loss: 0.0013 - val_accuracy: 0.9374 - val_loss: 0.1911
Epoch 5/5
[1m54/54[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 608ms/step - accuracy: 1.0000 - loss: 2.4964e-04 - val_accuracy: 0.9513 - val_loss: 0.1883


<keras.src.callbacks.history.History at 0x7cbaff69c340>

In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metric (accuracy).
loss, accuracy = model.evaluate(x=X_test_pad, y=y_test)
print(f'Test Accuracy: {accuracy:.2f}')


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 135ms/step - accuracy: 0.9635 - loss: 0.1467
Test Accuracy: 0.97
