In [48]:
import numpy as np
import pandas as pd 
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
from tensorflow.keras import models,layers

In [49]:
data = pd.read_csv("bbc-text.csv")

In [50]:
data.head()

Unnamed: 0,category,text
0,tech,tv future in the hands of viewers with home th...
1,business,worldcom boss left books alone former worldc...
2,sport,tigers wary of farrell gamble leicester say ...
3,sport,yeading face newcastle in fa cup premiership s...
4,entertainment,ocean s twelve raids box office ocean s twelve...


In [51]:
data['category'].value_counts()

sport            511
business         510
politics         417
tech             401
entertainment    386
Name: category, dtype: int64

In [52]:
X=data['text']
y=data['category']

In [53]:
X_train, X_test, y_train_cat, y_test_cat = train_test_split(X, y, test_size=0.20, random_state=10)
#Train test split

In [54]:
X_train.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)
y_train_cat.reset_index(drop=True, inplace=True)
y_test_cat.reset_index(drop=True, inplace=True)

In [55]:
max_words = 1000
tokenize = keras.preprocessing.text.Tokenizer(num_words=max_words,char_level=False)

In [56]:
tokenize.fit_on_texts(X_train) # fit tokenizer to our training text data
x_train = tokenize.texts_to_matrix(X_train)
x_test = tokenize.texts_to_matrix(X_test)

In [57]:
x_train[0].shape

(1000,)

In [58]:
encoder = LabelEncoder()
encoder.fit(y_train_cat)
y_train = encoder.transform(y_train_cat)
y_test = encoder.transform(y_test_cat)

In [59]:
# Converts the labels to a one-hot representation
num_classes = np.max(y_train) + 1
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [60]:
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('y_train shape:', y_train.shape)
print('y_test shape:', y_test.shape)

x_train shape: (1780, 1000)
x_test shape: (445, 1000)
y_train shape: (1780, 5)
y_test shape: (445, 5)


In [61]:
model = models.Sequential()
model.add(layers.Dense(1024,input_shape=(max_words,)))
model.add(layers.Dense(212,activation='relu')) #Use for relu in our model is that there isn't any negative dependacy in our dataset
model.add(layers.Activation('relu'))
model.add(layers.Dense(num_classes,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [62]:
batch_size = 32
epochs = 2
drop_ratio = 0.5

In [63]:
history = model.fit(x_train, y_train,batch_size=batch_size,epochs=epochs,verbose=1,validation_split=0.1)

Train on 1602 samples, validate on 178 samples
Epoch 1/2
Epoch 2/2


In [64]:
score = model.evaluate(x_test, y_test,batch_size=batch_size, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.20125451650512352
Test accuracy: 0.94382024


In [65]:
text_labels = encoder.classes_

for i in range(10):
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    print("----------------")
    prediction = model.predict(np.array([x_test[i]]))
    predicted_label = text_labels[np.argmax(prediction)]
    print(X_test[i][:50], "...")
    print('Actual label:' + y_test_cat[i])
    print("Predicted label: " + predicted_label + "\n") 

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
oscar nominees lack pulling power this year s clut ...
Actual label:entertainment
Predicted label: entertainment

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
wi-fi web reaches farmers in peru a network of com ...
Actual label:tech
Predicted label: tech

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
stevens named in england line-up england have name ...
Actual label:sport
Predicted label: sport

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
ireland win eclipses refereeing  errors  the inter ...
Actual label:sport
Predicted label: sport

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
melzer shocks agassi in san jose second seed andre ...
Actual label:sport
Predicted label: sport

Test loss: 0.20125451650512352
Test accuracy: 0.94382024
----------------
us in eu tariff chaos trade row the us has asked t .

# Testing 

In [93]:
text_inp = input("Enter News")
def predict_news(text_inp):
    max_words = 1000
    tokenize = keras.preprocessing.text.Tokenizer(num_words=max_words,char_level=False)
    X_train_inp=pd.Series(text_inp)
    tokenize.fit_on_texts(X_train) # fit tokenizer to our training text data
    x_train_man = tokenize.texts_to_matrix(X_train_inp)
    prediction = model.predict(np.array([x_train_man[0]]))
    predicted_label = text_labels[np.argmax(prediction)]
    return predicted_label
predict_news(text_inp)

Enter NewsAnd that behaviour seems ... let me find the word ... wrong. Yes, wrong is the word. Wrong. Utterly objectionable. Objectionable works, because sensible prime ministers know that in times of national emergency, you show. You don’t tell, at least not with a crude bit of agitprop on the Liberal party website and pumped out on platforms. You show. Show up, first of all, and you show leadership by actually leading. Not by telling people you are leading. Otherwise you are not a leader. You are an internet meme. Dear Mr Morrison. You are the prime minister, not the Liberal party spruiker. We really do need you to understand that difference.  PM tries to present himself as a leader reborn – but you just can’t take the marketing out of Scotty David Marr David Marr  Read more Now of course governments need to tell citizens what they are doing, and perhaps the argument for doing this increases when there has been a rapid recalibration from the top like the one we’ve seen over the past 

'politics'