In [51]:
from sklearn.datasets import fetch_20newsgroups
# Load the 20 Newsgroups corpus with the loader's default arguments; in the
# visible cells `data` is only used to list the available category names.
data = fetch_20newsgroups()

In [52]:
# Display the names of all newsgroups in the dataset.
data.target_names

['alt.atheism',
 'comp.graphics',
 'comp.os.ms-windows.misc',
 'comp.sys.ibm.pc.hardware',
 'comp.sys.mac.hardware',
 'comp.windows.x',
 'misc.forsale',
 'rec.autos',
 'rec.motorcycles',
 'rec.sport.baseball',
 'rec.sport.hockey',
 'sci.crypt',
 'sci.electronics',
 'sci.med',
 'sci.space',
 'soc.religion.christian',
 'talk.politics.guns',
 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc']

In [53]:
# Newsgroups used for this experiment (a four-class subset of the corpus).
categories = [
    'talk.politics.misc',
    'soc.religion.christian',
    'sci.space',
    'comp.graphics',
]

In [54]:
# Fetch the train and test splits, limited to the selected newsgroups.
train = fetch_20newsgroups(categories=categories, subset='train')
test = fetch_20newsgroups(categories=categories, subset='test')

# Show one training document followed by its integer label.
# The label is 0 here, i.e. the first category in alphabetical order
# (comp.graphics).
print(train.data[8], train.target[8], sep='\n')


From: renggli@masg1.epfl.ch (loris renggli)
Subject: Need graph display/edit
Organization: Math. Dept., Swiss Institute of Technology
Lines: 17

I am looking for a program that is capable of displaying a graph
with nodes and links and with the possibility to edit interactively
the graph : add one node, change one link etc...

Actually, a very _simple_ X11 program would be ok; all I need is to
put some "boxes" (i.e. the nodes ) on a pane and be able to
manipulate them with the mouse (move, add or delete boxes).

Does anyone know if such program is available ?
Thanks for any help !!

------------------------------------------------------------------------
Loris RENGGLI                          phone  : +41-21-6934230
Swiss Federal Institute of Technology  fax    : +41-21-6934303
Math. Dept
CH-1015 Lausanne (Switzerland)         e-mail : renggli@masg1.epfl.ch


0


In [55]:
# Build a pipeline that converts raw text to TF-IDF features and feeds them
# to a multinomial naive Bayes classifier.
# NOTE: TfidfVectorizer() is created with default arguments, so no stop-word
# list is requested here (scikit-learn documents the default as
# stop_words=None). Pass stop_words='english' explicitly if stop-word removal
# is intended.
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

model = make_pipeline(TfidfVectorizer(), MultinomialNB())


In [56]:
# Fit the whole pipeline (vectorizer, then classifier) on the training
# documents and their integer labels.
model.fit(train.data, train.target)

In [57]:
# Predict a class label for every document in the test split; `label` is
# compared against test.target in the evaluation cell.
label = model.predict(test.data)

In [58]:
# Check results: print per-class precision, recall and F1 for the test split,
# comparing the true labels (test.target) with the predictions (label).
# Rows in the report are keyed by integer class label.
from sklearn.metrics import classification_report
print(classification_report(test.target, label))

              precision    recall  f1-score   support

           0       0.97      0.88      0.93       389
           1       0.92      0.92      0.92       394
           2       0.71      0.98      0.82       398
           3       0.99      0.60      0.75       310

    accuracy                           0.86      1491
   macro avg       0.90      0.85      0.86      1491
weighted avg       0.89      0.86      0.86      1491



In [61]:
# define function to predict category

def predict_category(s, train=train, model=model):
    """Return the newsgroup name predicted for a single piece of text.

    Parameters
    ----------
    s : str
        The text to classify.
    train : object, optional
        Object whose ``target_names`` sequence maps a predicted class label
        to its category name. Defaults to the ``train`` object that exists
        when this function is defined.
    model : object, optional
        Fitted estimator exposing ``predict``. Defaults to the ``model``
        object that exists when this function is defined.

    Returns
    -------
    The entry of ``train.target_names`` selected by the predicted label.
    """
    # predict() works on a batch, so wrap the single string in a list and
    # take the first (only) prediction.
    predicted_label = model.predict([s])[0]
    category_names = train.target_names
    return category_names[predicted_label]


In [62]:
# Space-related sentence. Note that 'stallite' is misspelled in the input, so
# the prediction presumably rests on the other words -- TODO confirm.
predict_category('NASA launched a stallite')

'sci.space'

In [65]:
# Sentence with religious vocabulary.
predict_category('Jesus is great')

'soc.religion.christian'

In [66]:
# Sentence with plotting/graphics vocabulary.
predict_category('Graph of a line')

'comp.graphics'

In [67]:
# Another space-related sentence.
predict_category('Sending a payload to ISS')

'sci.space'

In [68]:
# Misclassification example: this devotional phrase is labelled comp.graphics.
# The model can only answer with one of the categories it was trained on;
# presumably the token 'ram' is matched against its computing sense (RAM) in
# the corpus -- TODO confirm.
predict_category('Jai Shree Ram')

'comp.graphics'