In [6]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import cross_val_score
import pickle

In [7]:
train_database = pd.read_csv('train.csv')
#print(train_database)
label_convert = {0: "True", 1: "False"}
train_database['label'] = train_database['label'].replace(label_convert)
#print(train_database)
#print(train_database.label.value_counts())

In [8]:
input_train, input_test, output_train, output_test = train_test_split(train_database['text'], train_database['label'], test_size = 0.25, random_state = 7, shuffle = True)
vectorized_text = TfidfVectorizer(stop_words="english", max_df = 0.75)

In [9]:
vectorTrain = vectorized_text.fit_transform(input_train.values.astype('U'))
vectorTest = vectorized_text.transform(input_test.values.astype('U'))

In [10]:
PasAggrClass = PassiveAggressiveClassifier(max_iter=100)
PasAggrClass.fit(vectorTrain, output_train)

PassiveAggressiveClassifier(max_iter=100)

In [11]:
output_predict = PasAggrClass.predict(vectorTest)
model_score = accuracy_score(output_test, output_predict)
#print(f'Accuracy of the model: {round(model_score*100, 2)}%')
model_score_percentage = round(model_score*100, 2)

In [12]:
train_database_real = pd.read_csv('True.csv')
train_database_real['label'] = 'True'
train_database_swap=[train_database_real['text'][i].replace('WASHINGTON (Reuters) - ','').replace('LONDON (Reuters) - ','').replace('(Reuters) - ','') for i in range(len(train_database_real['text']))]
train_database_real['text'] = train_database_swap
train_database_fake = pd.read_csv('Fake.csv')
train_database_fake['label'] = 'False'
train_database_final = pd.concat([train_database_real, train_database_fake]) 
train_database_final = train_database_final.drop(['subject', 'date'], axis = 1)

In [13]:
print("Paster your article here: ")
userText = input()

Paster your article here: 
"WORLD’S MOST POPULAR Search Engine Places FICTIONAL Story At Top Of News Feed:”Melania Trump is Lonely, Racist and Obsessed With Michelle Obama”","Google is the search engine the majority of people in the world go if they need to look up something. In fact, it is estimated that over 30 BILLION searches are performed on Google every month. Google now has a special feature for mobile users. When you go to search for a topic on Google, a news feed appears under the search bar, with what users expect to be the most popular current stories. But are they the most popular stories, or are they simply Google s contribution to the leftist propaganda machine? With all of the news breaking about Hillary s Uranium One deal with Russia, surely readers wouldn t expect to find some fictional story about Melania being a  racist ,  as the top story on Google s mobile newsfeed or would they?    Here s the story that appeared at the top of Google s news feed today:Newsweek: Mel

In [14]:
def searchForInterest(userText):
    userText_slpit = userText.split()
    politics_arr = ["protest", "President", "officials", "Department", "House", "Parliament", "laws", "against", "politics", "police"]
    sports_arr = ["match", "event", "game", "soccer", "football", "cricket", "tennis", "badminton", "olympics", "sport"]
    educ_arr = ["school", "college", "university", "tuition", "study", "homework", "education", "rights", "teaching", "training", "tutors", "knowledge", "skill", "science", "math", "geography", "history", "civics", "subjects"]
    auto_arr = ["car", "truck", "bike", "two-wheeler", "four-wheeler", "launch", "compact", "SUV", "transportation"]
    covid_arr = ["pandemic", "covid", "coronavirus", "disease", "cases", "vaccine", "vaccination", "spread", "social", "distance", "COVID", "WHO"]
    mili_arr = ["military", "force", "armed", "gun", "war", "kill", "dead", "alive", "injured"]
    UN_arr = ["UN", "United", "nations", "organization", "General", "Assembly", "Secretariat"]
    if any(topic in userText for topic in politics_arr):
        print("It seems your are interested in Politics. Have a look at some of these sites: ")
    elif any(topic in userText for topic in sports_arr):
        print("It seems your are interested in Sports. Have a look at some of these sites: ")
    elif any(topic in userText for topic in educ_arr):
        print("It seems your are interested in Education. Have a look at some of these sites: ")
    elif any(topic in userText for topic in auto_arr):
        print("It seems your are interested in Automobile. Have a look at some of these sites: ")
    elif any(topic in userText for topic in covid_arr):
        print("It seems your are interested in Covid_19. Have a look at some of these sites: ")
    elif any(topic in userText for topic in mili_arr):
        print("It seems your are interested in Military. Have a look at some of these sites: ")
    elif any(topic in userText for topic in UN_arr):  
        print("It seems your are interested in UN. Have a look at some of these sites: ")
    else:
        print("Sorry, we couldn't find any sites which you can view.")
    
    

In [15]:
def searchForLabels(userText):
    print(userText)
    vectorUserText = vectorized_text.transform([userText])
    output_prediction_user = PasAggrClass.predict(vectorUserText)
    searchForInterest(userText)
    return output_prediction_user[0]

In [16]:
searchForLabels(userText)

"WORLD’S MOST POPULAR Search Engine Places FICTIONAL Story At Top Of News Feed:”Melania Trump is Lonely, Racist and Obsessed With Michelle Obama”","Google is the search engine the majority of people in the world go if they need to look up something. In fact, it is estimated that over 30 BILLION searches are performed on Google every month. Google now has a special feature for mobile users. When you go to search for a topic on Google, a news feed appears under the search bar, with what users expect to be the most popular current stories. But are they the most popular stories, or are they simply Google s contribution to the leftist propaganda machine? With all of the news breaking about Hillary s Uranium One deal with Russia, surely readers wouldn t expect to find some fictional story about Melania being a  racist ,  as the top story on Google s mobile newsfeed or would they?    Here s the story that appeared at the top of Google s news feed today:Newsweek: Melania Trump is lonely and ob

'False'

In [25]:
file = "mymodel.pkl"
fileobj = open(file, 'wb')
pickle.dump(PasAggrClass, fileobj)
fileobj.close()
#file = 

In [26]:
loaded_model = pickle.load(open(file, 'rb'))
model_score = accuracy_score(output_test, output_predict)
print(model_score)

0.9626923076923077


In [30]:
score = loaded_model.score(vectorTest, output_test)  
# Print the Score
print("Test score: {0:.2f} %".format(100 * score))  

Test score: 96.27 %
