<a href="https://colab.research.google.com/github/brainy-data/fake_news_detection/blob/main/deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set-up

In [70]:
#Library required
%%capture
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pandas as pd
import pickle
import re

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('words')
wordnet_lemmatizer = WordNetLemmatizer()

In [71]:
#Import trained model from google drive
!gdown --id 12v6MzTMFmQrEg85pSN2tHCZBpX8JY67h
!gdown --id 1Ux1ecLbHBZZK132ZYY3nWnI1YdUAKFWc

Downloading...
From: https://drive.google.com/uc?id=12v6MzTMFmQrEg85pSN2tHCZBpX8JY67h
To: /content/fake_lgb_model.pickle
100% 218k/218k [00:00<00:00, 32.5MB/s]
Downloading...
From: https://drive.google.com/uc?id=1Ux1ecLbHBZZK132ZYY3nWnI1YdUAKFWc
To: /content/fake_tfidf.pickle
101MB [00:00, 163MB/s] 


In [72]:
# Load the downloaded artifacts: the fitted text vectorizer
# (fake_tfidf.pickle) and the classifier (fake_lgb_model.pickle).
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load these files if the Google Drive source is trusted.
# `with` closes each file handle as soon as the object has been loaded.
with open("fake_tfidf.pickle", "rb") as tfidf_file:
    loaded_tfidf = pickle.load(tfidf_file)
with open("fake_lgb_model.pickle", "rb") as model_file:
    loaded_lgb_model = pickle.load(model_file)

In [73]:
#Function for text pre-processing
def get_cleaned_data(input_data, mode='df'):
    """Clean raw text and return it in the ``text`` column of a DataFrame.

    Steps applied to every entry of ``text``, in order:

    1. lowercase;
    2. remove @mentions, URLs, a leading ``rt``, ``http`` plus the character
       following it, and every character that is not an ASCII letter, digit,
       space or tab;
    3. remove digits;
    4. drop English stop words and collapse whitespace to single spaces;
    5. pass the resulting string through the WordNet lemmatizer.

    Parameters
    ----------
    input_data : pandas.DataFrame, str, or list/tuple of str
        With ``mode='df'``: a DataFrame that has a ``text`` column.
        With any other mode: a single string, or a sequence of strings
        (one output row per string).
    mode : str, default 'df'
        ``'df'`` treats ``input_data`` as a DataFrame; any other value
        treats it as raw text to wrap in a new DataFrame.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame whose ``text`` column holds the cleaned text. A
        DataFrame passed in by the caller is left unmodified.
    """
    if mode != 'df':
        # A bare string is one document; a sequence is one document per item.
        texts = [input_data] if isinstance(input_data, str) else list(input_data)
        input_df = pd.DataFrame({'text': texts})
    else:
        # Work on a copy so the caller's frame is not rewritten in place.
        input_df = input_data.copy()

    # Set membership is O(1); the stop-word list is otherwise scanned per token.
    stop_words = set(stopwords.words('english'))

    # lowercase the text
    text = input_df['text'].str.lower()
    # remove special characters, mentions and URLs
    text = text.apply(lambda elem: re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", elem))
    # remove numbers
    text = text.apply(lambda elem: re.sub(r"\d+", "", elem))
    # remove stopwords (split() already strips whitespace from each token)
    text = text.apply(lambda elem: ' '.join(word for word in elem.split() if word not in stop_words))
    # NOTE(review): lemmatize() receives the whole cleaned string rather than
    # individual tokens, so in practice it only has an effect on single-word
    # texts. Kept as-is on purpose: the downloaded vectorizer/model were
    # presumably fitted on text cleaned this way -- confirm against the
    # training code before switching to per-token lemmatization.
    text = text.apply(lambda words: wordnet_lemmatizer.lemmatize(words))

    input_df['text'] = text
    return input_df

In [74]:
# define function for prediction
def predict(text):
    """Print whether ``text`` is classified as fake or real, with its probability.

    The text is cleaned with ``get_cleaned_data`` (non-DataFrame mode),
    vectorised with ``loaded_tfidf`` and scored with ``loaded_lgb_model``.
    Column 1 of the predicted probabilities is reported as "fake" when it is
    at least 0.5; otherwise column 0 is reported as "real".

    Parameters
    ----------
    text : str or single-element list of str
        The article text to classify.

    Returns
    -------
    None
        The verdict is printed, not returned.
    """
    cleaned_frame = get_cleaned_data(text, mode="non-df")
    features = loaded_tfidf.transform(cleaned_frame['text'])
    # Only one document is scored, so only the first row is relevant.
    first_row = loaded_lgb_model.predict_proba(features)[0]
    message = (
        '{:.2%} that it is fake'.format(first_row[1])
        if first_row[1] >= 0.5
        else '{:.2%} that it is real'.format(first_row[0])
    )
    print(message)

# Examples

In [75]:
# Fake-news example: the text below comes from a random sentence generator.
# If pasted text contains the quote character that delimits the string
# (' here), remove or escape it first, otherwise the cell fails to parse.
# NOTE(review): the "Source" URL below is the same CNN article cited for the
# real-news example and does not appear to apply to this generated text --
# confirm and drop it.
# Source: https://edition.cnn.com/2021/04/20/health/blood-clots-experts-covid-vaccine/index.html
text = ['Her scream silenced the rowdy teenagers. Traveling became almost extinct during the pandemic. She lived on Monkey Jungle Road and that seemed to explain all of her strangeness. Facing his greatest fear, he ate his first marshmallow. Sometimes, all you need to do is completely make an ass of yourself and laugh it off to realise that life isn’t so bad after all. They say that dogs are mans best friend, but this cat was setting out to sabotage that theory. Today is the day I will finally know what brick tastes like. They were excited to see their first sloth.']
predict(text)

94.52% that it is fake


In [76]:
# Real-news example: an excerpt from the CNN article linked below.
# If pasted text contains the quote character that delimits the string
# (' here), remove or escape it first, otherwise the cell fails to parse.
# Source: https://edition.cnn.com/2021/04/20/health/blood-clots-experts-covid-vaccine/index.html
text = ['It was just about a year ago that doctors started noticing Covid-19 patients showing up in emergency rooms with strokes, and complained that blood clots were clogging up dialysis machines and other equipment being used to keep coronavirus patients alive.Frantic intensive care unit specialists reported "dramatic" blood clots in the heart, liver and other organs. Autopsies of coronavirus victims in New Orleans showed their lungs were jammed with clots. Some young, seemingly healthy patients were suffering massive strokes from Covid-19. As a blood clot expert, I can tell you its the most blood-clotting disease we have ever seen in our lifetimes," said Dr. Alex Spyropoulos, a professor at the Feinstein Institutes for Medical Research in New York." I have been doing this for a quarter century. I have never seen these levels of blood clots."']
predict(text)

75.50% that it is real


In [77]:
# Run all cells (Ctrl+F9) and then call the function predict with your text / article
# paste your article into variable text
# predict("your_text")

In [None]:
# Template cell: replace the empty string with the article text to classify,
# then run the cell to print the prediction.
text = [""]
predict(text)