In [3]:
import pandas as pd
import gradio as gr
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import whisper


In [None]:
# Show up to 200 characters per column when DataFrames holding text are displayed.
# The fully qualified option name is used; abbreviated keys are resolved by pattern
# matching and can become ambiguous if pandas adds similarly named options.
pd.set_option('display.max_colwidth', 200)

In [None]:
# Restore models using pickle
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so these
# files must only ever come from a trusted source.

# Restore the trained PassiveAggressiveClassifier model.
# The `with` block guarantees the file handle is closed once the model is loaded.
with open('Resources/pa_classfier.pkl', 'rb') as model_file:
    pac_model = pickle.load(model_file)

# Restore the TF-IDF vectorizer
with open('Resources/tfid_vectorizer.pkl', 'rb') as vectorizer_file:
    tfid_vectorizer = pickle.load(vectorizer_file)

print("pac_model and tfid_vectorizer restored")

In [None]:
# Clean an article: lower-case it, strip URLs, HTML tags, punctuation and digits,
# then tokenize, drop English stopwords and lemmatize what is left.
def clean_text(text):
    """Return a normalised, space-joined version of ``text``.

    Args:
        text: Raw text string to clean.

    Returns:
        A single string made of the lemmatized, non-stopword tokens separated by
        single spaces.
    """
    # Substitutions are applied in this exact order to the lower-cased text:
    # URLs, HTML tags, punctuation (anything that is not \w or \s), then digits.
    removal_patterns = (
        r'https?://\S+|www\.\S+',
        r'<.*?>',
        r'[^\w\s]',
        r'\d+',
    )
    normalized = text.lower()
    for pattern in removal_patterns:
        normalized = re.sub(pattern, '', normalized)

    tokens = word_tokenize(normalized)
    english_stopwords = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()

    # Filter stopwords and lemmatize the surviving tokens in a single pass
    kept_tokens = []
    for token in tokens:
        if token in english_stopwords:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))

    return ' '.join(kept_tokens)


In [None]:
# Test clean_text function to confirm that it works as expected 
#test_text = "This is an example sentence with some numbers like 123, and special characters !@#$, as well as URLs https://www.example.com"
#cleaned_text = clean_text(test_text)
#print(cleaned_text)

In [None]:
# Predict whether a text string is a real or fake article using the restored
# TfidfVectorizer and PassiveAggressiveClassifier
def article_prediction(text):
    """Classify ``text`` as ``'Real'`` or ``'Fake'``.

    Args:
        text: Raw article text; it is cleaned with ``clean_text`` before vectorizing.

    Returns:
        ``'Real'`` when the model's first predicted label equals 1, otherwise ``'Fake'``.
    """
    # The vectorizer is given a list of documents, so wrap the single cleaned string
    features = tfid_vectorizer.transform([clean_text(text)])

    # Only one document was submitted, so only the first prediction is relevant
    predicted_label = pac_model.predict(features)[0]

    return 'Real' if predicted_label == 1 else 'Fake'


In [None]:
# Test article_prediction function with Real article to confirm that it works as expected 
#pred_test_text = "DUBAI (Reuters) - The United Arab Emirates on Sunday denied a report that \
#Yemen s Houthi group had fired a missile toward a nuclear plant in the UAE, state news agency \
#WAM reported on its Twitter account. It quoted the UAE s emergency and crisis management department \
#as saying the UAE possessed a missile defense system that could deal with any such threats and \
#adding the al-Barakah nuclear plant was secure against all eventualities."

#pred_test = article_prediction(pred_test_text)
#print(pred_test)

In [None]:
# Rate the sentiment of a sentence with VADER's SentimentIntensityAnalyzer
def get_sentiment_rating(sentence):
    """Label ``sentence`` as ``'Positive'``, ``'Negative'`` or ``'Neutral'``.

    Args:
        sentence: Text to score.

    Returns:
        ``'Positive'`` when the compound score is >= 0.05, ``'Negative'`` when it is
        <= -0.05, and ``'Neutral'`` otherwise.
    """
    analyzer = SentimentIntensityAnalyzer()

    # polarity_scores returns a dictionary of scores; only the compound score is used here
    compound_score = analyzer.polarity_scores(sentence)['compound']

    if compound_score >= 0.05:
        return 'Positive'
    if compound_score <= -0.05:
        return 'Negative'
    return 'Neutral'


In [None]:
# Test get_sentiment_rating function
#test_sentence = "Google is a great place to search for answers when you don't have any!"
#test_result = get_sentiment_rating(test_sentence)
#test_result

In [None]:
# Create a function that transcribes an audio file to text using Whisper

def transcribe_text(audio_rec):
    """Transcribe an audio file stored under ``Resources/`` with Whisper.

    Args:
        audio_rec: File name of the recording, relative to the ``Resources`` directory.

    Returns:
        The ``'text'`` entry of the result returned by the model's ``transcribe`` call.

    Raises:
        ValueError: If ``audio_rec`` does not resolve to a path inside ``Resources``
            (for example ``'../other.wav'``).
    """
    # Local import: pathlib is standard library and only needed for the path check
    from pathlib import Path

    audio_rec_path = f'Resources/{audio_rec}'

    # The name can come straight from a user-facing text box, so refuse anything
    # that would escape the Resources directory before the file is opened.
    resources_dir = Path('Resources').resolve()
    if resources_dir not in Path(audio_rec_path).resolve().parents:
        raise ValueError(f'audio file must be located inside Resources: {audio_rec!r}')

    # Create Whisper model (the 'base' model is loaded on every call)
    whisper_model = whisper.load_model('base')

    # Transcribe the audio recording to text using Whisper
    speech_text = whisper_model.transcribe(audio_rec_path)
    return speech_text['text']


In [None]:
# Test transcribe_text function
#test_text_wisp = transcribe_text('fake_article_audio.wav')
#test_text_wisp

In [None]:
# Create a function that determines the sentiment rating and predicts whether a text string is real or fake.

def article_sentiment_prediction(sent_text, audio_file, article_text):
    """Run sentiment analysis, and where applicable real/fake prediction, on one input.

    The inputs are checked in order and the first one with content wins:

    1. ``sent_text``    - sentiment rating only; the prediction is an empty string.
    2. ``audio_file``   - the file is transcribed, then rated and classified.
    3. ``article_text`` - the text is rated and classified.

    An input that is empty, ``None`` or whitespace-only counts as not provided.

    Args:
        sent_text: Text to rate for sentiment only.
        audio_file: Audio file name handed to ``transcribe_text``.
        article_text: Article text to rate and classify.

    Returns:
        A 3-tuple ``(sentiment_rating, prediction, analyzed_text)``. When no input has
        content the tuple is ``("", "", "No input provided.")`` so callers that expect
        three values still receive three values.
    """
    def has_content(value):
        # Guard against None as well as empty / whitespace-only strings
        return bool(value) and bool(value.strip())

    # Sentiment-only request: no real/fake prediction is made
    if has_content(sent_text):
        return get_sentiment_rating(sent_text), "", sent_text

    # Pick the text that will be both rated and classified
    if has_content(audio_file):
        analyzed_text = transcribe_text(audio_file)
    elif has_content(article_text):
        analyzed_text = article_text
    else:
        # Previously this path fell off the end and returned None
        return "", "", "No input provided."

    text_sentiment = get_sentiment_rating(analyzed_text)
    text_prediction = article_prediction(analyzed_text)
    return text_sentiment, text_prediction, analyzed_text


In [None]:
# Test #1 article_sentiment_prediction function - Sentiment Rating Only
#test_sentence = "Google is a great place to search for answers when you don't have any!"
#empty_string = ""
#ret_sent, ret_rating, ret_article = article_sentiment_prediction(test_sentence, empty_string, empty_string)
#print(ret_sent)
#print(ret_rating)
#print(ret_article)

In [None]:
# Test #2 article_sentiment_prediction function - audio file
#empty_string = ""
#audio_test = "fake_article_audio.wav"
#ret_sent, ret_rating, ret_article = article_sentiment_prediction(empty_string, audio_test, empty_string)
#print(ret_sent)
#print(ret_rating)
#print(ret_article)

In [None]:
# Test #3 article_sentiment_prediction function - news article
#empty_string = ""
#test_article = "DUBAI (Reuters) - The United Arab Emirates on Sunday denied a report that \
#Yemen s Houthi group had fired a missile toward a nuclear plant in the UAE, state news agency \
#WAM reported on its Twitter account. It quoted the UAE s emergency and crisis management department \
#as saying the UAE possessed a missile defense system that could deal with any such threats and \
#adding the al-Barakah nuclear plant was secure against all eventualities."
#ret_sent, ret_rating, ret_article = article_sentiment_prediction(empty_string, empty_string, test_article)
#print(ret_sent)
#print(ret_rating)
#print(ret_article)

In [None]:
# Build the Gradio interface: three text inputs (sentiment-only text, audio filename,
# article text) are passed to article_sentiment_prediction, and its three return values
# fill the three output boxes in order.
app = gr.Interface(
    title="Fake News Detector and Sentiment Analyzer",
    fn=article_sentiment_prediction,
    inputs=[
        gr.Textbox(label="Enter Text for Sentiment Rating Only."),
        gr.Textbox(label="Enter Audio filename for Real-Fake Analysis and Sentiment Rating."),
        gr.Textbox(label="Enter Text for Real-Fake Analysis and Sentiment Rating."),
    ],
    outputs=[
        gr.Textbox(label="Sentiment Rating.", show_copy_button=True),
        gr.Textbox(label="Real or Fake Analysis Prediction.", show_copy_button=True),
        gr.Textbox(label="Analyzed Text.", lines=10, show_copy_button=True),
    ],
)

# Launch the app. share=True also requests a public share link from Gradio.
app.launch(share=True)

In [None]:
# Close the Gradio application when done testing.
app.close()