In [33]:
def news_title_sentiment(string_buzzword, api_key=None):
    """Classify the sentiment of news headlines that match a buzzword.

    Requests up to 100 articles matching ``string_buzzword`` from the NewsAPI
    ``/v2/everything`` endpoint, trains an NLTK Naive Bayes classifier on a
    class-balanced sample of ``tweetDataFile.csv`` and labels every headline
    as ``negative``, ``positive``, ``neutral`` or ``irrelevant``. A textual
    summary is printed and a Plotly bar chart of the label counts is shown.

    Parameters
    ----------
    string_buzzword : str
        Query phrase sent to NewsAPI as the ``q`` parameter.
    api_key : str, optional
        NewsAPI key. When omitted, the ``NEWS_API_KEY`` environment variable
        is used, so the credential never has to live in the notebook.

    Returns
    -------
    None
        Results are printed and plotted, not returned.

    Raises
    ------
    ValueError
        If no API key is passed and ``NEWS_API_KEY`` is unset or empty.
    requests.HTTPError
        If NewsAPI answers with an HTTP error status.
    RuntimeError
        If the NewsAPI payload does not report ``status == 'ok'``.
    """
    import os

    # Resolve the credential first so a misconfigured environment fails fast,
    # before any heavy import, file read or network traffic.
    if api_key is None:
        api_key = os.environ.get('NEWS_API_KEY')
    if not api_key:
        raise ValueError(
            "No NewsAPI key available: pass api_key=... or set the "
            "NEWS_API_KEY environment variable."
        )

    import re
    from string import punctuation

    import nltk
    import pandas as pd
    import plotly.graph_objects as go
    import requests
    from nltk.corpus import stopwords
    from nltk.tokenize import word_tokenize

    # nltk.download('popular')  # needed once if the NLTK corpora are not installed

    # ---- Fetch the headlines ------------------------------------------------
    url = 'https://newsapi.org/v2/everything'
    parameters = {
        'q': string_buzzword,  # query phrase
        'pageSize': 100,       # maximum is 100
        'apiKey': api_key,
    }
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=parameters, timeout=30)
    response.raise_for_status()
    response_json = response.json()
    if response_json.get('status') != 'ok':
        raise RuntimeError(
            "NewsAPI request failed: " + str(response_json.get('message', response_json))
        )

    # Articles may carry a null/empty title; those cannot be classified.
    testDataSet = [
        {'text': article['title'], 'label': None}
        for article in response_json.get('articles', [])
        if article.get('title')
    ]
    if not testDataSet:
        # Nothing to classify; bail out before the percentage maths divides by zero.
        print("No article titles found for query: " + str(string_buzzword))
        return

    # ---- Load and rebalance the training corpus -----------------------------
    training_df = pd.read_csv(
        "tweetDataFile.csv",
        header=None,
        names=['tweet_id', 'text', 'label', 'topic'],
    )
    # Down-sample every class to the size of the rarest label.
    count_rows_keep = training_df['label'].value_counts().min()
    # Per-label seeds (and their order) are kept as in the original notebook
    # so the sampled training set stays reproducible.
    label_seeds = [('neutral', 2), ('irrelevant', 1), ('negative', 3), ('positive', 3)]
    balanced_df = pd.concat([
        training_df[training_df['label'] == label].sample(n=count_rows_keep, random_state=seed)
        for label, seed in label_seeds
    ])
    trainingData_copied = balanced_df.to_dict('records')

    # ---- Text preprocessing -------------------------------------------------
    class PreProcessTweets:
        """Lower-case, normalise and tokenize texts, dropping stop words."""

        def __init__(self):
            # 'AT_USER' and 'URL' are the placeholders inserted by _processTweet;
            # listing them here removes them again after tokenization.
            self._stopwords = set(stopwords.words('english') + list(punctuation) + ['AT_USER', 'URL'])

        def processTweets(self, list_of_tweets):
            """Return ``(tokens, label)`` tuples for records with 'text' and 'label' keys."""
            return [
                (self._processTweet(tweet["text"]), tweet["label"])
                for tweet in list_of_tweets
            ]

        def _processTweet(self, tweet):
            """Return the list of non-stop-word tokens of one text."""
            tweet = tweet.lower()  # convert text to lower-case
            tweet = re.sub(r'((www\.[^\s]+)|(https?://[^\s]+))', 'URL', tweet)  # replace URLs with a placeholder
            tweet = re.sub(r'@[^\s]+', 'AT_USER', tweet)  # replace usernames with a placeholder
            tweet = re.sub(r'#([^\s]+)', r'\1', tweet)  # remove the # in #hashtag
            tokens = word_tokenize(tweet)  # split the text into word tokens
            return [word for word in tokens if word not in self._stopwords]

    tweetProcessor = PreProcessTweets()
    preprocessedTrainingSet = tweetProcessor.processTweets(trainingData_copied)
    preprocessedTestSet = tweetProcessor.processTweets(testDataSet)

    # ---- Train the Naive Bayes classifier and label the headlines -----------
    def buildVocabulary(preprocessedTrainingData):
        """Return the distinct words that occur in the training tokens."""
        all_words = []
        for (words, _label) in preprocessedTrainingData:
            all_words.extend(words)
        return nltk.FreqDist(all_words).keys()

    def extract_features(tweet):
        """Map a token list to boolean 'contains(word)' features over the vocabulary."""
        tweet_words = set(tweet)
        return {'contains(%s)' % word: (word in tweet_words) for word in word_features}

    word_features = buildVocabulary(preprocessedTrainingSet)
    trainingFeatures = nltk.classify.apply_features(extract_features, preprocessedTrainingSet)
    NBayesClassifier = nltk.NaiveBayesClassifier.train(trainingFeatures)
    NBResultLabels = [
        NBayesClassifier.classify(extract_features(tokens))
        for tokens, _label in preprocessedTestSet
    ]

    # ---- Summarise ----------------------------------------------------------
    label_order = ['negative', 'positive', 'neutral', 'irrelevant']
    counts = {label: NBResultLabels.count(label) for label in label_order}
    total = len(NBResultLabels)  # > 0: guaranteed by the early return above

    # Majority vote between positive and negative; a tie is reported as such
    # instead of being silently counted as negative.
    if counts['positive'] > counts['negative']:
        print("Overall Positive Sentiment")
    elif counts['negative'] > counts['positive']:
        print("Overall Negative Sentiment")
    else:
        print("Overall Mixed Sentiment (positive and negative are tied)")

    # The full breakdown is printed for every outcome, not only the negative one.
    print("Negative Sentiment Percentage = " + str(100 * counts['negative'] / total) + "%")
    print("Positive Sentiment Percentage = " + str(100 * counts['positive'] / total) + "%")
    for label in label_order:
        print("Number of " + label + " comments = " + str(counts[label]))

    # ---- Plot ---------------------------------------------------------------
    fig = go.Figure([
        go.Bar(
            x=[label.capitalize() for label in label_order],
            # Numeric heights: strings would make Plotly treat the axis as categorical.
            y=[counts[label] for label in label_order],
        )
    ])
    fig.update_layout(
        template='simple_white',
        title_text='News Sentiment Results',
        yaxis=dict(
            # The bars show raw counts, so the axis is labelled accordingly.
            title=dict(text='Number of articles', font=dict(size=16)),
            tickfont=dict(size=14),
        ),
    )

    fig.show()

In [34]:
#news_title_sentiment('FinTech')

Overall Negative Sentiment
Negative Sentiment Percentage = 17.0%
Positive Sentiment Percentage = 12.0%
Number of negative comments = 17
Number of positive comments = 12
Number of neutral comments = 52
Number of irrelevant comments = 19
