<a href="https://colab.research.google.com/github/danielepia/NLP-Twitter-API/blob/main/Relevant_News.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install advertools

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import os
import random
import re

import advertools as adv
import networkx as nx
import nltk
import numpy as np
import pandas as pd
import tweepy
from nltk.cluster.util import cosine_distance
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [None]:
class TwitterClient(object):
    """Fetch a user's timeline through advertools and label each tweet with a VADER sentiment."""

    # Removes @handles, any non-alphanumeric character that is followed by a space, and URLs.
    # NOTE(review): the literal space after the second group means punctuation that is NOT
    # followed by a space (e.g. at the very end of the text) is kept - confirm this is intended.
    _TWEET_NOISE_RE = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) |(\w+:\/\/\S+)")

    def __init__(self):
        """Register the Twitter credentials with advertools.

        Each value is read from an environment variable when it is set; otherwise the
        placeholder string is used, so the secrets do not have to be typed into the notebook.
        """
        auth_params = {
            'app_key': os.environ.get('TWITTER_APP_KEY', 'xxxx'),
            'app_secret': os.environ.get('TWITTER_APP_SECRET', 'xxxx'),
            'oauth_token': os.environ.get('TWITTER_OAUTH_TOKEN', 'xxxx-xxxx'),
            'oauth_token_secret': os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', 'xxxx'),
        }

        adv.twitter.set_auth_params(**auth_params)

    def clean_tweet(self, tweet):
        """Return ``tweet`` with handles, URLs and some punctuation replaced by single spaces."""
        return ' '.join(self._TWEET_NOISE_RE.sub(' ', tweet).split())

    def get_tweet_sentiment_comp(self, tweet):
        """Return the ``'compound'`` entry of VADER's ``polarity_scores`` for ``tweet``."""
        # Build the analyser once and reuse it instead of constructing one per tweet.
        analyser = getattr(self, '_analyser', None)
        if analyser is None:
            analyser = self._analyser = SentimentIntensityAnalyzer()
        return analyser.polarity_scores(tweet)['compound']

    def get_tweet_sentiment(self, val):
        """Map a compound score to ``'positive'`` (> 0.5), ``'negative'`` (< -0.5) or ``'neutral'``."""
        if val > 0.5:
            return 'positive'
        if val < -.5:
            return 'negative'
        return 'neutral'

    def get_tweets(self, user, field, count=10):
        """Return the tweets of ``user`` whose full text matches the regex ``field``.

        Args:
            user: screen name whose timeline is requested.
            field: regular expression searched in each tweet's full text ('' matches everything).
            count: accepted for interface compatibility; it is not forwarded to the
                timeline request, so it does not limit the number of tweets returned.

        Returns:
            A list of dicts with the keys ``'text'``, ``'sentiment_comp'`` and ``'sentiment'``.
            The list is empty when the timeline has no ``tweet_full_text`` column.
        """
        timeline = adv.twitter.get_user_timeline(screen_name=user, tweet_mode="extended")
        if timeline is None or 'tweet_full_text' not in timeline:
            return []

        texts = timeline['tweet_full_text']
        # na=False keeps the mask strictly boolean when a tweet has no text.
        matching = texts[texts.str.contains(field, regex=True, na=False)]

        tweets = []
        for text in matching:
            score = self.get_tweet_sentiment_comp(text)
            tweets.append({
                'text': text,
                'sentiment_comp': score,
                'sentiment': self.get_tweet_sentiment(score),
            })
        return tweets

In [None]:
# Download the NLTK stop-word corpus; NLP_Process.preprocess reads it via stopwords.words('english').
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
class NLP_Process(object):
    """Text helpers used to rank tweets: cleaning, vectorisation and pairwise similarity."""

    # Removes @handles, any non-alphanumeric character that is followed by a space, and URLs.
    # NOTE(review): the literal space after the second group means punctuation that is NOT
    # followed by a space (e.g. at the very end of the text) is kept - confirm this is intended.
    _NOISE_RE = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) |(\w+:\/\/\S+)")

    # Interrogatives that are kept even though they appear in the stop-word list.
    _WH_WORDS = ('who', 'what', 'when', 'why', 'how', 'which', 'where', 'whom')

    def _clean(self, sentence):
        """Strip noise from ``sentence`` and collapse runs of whitespace to single spaces."""
        return ' '.join(self._NOISE_RE.sub(' ', sentence).split())

    def vocabulary(self, corpus):
        """Return the set of lower-cased words found in the cleaned sentences of ``corpus``."""
        return {
            word.lower()
            for sentence in corpus
            for word in self._clean(sentence).split()
        }

    def preprocess(self, corpus):
        """Clean and lower-case every sentence and drop English stop words (wh-words are kept).

        Returns:
            A list with one space-joined string per input sentence, in the same order.
        """
        # Set difference instead of remove(): no KeyError if a wh-word is absent from the list.
        stop = set(stopwords.words('english')) - set(self._WH_WORDS)

        doc = []
        for sentence in corpus:
            lowered = (word.lower() for word in self._clean(sentence).split())
            doc.append(' '.join(word for word in lowered if word not in stop))
        return doc

    def cosine_similarity(self, vector1, vector2):
        """Return the cosine of the angle between two vectors, or 0.0 if either has zero length."""
        vector1 = np.asarray(vector1, dtype=float)
        vector2 = np.asarray(vector2, dtype=float)
        denominator = np.linalg.norm(vector1) * np.linalg.norm(vector2)
        if denominator == 0:
            # A zero vector has no direction; report "no similarity" instead of NaN.
            return 0.0
        return np.dot(vector1, vector2) / denominator

    def vectorize_bow(self, corpus):
        """Return the dense bag-of-words count matrix of ``corpus`` (English stop words removed)."""
        vectorizer = CountVectorizer(stop_words='english')
        # The corpus argument is what gets vectorised (previously an undefined name was used).
        X_vec = vectorizer.fit_transform(corpus)
        return X_vec.todense()

    def vectorize_tfid(self, corpus, norm="l2", analyzer='word', ngram_range=(1, 3), max_features=500):
        """Return the TF-IDF matrix of ``corpus`` as a dense array.

        The keyword arguments are forwarded unchanged to ``TfidfVectorizer``.
        """
        vectorizer = TfidfVectorizer(norm=norm, analyzer=analyzer, ngram_range=ngram_range,
                                     max_features=max_features)
        tf_idf_matrix = vectorizer.fit_transform(corpus)
        return tf_idf_matrix.toarray()

    def build_similarity_matrix(self, sentences):
        """Return the square matrix of pairwise cosine similarities between sentence vectors.

        Args:
            sentences: sequence of equal-length numeric vectors, one per sentence.

        Returns:
            An ``(n, n)`` array whose entry ``[i][j]`` is the cosine similarity of vectors
            ``i`` and ``j``. The diagonal is 0 (a sentence is not compared with itself) and
            every entry involving an all-zero vector is 0 rather than NaN.
        """
        vectors = np.asarray(sentences, dtype=float)
        count = len(vectors)
        if count == 0:
            return np.zeros((0, 0))
        if vectors.ndim == 1:
            # Treat a flat list of scalars as one-dimensional vectors.
            vectors = vectors[:, None]

        norms = np.linalg.norm(vectors, axis=1)
        # Dividing zero rows by 1 leaves them at zero, which gives similarity 0 without NaN.
        unit = vectors / np.where(norms > 0, norms, 1.0)[:, None]

        # Cosine similarity (not distance): larger values must mean "more alike" because the
        # matrix is used as edge weights when ranking sentences.
        similarity_matrix = unit @ unit.T
        np.fill_diagonal(similarity_matrix, 0.0)
        return similarity_matrix

In [None]:
def top_news(tweets, top_n=10):
    """Print and return the ``top_n`` most central tweets.

    The tweets are preprocessed, turned into TF-IDF vectors, connected in a graph whose
    edge weights come from ``NLP_Process.build_similarity_matrix`` and ranked by
    eigenvector centrality.

    Args:
        tweets: iterable of tweet texts.
        top_n: maximum number of tweets to print; fewer are printed if fewer are available.

    Returns:
        The printed tweets, highest score first (an empty list when ``tweets`` is empty).
    """
    sentences = list(tweets)
    if not sentences:
        # Nothing to rank; the vectoriser cannot be fitted on an empty corpus.
        return []

    nlp = NLP_Process()
    sentences_process = nlp.preprocess(sentences)
    vector_sentences = nlp.vectorize_tfid(sentences_process, max_features=300)

    # Replace any NaN entries (e.g. produced by all-zero TF-IDF rows) with 0 so they
    # cannot propagate into the centrality scores.
    similarity_matrix = np.nan_to_num(nlp.build_similarity_matrix(vector_sentences))
    sentence_similarity_graph = nx.from_numpy_array(similarity_matrix)

    # weight='weight' makes the ranking use the matrix values; with the default
    # (weight=None) every edge counts the same and the values are ignored.
    scores = nx.eigenvector_centrality(sentence_similarity_graph, max_iter=1000, weight='weight')

    ranked_sentence = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    # Slicing caps the output at the number of available sentences instead of raising IndexError.
    summarize_text = [sentence for _, sentence in ranked_sentence[:top_n]]
    for sentence in summarize_text:
        print(sentence)
    return summarize_text

      
def main():
    """Fetch the timelines of the configured sites and print the top positive and negative news."""
    api = TwitterClient()

    sites = ['reuters', 'wsj']
    tweet_positive = []
    tweet_negative = []

    for site in sites:
        # Iterate the records directly: an empty result then simply contributes nothing,
        # whereas an empty DataFrame has no 'text' column to index.
        for tweet in api.get_tweets(user=site, field=''):
            if tweet['sentiment'] == 'positive':
                tweet_positive.append(tweet['text'])
            elif tweet['sentiment'] == 'negative':
                tweet_negative.append(tweet['text'])

    print('\nPositive News')
    top_news(tweet_positive)
    print('\n\nNegative News')
    top_news(tweet_negative)

    
    


# Run the pipeline when this cell/script is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

2022-05-26 14:09:55,001 | INFO | twitter.py:238 | wrapper | get_user_timeline | Requesting: count=200, max_id=None, screen_name=reuters, tweet_mode=extended
2022-05-26 14:10:04,832 | INFO | twitter.py:238 | wrapper | get_user_timeline | Requesting: count=200, max_id=None, screen_name=wsj, tweet_mode=extended



Positive News
When Champagne Louis Roederer set up shop in California’s Anderson Valley four decades ago, it was a bold move. It’s paid off with delicious wines and a bit of a sparkling boomlet in the valley. https://t.co/LdMO5ezWmP
Uvalde, Texas, feels like the only topic worth talking about right now, writes sports columnist @JasonGay https://t.co/BSvZzegqgA
UK imposes 25% energy windfall tax to help households as bills surge https://t.co/9gGf7uC4yU https://t.co/2e4cpqsqyo
Twitter has agreed to pay $150 million to settle allegations it misused private information, like phone numbers, to target advertising after telling users the information would be used for security reasons https://t.co/CDx0E1v1om https://t.co/iCb0UzL6C0
TotalEnergies will buy a 50% stake in Clearway Energy, the latest move by an oil major to expand in wind and solar power https://t.co/zXQMpNh1qr
This self-driving truck is demonstrating an impressive precision by autonomously navigating through a maze of fragile Ch

  sqrt(numpy.dot(u, u)) * sqrt(numpy.dot(v, v))))


Yemen’s economy has been wrecked by years of war and food price inflation doubling in two years. Now, the war in Ukraine and a sudden wheat export ban by India could make a bad situation worse https://t.co/3f3TxtZiha https://t.co/Ny8oh8BI1e
Update: The death toll in the Texas school shooting has risen to at least 18 children and one teacher, officials said https://t.co/08Wm7CSX3f
Update: The 18-year-old alleged gunman in the Texas school shooting has died, Gov. Greg Abbott said https://t.co/siW4n9C9pl
Update: Fourteen children and one teacher were killed in the mass shooting at a Texas elementary school, Gov. Greg Abbott said https://t.co/u22lRMP0nz
Ukrainian prosecutors identified eight Russian service members and mercenaries they said were responsible for the killing of a village mayor, her husband and son, who were discovered partly buried in a shallow grave shortly at the end of March https://t.co/ZH4ielw0df
UK PM Johnson said he took full responsibility but would not quit after a 