 **Name** : Abdul Ameer Nabilla <br>
 **Email**  : abdulameer.ds@gmail.com

In [19]:
# prerequisite install below libraries
!pip install pandas urllib bs4 tweepy configparser lxml
!pip install -q transformers


In [20]:
#Importing Libraries

import pandas as pd
import ssl
from urllib.request import urlopen
from bs4 import BeautifulSoup as soup
import tweepy
import configparser
from transformers import pipeline

# Name the checkpoint explicitly so results do not silently change if the
# library's default model for "sentiment-analysis" is ever replaced.
# This is the same model the un-parameterised call falls back to.
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


In [21]:
def twitter_news(keyword, columns, max_items=None, config_path='config.ini'):
    """Search Twitter for ``keyword`` and return the matching tweets as a DataFrame.

    API credentials are read from the ``[twitter]`` section of an INI file,
    which must provide ``api_key``, ``api_key_secret``, ``access_token`` and
    ``access_token_secret``.

    Parameters
    ----------
    keyword : str
        Query string passed to ``api.search_tweets``.
    columns : list of str
        Exactly three column labels, in this order: creation timestamp,
        full tweet text, author screen name.
    max_items : int, optional
        Upper bound on the number of tweets fetched. ``None`` (default)
        iterates until the cursor is exhausted.
    config_path : str, optional
        Path of the INI file holding the credentials.

    Returns
    -------
    pandas.DataFrame
        One row per tweet, labelled with ``columns``.

    Raises
    ------
    FileNotFoundError
        If ``config_path`` does not exist or cannot be read.
    KeyError
        If the ``[twitter]`` section or one of its keys is missing.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips unreadable files and returns the list
    # of files it actually parsed, so an empty list means "nothing was loaded".
    if not config.read(config_path):
        raise FileNotFoundError(
            "Twitter credentials file not found or unreadable: %r" % (config_path,)
        )

    credentials = config['twitter']

    # authentication
    auth = tweepy.OAuthHandler(credentials['api_key'], credentials['api_key_secret'])
    auth.set_access_token(
        credentials['access_token'],
        credentials['access_token_secret'],
    )

    # Sleep through rate-limit windows instead of raising part-way through the
    # pagination and losing every tweet collected so far.
    api = tweepy.API(auth, wait_on_rate_limit=True)
    cursor = tweepy.Cursor(
        api.search_tweets, q=keyword, count=100, tweet_mode='extended'
    )
    tweets = cursor.items() if max_items is None else cursor.items(max_items)

    twitter_data = [
        [tweet.created_at, tweet.full_text, tweet.user.screen_name]
        for tweet in tweets
    ]

    return pd.DataFrame(twitter_data, columns=columns)
       
        


In [22]:
def google_news(xml_news_url, columns, month='Feb', verify_ssl=True, timeout=30):
    """Download an RSS/XML news feed and return its items as a DataFrame.

    Parameters
    ----------
    xml_news_url : str
        URL of the feed to download and parse.
    columns : list of str
        Exactly three column labels, in this order: publication date text,
        headline, link.
    month : str or None, optional
        Keep only items whose ``pubDate`` text contains this string
        (default ``'Feb'``). Pass ``None`` to keep every item.
    verify_ssl : bool, optional
        Verify the server certificate (default). Set to ``False`` only for
        environments with a broken certificate store; the download is then
        open to man-in-the-middle tampering.
    timeout : float, optional
        Socket timeout in seconds for the download.

    Returns
    -------
    pandas.DataFrame
        One row per retained item, labelled with ``columns``.
    """
    context = ssl.create_default_context()
    if not verify_ssl:
        # Explicit opt-out; check_hostname must be cleared before verify_mode.
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    # The context manager closes the connection even if read() raises.
    with urlopen(xml_news_url, context=context, timeout=timeout) as response:
        xml_page = response.read()

    soup_page = soup(xml_page, "xml")

    google_data = []
    for news in soup_page.find_all("item"):
        pub_date, title, link = news.pubDate, news.title, news.link
        # Skip malformed items instead of failing on a missing child tag.
        if pub_date is None or title is None or link is None:
            continue
        if month is None or month in pub_date.text:
            google_data.append([pub_date.text, title.text, link.text])

    return pd.DataFrame(google_data, columns=columns)
    
    
    

In [23]:
def sentiment_analysis(data, text_column='Tweet content/news headline'):
    """Add sentiment ``label`` and ``score`` columns to ``data``.

    Every value of ``text_column`` is classified with the module-level
    ``sentiment_pipeline``. The frame is modified in place and also returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the texts to classify.
    text_column : str, optional
        Name of the column holding the texts.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object with ``label`` and ``score`` columns added.
    """
    texts = data[text_column].tolist()

    # Run the model once over all texts. Calling it separately for the label
    # and for the score doubles the inference cost.
    # For a list input the pipeline returns one {'label': ..., 'score': ...}
    # dict per text. truncation=True keeps over-long texts within the model's
    # maximum input length instead of raising.
    predictions = sentiment_pipeline(texts, truncation=True) if texts else []

    data['label'] = [prediction['label'] for prediction in predictions]
    data['score'] = [prediction['score'] for prediction in predictions]

    return data

    

In [24]:
# Column labels shared by the Twitter and the Google News frames.
columns = [
    'Date of news/twitter',
    'Tweet content/news headline',
    'source of news / person name who has tweeted',
]

# Search inputs for the two sources.
twitter_keyword = 'Green Hydrogen'
google_url = 'https://news.google.com/rss/search?q=Green+Hydrogen+when:35d&hl=en-IN&gl=IN&ceid=IN:en'

# Fetch both sources (Twitter first, then Google News).
result = [
    twitter_news(twitter_keyword, columns),
    google_news(google_url, columns),
]

# Stack the two frames into one with a fresh 0..n-1 index.
data = pd.concat(result, ignore_index=True)

# Score every text; the last expression displays the scored frame.
twitter_google_data = sentiment_analysis(data)

twitter_google_data


Unnamed: 0,Date of news/twitter,Tweet content/news headline,source of news / person name who has tweeted,label,score
0,2022-03-05 09:52:32,"RT @AUThackeray: This afternoon, I received a ...",WorliShivsena,NEGATIVE,0.893204
1,2022-03-05 09:51:17,RT @son__of_adam: Kolaborasi yang sangat strat...,Luvaoki,POSITIVE,0.510398
2,2022-03-05 09:51:09,Consumer Electricity Bills Should Not Be Used ...,EcoTopicalNews,NEGATIVE,0.998010
3,2022-03-05 09:49:34,"Made of carbon fiber, the torch is light, resi...",aoOeqqmU72jzISV,POSITIVE,0.993516
4,2022-03-05 09:49:31,RT @son__of_adam: Kolaborasi yang sangat strat...,Lianaaluv,POSITIVE,0.510398
...,...,...,...,...,...
6250,"Tue, 15 Feb 2022 08:00:00 GMT",NorthC to install hydrogen fuel cell backup at...,https://www.datacenterdynamics.com/en/news/nor...,NEGATIVE,0.995869
6251,"Fri, 04 Feb 2022 08:00:00 GMT",The Hydrogen Stream: New fuel cell design to s...,https://www.pv-magazine.com/2022/02/04/the-hyd...,POSITIVE,0.978558
6252,"Wed, 16 Feb 2022 08:00:00 GMT",“Green” hydrogen energy firm floats on AIM at ...,https://www.standard.co.uk/business/green-hydr...,POSITIVE,0.901714
6253,"Tue, 22 Feb 2022 08:00:00 GMT",Reality Check: Europe Must Go Renewable to Esc...,https://rmi.org/europe-must-go-renewable-to-es...,NEGATIVE,0.773236


In [27]:
# Write the scored frame to disk without the row index.
twitter_google_data.to_csv(
    path_or_buf='Green_Hydrogen_data.csv',
    index=False,
)