In [1]:
import tweepy
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy import API
from tweepy import Cursor

from  geopy.geocoders import Nominatim
import reverse_geocoder as rg

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer 

import re
import csv

import credentials #stored API keys etc

import time

# Twitter API credentials are read from the local `credentials` module so
# that no secret is hard-coded in the notebook itself.
API_key=credentials.API_key
API_secret_key=credentials.API_secret_key
access_token=credentials.access_token
access_token_secret=credentials.access_token_secret


# Field names (and column order) of every row appended to etweets.csv.
column_headers=['User_Id','Created_at','Country_code','State','Latitude','Longitude','Tweet_text','Sentiment_compound','Overall_sentiment']

# Write the CSV header exactly once. Mode 'x' (exclusive creation) fails with
# FileExistsError when the file is already there, so re-running this cell
# never truncates or re-heads previously collected data. The header goes to
# 'etweets.csv', the same file the stream listener appends its rows to.
try:
    with open('etweets.csv','x',newline='') as file:
        writer=csv.DictWriter(file,fieldnames=column_headers)
        writer.writeheader()
except FileExistsError:
    # The file exists from an earlier run; keep it untouched.
    pass

# Shared, module-level service objects used by the stream listener.
# NOTE(review): "http" is a very generic user agent; geocoding services
# usually expect an application-specific identifier - confirm before heavy use.
geolocator = Nominatim(user_agent="http")
sentiment = SentimentIntensityAnalyzer()

In [2]:
def deEmojify(text):
    """Return ``text`` with every non-ASCII character (emoji included) removed.

    A falsy argument (``None`` or the empty string) yields ``None``. Any other
    string is returned with its non-ASCII characters dropped, which can leave
    an empty string when the input held nothing but non-ASCII characters.
    """
    if not text:
        return None
    # Encoding with errors='ignore' silently discards unencodable characters.
    ascii_only = text.encode('ascii', 'ignore')
    return ascii_only.decode('ascii')

class MyStreamListener(tweepy.StreamListener):
    """Stream listener that cleans, geocodes, scores and stores each tweet.

    For every status whose author has a usable profile location the listener:

    1. cleans the tweet text (mentions, links, non-alphanumerics, "RT"),
    2. geocodes the profile location with the module-level ``geolocator``,
    3. reverse-geocodes the coordinates with ``rg`` to get country and state,
    4. scores the cleaned text with the module-level ``sentiment`` analyzer,
    5. appends one row to ``etweets.csv`` and prints a short summary.

    Every callback avoids returning ``False`` so the stream stays connected.
    """

    # Raw strings: the regex escapes (\w, \S, \/) must reach the regex engine
    # untouched instead of being parsed as (invalid) Python string escapes.
    _NOISE_PATTERN = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")
    # Word boundaries keep "RT" inside ordinary words (e.g. "ALERT") intact.
    _RT_PATTERN = re.compile(r"\bRT\b")

    @staticmethod
    def _clean_text(raw_text):
        """Strip mentions, links, special characters and the RT marker, then collapse whitespace."""
        without_noise = MyStreamListener._NOISE_PATTERN.sub(" ", raw_text)
        without_rt = MyStreamListener._RT_PATTERN.sub(" ", without_noise)
        return ' '.join(without_rt.split())

    @staticmethod
    def _label_sentiment(compound):
        """Map a compound score to 'Neutral' (== 0), 'Positive' (> 0) or 'Negative' (< 0)."""
        if compound == 0:
            return 'Neutral'
        return 'Positive' if compound > 0 else 'Negative'

    def on_status(self, tweet):
        """Process one incoming status and append it to ``etweets.csv``.

        Statuses flagged as retweeted, statuses whose author has no profile
        location, and statuses whose location cannot be geocoded are skipped.
        Any other processing failure is reported and the status is skipped.

        Returns:
            True in every case, so the stream keeps running.
        """
        # NOTE(review): this only checks the `retweeted` flag; whether that flag
        # identifies retweets on streamed statuses should be confirmed (checking
        # for a `retweeted_status` attribute is the usual alternative).
        if tweet.retweeted:
            return True

        user_location = deEmojify(tweet.user.location)
        if user_location is None:
            return True

        try:
            # NOTE(review): `id_str` is the status id, yet it is stored in the
            # 'User_Id' column - confirm which of the two is intended.
            id_str = tweet.id_str
            created_at = tweet.created_at

            # The cleaning regex already drops every non-ASCII character, so no
            # separate de-emojify pass over the text is required.
            text = self._clean_text(tweet.text)

            # Getting the co-ordinates of the user
            loc = geolocator.geocode(user_location)
            if loc is None:
                # The free-text profile location could not be resolved.
                return True
            latitude = loc.latitude
            longitude = loc.longitude

            # Getting the state and country
            place = dict(rg.search((latitude, longitude))[0])
            country_code = place['cc']
            state = 'New Delhi' if place['admin1'] == 'NCT' else place['admin1']

            # Sentiment analysis by vaderSentiment
            tweet_compound = sentiment.polarity_scores(text)['compound']
            Overall_Sentiment = self._label_sentiment(tweet_compound)

            # newline='' lets the csv module control line endings itself
            # (otherwise blank rows can appear on some platforms).
            with open('etweets.csv', 'a', newline='') as file:
                writer = csv.DictWriter(file, fieldnames=column_headers)
                writer.writerow({
                    'User_Id': id_str,
                    'Created_at': created_at,
                    'Country_code': country_code,
                    'State': state,
                    'Latitude': latitude,
                    'Longitude': longitude,
                    'Tweet_text': text,
                    'Sentiment_compound': tweet_compound,
                    'Overall_sentiment': Overall_Sentiment
                })

            print(created_at,"\n",text,"\n","user loc:",user_location)
            print(tweet_compound,Overall_Sentiment)
            print(country_code,state)

        except Exception as exc:
            # Best effort: one bad tweet (geocoder failure, missing field, ...)
            # must not stop the stream. `Exception` rather than a bare except,
            # so KeyboardInterrupt/SystemExit still propagate.
            print("Skipping tweet:", exc)

        return True

    def on_error(self, status_code):
        """Handle an HTTP error status reported by the stream.

        On status 420 the listener prints "Error" and pauses for five minutes.
        Other status codes are only reported. The method returns True (never
        False), so the stream is not disconnected by this callback.
        """
        if status_code == 420:
            print("Error")
            time.sleep(60 * 5)
        else:
            print("Stream error, HTTP status:", status_code)
        return True

    def on_exception(self, exception):
        """Print an exception reported by the stream."""
        print(exception)
        return
    

In [None]:
# Authenticate against the Twitter API with the keys loaded in the first cell.
auth=tweepy.OAuthHandler(API_key,API_secret_key)
auth.set_access_token(access_token,access_token_secret)
api=API(auth)

# Attach the listener to a stream that reuses the same credentials.
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener = myStreamListener)

# Keep the stream alive: whenever filter() returns or fails, connect again.
while True:
    try:
        myStream.filter(languages=["en"], track = ["covid-19","covid","lockdown","corona","vaccine","recovery","death","containment"])

    except KeyboardInterrupt:
        # Interrupting the kernel must actually stop the collection; a bare
        # `except:` would swallow the interrupt and loop forever.
        myStream.disconnect()
        break
    except Exception as exc:
        print("Error", exc)
        # Short pause so repeated failures do not turn into a hot reconnect loop.
        time.sleep(5)