In [35]:
#Import requisite modules
import sys
import pandas as pd
import datetime
from twitterscraper import query_tweets #if you haven't installed this module, run 'pip install twitterscraper' in your notebook

#Sample search terms -- add or remove entries to suit your own search.
query_list = [
    'COVID', 'COVID-19', 'Corona', 'Coronavirus', 'Rona', 'Quarantine',  #plain keywords
    '#COVID', '#COVID-19', '#quarantine', '#Quarantine', '#covid19',     #hashtags
]
#Credit to Danielle Medellin, DSI11-NYC for the below implementation of custom parameter dictionary support
#One entry per city, keyed by city name; every entry holds the keys
#'city', 'lat', 'long', 'radius' and 'queries' (a list of search terms).
#No function in this cell reads the dictionary yet.
custom_params = {
    'Houston': {
        'city': 'Houston',
        'lat': 29.760427,
        'long': -95.369804,
        'radius': '15mi',
        'queries': ['list', 'random', 'words'],
    },
    'Detroit': {
        'city': 'Detroit',
        'lat': 42.331429,
        'long': -83.045753,
        'radius': '10mi',
        'queries': ['more', 'weird', 'words'],
    },
}

#Get tweets without geolocation
def get_tweets_no_geoloc(query, limit=10, begindate=datetime.date(2019, 12, 1)):
    """Scrape tweets matching ``query`` without any location filter.

    Parameters
    ----------
    query : str
        Search term, handed to ``query_tweets`` unchanged.
    limit : int, optional
        Forwarded as the second positional argument of ``query_tweets``
        (twitterscraper's tweet limit). Defaults to 10, the value that used
        to be hard-coded here.
    begindate : datetime.date, optional
        Forwarded as ``begindate=``; start of the search window.
        Defaults to 2019-12-01, the value that used to be hard-coded here.

    Returns
    -------
    dict
        ``{row_number: record}`` with row numbers starting at 0, ready for
        ``pd.DataFrame.from_dict(..., orient='index')``.
    """
    tweets = {}
    #enumerate supplies the 0-based row number used as the dataframe index later
    for index, tweet in enumerate(query_tweets(query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            #'n/a' placeholders keep the columns identical to the geolocated results
            'long': 'n/a',
            'lat': 'n/a',
            'radius': 'n/a',
        }
    return tweets

#Get tweets with geolocation
def get_tweets_geoloc(query, lat, long, radius, limit=10, begindate=datetime.date(2019, 12, 1)):
    """Scrape tweets matching ``query`` restricted to a circle around a point.

    Parameters
    ----------
    query : str
        Search term; ``", geocode:<lat>,<long>,<radius>"`` is appended to it
        before it is handed to ``query_tweets``.
    lat, long : float
        Centre of the search circle.
    radius : str
        Radius including its unit, e.g. ``'20km'`` or ``'15mi'``.
    limit : int, optional
        Forwarded as the second positional argument of ``query_tweets``
        (twitterscraper's tweet limit). Defaults to 10, the value that used
        to be hard-coded here.
    begindate : datetime.date, optional
        Forwarded as ``begindate=``; start of the search window.
        Defaults to 2019-12-01, the value that used to be hard-coded here.

    Returns
    -------
    dict
        ``{row_number: record}`` with row numbers starting at 0. Each record
        stores the bare ``query`` (not the geocode suffix) as ``search_term``
        together with the ``lat`` / ``long`` / ``radius`` that were searched.
    """
    geo_query = f"{query}, geocode:{lat},{long},{radius}"
    tweets = {}
    #enumerate supplies the 0-based row number used as the dataframe index later
    for index, tweet in enumerate(query_tweets(geo_query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            'lat': lat,
            'long': long,
            'radius': radius,
        }
    return tweets

#Turn the "tweets" dictionary produced by one query into a dataframe
def make_dataframe(dictionary):
    """Return a DataFrame with one row per key of ``dictionary``.

    The outer keys become the index and the keys of the inner dictionaries
    become the columns (``orient='index'``).
    """
    return pd.DataFrame.from_dict(dictionary, orient='index')
#--------------------------------------------------------------------
#Master function
def get_query_dataframe(list_of_queries):
    """Interactively scrape every query and return the combined DataFrame.

    Asks via ``input()`` whether geolocation should be used. On "yes"
    (case-insensitive, surrounding whitespace ignored, "y" also accepted) it
    additionally asks for latitude, longitude and radius and runs every query
    through ``get_tweets_geoloc``; on any other answer it runs every query
    through ``get_tweets_no_geoloc``.

    Parameters
    ----------
    list_of_queries : iterable of str
        Search terms to scrape, one scrape per term.

    Returns
    -------
    pandas.DataFrame
        Rows of all queries stacked with a fresh 0..n-1 index; an empty
        DataFrame when ``list_of_queries`` is empty.

    Raises
    ------
    ValueError
        If the latitude or longitude typed by the user is not a number.
    """
    #TODO: prompt for a query start date (year / month / day) -- still under development.
    geo_switch = input("Are you using geolocation?")
    use_geoloc = geo_switch.strip().lower() in ('yes', 'y')

    if use_geoloc:
        lat = float(input("Input Latitude:")) #float() tolerates surrounding whitespace
        long = float(input("Input Longitude:"))
        #strip so stray spaces do not end up inside the geocode search operator
        radius = input("Input radius and unit:").strip()

    #Collect one frame per query and concatenate once at the end; growing the
    #dataset with pd.concat inside the loop re-copies every earlier row each time.
    frames = []
    for query in list_of_queries:
        if use_geoloc:
            tweets = get_tweets_geoloc(query, lat, long, radius)
        else:
            tweets = get_tweets_no_geoloc(query)
        frames.append(make_dataframe(tweets))

    if not frames: #pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [33]:
#Scrape every term in query_list into one dataframe; get_query_dataframe asks for the geolocation settings via input() first.
#NOTE(review): this cell's execution count (33) is lower than that of the defining cell (35), so the cells were run out of order -- re-run top to bottom to confirm the output.
scrapedf = get_query_dataframe(query_list)

Are you using geolocation? yes
Input Latitude: 40.758626
Input Longitude: -73.999164
Input radius and unit: 20km


INFO: queries: ['COVID, geocode:40.758626,-73.999164,20km since:2019-12-01 until:2019-12-08', 'COVID, geocode:40.758626,-73.999164,20km since:2019-12-08 until:2019-12-16', 'COVID, geocode:40.758626,-73.999164,20km since:2019-12-16 until:2019-12-24', 'COVID, geocode:40.758626,-73.999164,20km since:2019-12-24 until:2020-01-01', 'COVID, geocode:40.758626,-73.999164,20km since:2020-01-01 until:2020-01-09', 'COVID, geocode:40.758626,-73.999164,20km since:2020-01-09 until:2020-01-17', 'COVID, geocode:40.758626,-73.999164,20km since:2020-01-17 until:2020-01-25', 'COVID, geocode:40.758626,-73.999164,20km since:2020-01-25 until:2020-02-02', 'COVID, geocode:40.758626,-73.999164,20km since:2020-02-02 until:2020-02-10', 'COVID, geocode:40.758626,-73.999164,20km since:2020-02-10 until:2020-02-18', 'COVID, geocode:40.758626,-73.999164,20km since:2020-02-18 until:2020-02-25', 'COVID, geocode:40.758626,-73.999164,20km since:2020-02-25 until:2020-03-04', 'COVID, geocode:40.758626,-73.999164,20km since:

In [None]:
#Import requisite modules
import sys
import pandas as pd
import datetime
from twitterscraper import query_tweets #if you haven't installed this module, run 'pip install twitterscraper' in your notebook

#Sample search terms -- add or remove entries to suit your own search.
query_list = [
    'COVID', 'COVID-19', 'Corona', 'Coronavirus', 'Rona', 'Quarantine',  #plain keywords
    '#COVID', '#COVID-19', '#quarantine', '#Quarantine', '#covid19',     #hashtags
]
#--------------------------------------------------------------------

#Credit to Danielle Medellin, DSI11-NYC for the below implementation of custom parameter dictionary support
#One entry per city, keyed by city name; every entry holds the keys
#'city', 'lat', 'long', 'radius' and 'queries' (a list of search terms).
#get_dataset hands this dictionary to get_query_dataframe_cp.
custom_params = {
    'Houston': {
        'city': 'Houston',
        'lat': 29.760427,
        'long': -95.369804,
        'radius': '15mi',
        'queries': ['list', 'random', 'words'],
    },
    'Detroit': {
        'city': 'Detroit',
        'lat': 42.331429,
        'long': -83.045753,
        'radius': '10mi',
        'queries': ['more', 'weird', 'words'],
    },
}
#--------------------------------------------------------------------

#Get tweets without geolocation
def get_tweets(query, limit=10, begindate=datetime.date(2019, 12, 1)):
    """Scrape tweets matching ``query`` without any location filter.

    Parameters
    ----------
    query : str
        Search term, handed to ``query_tweets`` unchanged.
    limit : int, optional
        Forwarded as the second positional argument of ``query_tweets``
        (twitterscraper's tweet limit). Defaults to 10, the value that used
        to be hard-coded here.
    begindate : datetime.date, optional
        Forwarded as ``begindate=``; start of the search window.
        Defaults to 2019-12-01, the value that used to be hard-coded here.

    Returns
    -------
    dict
        ``{row_number: record}`` with row numbers starting at 0, ready for
        ``pd.DataFrame.from_dict(..., orient='index')``.
    """
    tweets = {}
    #enumerate supplies the 0-based row number used as the dataframe index later
    for index, tweet in enumerate(query_tweets(query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            #'n/a' placeholders keep the columns identical to the geolocated results
            'city': 'n/a',
            'long': 'n/a',
            'lat': 'n/a',
            'radius': 'n/a',
        }
    return tweets
#--------------------------------------------------------------------

#Get tweets with geolocation
def get_tweets_geoloc(query, lat, long, radius, city='n/a', limit=10, begindate=datetime.date(2019, 12, 1)):
    """Scrape tweets matching ``query`` restricted to a circle around a point.

    Parameters
    ----------
    query : str
        Search term; ``", geocode:<lat>,<long>,<radius>"`` is appended to it
        before it is handed to ``query_tweets``.
    lat, long : float
        Centre of the search circle (typed in by the user or taken from a
        custom parameter dictionary).
    radius : str
        Radius including its unit, e.g. ``'20km'`` or ``'15mi'``.
    city : str, optional
        Label stored in the ``'city'`` column. Defaults to ``'n/a'``, the same
        placeholder ``get_tweets`` writes, so both functions now produce the
        same set of columns.
    limit : int, optional
        Forwarded as the second positional argument of ``query_tweets``
        (twitterscraper's tweet limit). Defaults to 10, the value that used
        to be hard-coded here.
    begindate : datetime.date, optional
        Forwarded as ``begindate=``; start of the search window.
        Defaults to 2019-12-01, the value that used to be hard-coded here.

    Returns
    -------
    dict
        ``{row_number: record}`` with row numbers starting at 0. Each record
        stores the bare ``query`` (not the geocode suffix) as ``search_term``.
    """
    geo_query = f"{query}, geocode:{lat},{long},{radius}"
    tweets = {}
    #enumerate supplies the 0-based row number used as the dataframe index later
    for index, tweet in enumerate(query_tweets(geo_query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            'city': city,
            'lat': lat,
            'long': long,
            'radius': radius,
        }
    return tweets
#--------------------------------------------------------------------

#Turn the "tweets" dictionary produced by one query into a dataframe
def make_dataframe(dictionary):
    """Return a DataFrame with one row per key of ``dictionary``.

    The outer keys become the index and the keys of the inner dictionaries
    become the columns (``orient='index'``).
    """
    return pd.DataFrame.from_dict(dictionary, orient='index')
#--------------------------------------------------------------------

#Query function using custom parameters
def get_query_dataframe_cp(custom_params):
    """Scrape every query of every entry in a custom parameter dictionary.

    Parameters
    ----------
    custom_params : dict
        ``{name: settings}`` where each ``settings`` dict provides ``'lat'``,
        ``'long'``, ``'radius'`` and ``'queries'`` (a list of search terms),
        plus an optional ``'city'`` label (the entry's key is used when it is
        missing).

    Returns
    -------
    pandas.DataFrame
        Rows of all scrapes stacked with a fresh 0..n-1 index and a ``'city'``
        column holding the entry's label; an empty DataFrame when there is
        nothing to scrape.

    Raises
    ------
    KeyError
        If an entry lacks ``'lat'``, ``'long'``, ``'radius'`` or ``'queries'``.
    """
    frames = []
    for name, params in custom_params.items():
        city = params.get('city', name)
        for query in params['queries']:
            tweets = get_tweets_geoloc(query, params['lat'], params['long'], params['radius'])
            df = make_dataframe(tweets)
            df['city'] = city #tag the rows with the entry they were scraped for
            frames.append(df)

    if not frames: #pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
#---------------------------------------------------------------

#Query function with geolocation but no custom parameters
def get_query_dataframe_geo(list_of_queries, lat, long, radius):
    """Scrape every query around one location and return the combined DataFrame.

    The location is now received as parameters (matching the four-argument
    call in ``get_dataset``) instead of being read from names that were never
    defined inside this function.

    Parameters
    ----------
    list_of_queries : iterable of str
        Search terms to scrape, one scrape per term.
    lat, long : float
        Centre of the search circle.
    radius : str
        Radius including its unit, e.g. ``'20km'``.

    Returns
    -------
    pandas.DataFrame
        Rows of all queries stacked with a fresh 0..n-1 index; an empty
        DataFrame when ``list_of_queries`` is empty.
    """
    frames = [
        make_dataframe(get_tweets_geoloc(query, lat, long, radius))
        for query in list_of_queries
    ]
    if not frames: #pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
#-------------------------------------------------------------------

#Query function with no custom anything
def get_query_dataframe(list_of_queries):
    """Scrape every query without geolocation and return the combined DataFrame.

    Parameters
    ----------
    list_of_queries : iterable of str
        Search terms to scrape with ``get_tweets``, one scrape per term.

    Returns
    -------
    pandas.DataFrame
        Rows of all queries stacked with a fresh 0..n-1 index; an empty
        DataFrame when ``list_of_queries`` is empty.
    """
    #Build one frame per query and concatenate once; growing the dataset with
    #pd.concat inside the loop re-copies every earlier row on each iteration.
    frames = [make_dataframe(get_tweets(query)) for query in list_of_queries]
    if not frames: #pd.concat raises on an empty list
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)
#------------------------------------------------------------------

#Master function
def get_dataset():
    """Interactively pick a scraping mode and return the resulting DataFrame.

    Prompts, in order:

    1. "custom parameter dictionary?" -- on yes, scrapes with the module-level
       ``custom_params`` via ``get_query_dataframe_cp``.
    2. otherwise "geolocation?" -- on yes, asks for latitude, longitude and
       radius and scrapes the module-level ``query_list`` via
       ``get_query_dataframe_geo``.
    3. otherwise scrapes ``query_list`` via ``get_query_dataframe``.

    Answers are compared case-insensitively with surrounding whitespace
    ignored; "yes" and "y" count as yes, anything else as no.

    Returns
    -------
    pandas.DataFrame
        Whatever the selected query function returns.

    Raises
    ------
    ValueError
        If the latitude or longitude typed by the user is not a number.
    """
    def _said_yes(answer):
        #Normalise the typed answer so " Yes " or "Y" are not silently treated as "no"
        return answer.strip().lower() in ('yes', 'y')

    if _said_yes(input("Are you using a custom parameter dictionary?")):
        return get_query_dataframe_cp(custom_params)

    if _said_yes(input("Are you using geolocation")):
        lat = float(input("Input Latitude:")) #float() tolerates surrounding whitespace
        long = float(input("Input Longitude:"))
        #strip so stray spaces do not end up inside the geocode search operator
        radius = input("Input radius and unit:").strip()
        return get_query_dataframe_geo(query_list, lat, long, radius)

    #was misspelled "get_query_datarame", which raised NameError on this path
    return get_query_dataframe(query_list)