In [49]:
#Import requisite modules
import sys
import pandas as pd
import datetime
from twitterscraper import query_tweets #if you haven't installed this module, run 'pip install twitterscraper' in your notebook

In [6]:
# Full set of search strings; each entry is sent to the scraper as its own query.
query_list = (
    # Generic outage vocabulary
    [
        'power outage',
        'power out',
        'transformer AND power',
        'power cut',
        'power failure',
        'power interruption',
        'service interruption',
    ]
    # Terms that (presumably) name specific utilities -- verify with the authors
    + [
        'power AND coned',
        'water and power',
        'PG&E AND power',
        'southern california edison',
        'SCE AND power',
        'LADWP',
        'SDG&E',
        'DTE energy AND power',
        'SDG&E AND power',
        'consumers energy AND power',
        'national grid AND power',
        'EPS energy AND power',
        'Duke Energy AND power',
    ]
)


# make separate lists for each city?

In [7]:
# Short query list used for quick trial runs of the scraping functions.
query_test = ['power outage', 'power AND entergy texas', 'service interruption']

In [8]:
# the following code was built by Team Beta - Adam Cohen, Max Mazel, and Najiha Boosra

#Get tweets without geolocation
def get_tweets_no_geoloc(query, limit=100, begindate=datetime.date(2016, 1, 1)):
    """Scrape tweets matching ``query`` without any geolocation filter.

    Parameters
    ----------
    query : str
        Search string, passed unchanged to ``query_tweets``.
    limit : int, optional
        Second positional argument of ``query_tweets``. Defaults to 100,
        the value that was previously hard-coded.
    begindate : datetime.date, optional
        Forwarded as ``begindate`` to ``query_tweets``. Defaults to
        2016-01-01, the value that was previously hard-coded.

    Returns
    -------
    dict
        Maps a running integer index (0, 1, 2, ...) to one record per tweet.
        Each record has the keys ``tweet_id``, ``username``, ``text``,
        ``tweet_date``, ``search_term``, ``long``, ``lat`` and ``radius``.
        The three geolocation fields always hold the string ``'n/a'`` so the
        records carry the same fields as the geolocated variant.
    """
    tweets = {}
    # enumerate supplies the integer keys that later become the DataFrame index.
    for index, tweet in enumerate(query_tweets(query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            # Placeholder values: no geolocation was requested for this query.
            'long': 'n/a',
            'lat': 'n/a',
            'radius': 'n/a',
        }
    return tweets

#Get tweets with geolocation
def get_tweets_geoloc(query, lat, long, radius, limit=100, begindate=datetime.date(2016, 1, 1)):
    """Scrape tweets matching ``query`` restricted to a geographic circle.

    Parameters
    ----------
    query : str
        Search term; recorded unchanged in each record's ``search_term``.
    lat, long : float
        Centre of the search circle, interpolated into the ``geocode:`` term.
    radius : str
        Radius including its unit (the notebook uses values like ``'10mi'``).
    limit : int, optional
        Second positional argument of ``query_tweets``. Defaults to 100,
        the value that was previously hard-coded.
    begindate : datetime.date, optional
        Forwarded as ``begindate`` to ``query_tweets``. Defaults to
        2016-01-01, the value that was previously hard-coded.

    Returns
    -------
    dict
        Maps a running integer index (0, 1, 2, ...) to one record per tweet.
        Each record has the keys ``tweet_id``, ``username``, ``text``,
        ``tweet_date``, ``search_term``, ``lat``, ``long`` and ``radius``.
    """
    # NOTE(review): the comma after the query term is sent to the search as-is
    # -- confirm that it is intended rather than a typo.
    geo_query = f"{query}, geocode:{lat},{long},{radius}"
    tweets = {}
    # enumerate supplies the integer keys that later become the DataFrame index.
    for index, tweet in enumerate(query_tweets(geo_query, limit, begindate=begindate)):
        tweets[index] = {
            'tweet_id': tweet.tweet_id,
            'username': tweet.username,
            'text': tweet.text,
            'tweet_date': tweet.timestamp,
            'search_term': query,
            'lat': lat,
            'long': long,
            'radius': radius,
        }
    return tweets

#Generate dataframe from "tweets" dictionary generated after each query
def make_dataframe(dictionary):
    """Turn an ``{index: record}`` dictionary into a DataFrame.

    Each outer key becomes a row label and each record's keys become
    the columns (``orient='index'``).
    """
    return pd.DataFrame.from_dict(dictionary, orient='index')

#Master function
def get_query_dataframe(list_of_queries):
    """Interactively scrape every query and combine the results in one DataFrame.

    The user is asked whether to use geolocation. An answer of ``yes`` or
    ``y`` (case-insensitive, surrounding whitespace ignored) triggers prompts
    for latitude, longitude and radius, and every query is then run through
    ``get_tweets_geoloc``. Any other answer runs every query through
    ``get_tweets_no_geoloc``.

    Parameters
    ----------
    list_of_queries : iterable of str
        Search terms to scrape, one scraper call per term.

    Returns
    -------
    pandas.DataFrame
        All records concatenated with a fresh 0..n-1 index. An empty
        DataFrame is returned when there are no queries or no tweets.

    Raises
    ------
    ValueError
        If the latitude or longitude answer cannot be converted to float.

    Notes
    -----
    NOTE(review): the recorded output of this notebook shows the same
    tweet_id on consecutive rows, so callers may want to de-duplicate on
    ``tweet_id`` -- confirm whether duplicates are expected.
    """
    # TODO: prompt for a query start date (year / month / day) -- still under development.
    answer = input("Are you using geolocation?")
    # Strip whitespace so an answer such as "yes " is not silently treated as "no".
    use_geoloc = answer.strip().lower() in ('yes', 'y')

    if use_geoloc:
        lat = float(input("Input Latitude:"))  # float() raises ValueError on bad input
        long = float(input("Input Longitude:"))
        # Stray spaces would otherwise end up inside the geocode search term.
        radius = input("Input radius and unit:").strip()

    frames = []
    for query in list_of_queries:
        if use_geoloc:
            tweets = get_tweets_geoloc(query, lat, long, radius)
        else:
            tweets = get_tweets_no_geoloc(query)
        frame = make_dataframe(tweets)
        # Skip queries that produced no tweets; they add no rows to the result.
        if not frame.empty:
            frames.append(frame)

    # pd.concat raises on an empty list, so keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the growing frame per query.
    return pd.concat(frames, ignore_index=True)

In [33]:
# Per-city search settings: centre coordinates, search radius, and the query words to run.
query_dict = {
    'Houston': dict(
        city='Houston',
        lat=29.760427,
        long=-95.369804,
        radius='15mi',
        queries=['list', 'random', 'words'],
    ),
    'Detroit': dict(
        city='Detroit',
        lat=42.331429,
        long=-83.045753,
        radius='10mi',
        queries=['more', 'weird', 'words'],
    ),
}

In [34]:
# Sanity check: look up the list of query words stored for one city.
query_dict['Houston']['queries']

['list', 'random', 'words']

In [35]:
# Print each (city, query word) pair to confirm the nested iteration order.
for city, settings in query_dict.items():
    for word in settings['queries']:
        print(city, word)

Houston list
Houston random
Houston words
Detroit more
Detroit weird
Detroit words


In [43]:
def get_full_dataframe(dictionary):
    """Scrape every query of every entry in ``dictionary`` into one DataFrame.

    Parameters
    ----------
    dictionary : dict
        Maps a label to a settings dict that provides at least the keys
        ``'lat'``, ``'long'``, ``'radius'`` and ``'queries'`` (an iterable of
        search terms). Each term is scraped with ``get_tweets_geoloc`` using
        that entry's coordinates and radius.

    Returns
    -------
    pandas.DataFrame
        All records concatenated with a fresh 0..n-1 index. An empty
        DataFrame is returned when there is nothing to concatenate.

    Raises
    ------
    KeyError
        If an entry lacks one of the required keys.
    """
    frames = []
    for settings in dictionary.values():
        for query in settings['queries']:
            tweets = get_tweets_geoloc(
                query, settings['lat'], settings['long'], settings['radius']
            )
            frame = make_dataframe(tweets)
            # Skip queries that produced no tweets; they add no rows to the result.
            if not frame.empty:
                frames.append(frame)

    # pd.concat raises on an empty list, so keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    # One concat at the end instead of re-copying the growing frame per query.
    return pd.concat(frames, ignore_index=True)



In [44]:
%%time

# Run every city/query combination in query_dict; the recorded run of this
# cell took roughly 3.5 minutes of wall time.
testing_df = get_full_dataframe(query_dict)

INFO: queries: ['list, geocode:29.760427,-95.369804,15mi since:2016-01-01 until:2016-03-20', 'list, geocode:29.760427,-95.369804,15mi since:2016-03-20 until:2016-06-07', 'list, geocode:29.760427,-95.369804,15mi since:2016-06-07 until:2016-08-26', 'list, geocode:29.760427,-95.369804,15mi since:2016-08-26 until:2016-11-13', 'list, geocode:29.760427,-95.369804,15mi since:2016-11-13 until:2017-02-01', 'list, geocode:29.760427,-95.369804,15mi since:2017-02-01 until:2017-04-21', 'list, geocode:29.760427,-95.369804,15mi since:2017-04-21 until:2017-07-09', 'list, geocode:29.760427,-95.369804,15mi since:2017-07-09 until:2017-09-27', 'list, geocode:29.760427,-95.369804,15mi since:2017-09-27 until:2017-12-15', 'list, geocode:29.760427,-95.369804,15mi since:2017-12-15 until:2018-03-05', 'list, geocode:29.760427,-95.369804,15mi since:2018-03-05 until:2018-05-23', 'list, geocode:29.760427,-95.369804,15mi since:2018-05-23 until:2018-08-10', 'list, geocode:29.760427,-95.369804,15mi since:2018-08-10 un

CPU times: user 682 ms, sys: 794 ms, total: 1.48 s
Wall time: 3min 27s


In [9]:
# Interactive run: prompts for the geolocation settings, then displays the
# combined frame for query_test (the result is not stored in a variable).
get_query_dataframe(query_test)

Are you using geolocation? yes
Input Latitude: 29.760427
Input Longitude: -95.369804
Input radius and unit: 10mi


INFO: queries: ['power outage, geocode:29.760427,-95.369804,10mi since:2016-01-01 until:2016-03-20', 'power outage, geocode:29.760427,-95.369804,10mi since:2016-03-20 until:2016-06-07', 'power outage, geocode:29.760427,-95.369804,10mi since:2016-06-07 until:2016-08-26', 'power outage, geocode:29.760427,-95.369804,10mi since:2016-08-26 until:2016-11-13', 'power outage, geocode:29.760427,-95.369804,10mi since:2016-11-13 until:2017-02-01', 'power outage, geocode:29.760427,-95.369804,10mi since:2017-02-01 until:2017-04-21', 'power outage, geocode:29.760427,-95.369804,10mi since:2017-04-21 until:2017-07-09', 'power outage, geocode:29.760427,-95.369804,10mi since:2017-07-09 until:2017-09-27', 'power outage, geocode:29.760427,-95.369804,10mi since:2017-09-27 until:2017-12-15', 'power outage, geocode:29.760427,-95.369804,10mi since:2017-12-15 until:2018-03-05', 'power outage, geocode:29.760427,-95.369804,10mi since:2018-03-05 until:2018-05-23', 'power outage, geocode:29.760427,-95.369804,10mi 

Unnamed: 0,tweet_id,username,text,tweet_date,search_term,lat,long,radius
0,796386783328137216,lil Mizz Sunshine,Geez campus wide power outage...cause of contr...,2016-11-09 16:19:39,power outage,29.760427,-95.369804,10mi
1,794247534789042176,Luz Rivera,This is happening at work now!! First it was a...,2016-11-03 18:39:03,power outage,29.760427,-95.369804,10mi
2,791793484021850112,"River Oaks, TX News",Power outage briefly shuts down River Oaks The...,2016-10-28 00:07:32,power outage,29.760427,-95.369804,10mi
3,777968084959145984,Houston Now,Aldine ISD students sickened by heat during ho...,2016-09-19 20:30:20,power outage,29.760427,-95.369804,10mi
4,773972241369497600,"Deer Park, TX News",Parent message regarding power outage at Deer ...,2016-09-08 19:52:16,power outage,29.760427,-95.369804,10mi
...,...,...,...,...,...,...,...,...
160,1204482129100496902,Elijah🖖🏼,".@txuenergy my bill was due yesterday, how lon...",2019-12-10 19:24:52,service interruption,29.760427,-95.369804,10mi
161,1164575832745992192,Bowled Up,We needed to have a brief service interruption...,2019-08-22 16:31:29,service interruption,29.760427,-95.369804,10mi
162,1164575832745992192,Bowled Up,We needed to have a brief service interruption...,2019-08-22 16:31:29,service interruption,29.760427,-95.369804,10mi
163,1045760656232861696,Ali,Why do I think being on a real estate team is ...,2018-09-28 19:42:24,service interruption,29.760427,-95.369804,10mi


In [47]:
# Size of the combined frame as (rows, columns).
testing_df.shape

(2302, 8)

In [48]:
# Display the combined frame; the recorded output shows only its first and last rows.
testing_df

Unnamed: 0,tweet_id,username,text,tweet_date,search_term,lat,long,radius
0,883799994351972352,1ℵ✺ℵℵ✺ℵ,Last Year's #JxnFreshmanClass2k16 Official Lis...,2017-07-08 21:28:33,list,29.760427,-95.369804,15mi
1,883776940347449345,George Salas,See ya tonight #Htown!!!!! Enter Our Guest Lis...,2017-07-08 19:56:57,list,29.760427,-95.369804,15mi
2,883722139513257984,TheBitchyBusinessBriefs by Sharon Lee Zapata,Top 10 list HOW TO GET YOUR ASS MOVING \n1. Co...,2017-07-08 16:19:11,list,29.760427,-95.369804,15mi
3,883535423150149632,XtremeNitelife.com,"Book your Birthday, Bachelorette or Special Oc...",2017-07-08 03:57:14,list,29.760427,-95.369804,15mi
4,883522054183567364,Era Kay Kent Hill,One thing on my Bucket ListThank You Melissa #...,2017-07-08 03:04:07,list,29.760427,-95.369804,15mi
...,...,...,...,...,...,...,...,...
2297,1228010181516238850,⬡ k808 ☾,my bf and i play words with friends against ea...,2020-02-13 17:36:57,words,42.331429,-83.045753,10mi
2298,1227751479643119616,Sydney Thompson,Currently attending @WEtechAlliance’s Driving ...,2020-02-13 00:28:57,words,42.331429,-83.045753,10mi
2299,1227650661975760896,⬡ k808 ☾,will someone play me in cup pong via imessage ...,2020-02-12 17:48:21,words,42.331429,-83.045753,10mi
2300,1227633151478026240,Sacramento Knoxx,Can I use the big words with you and then swit...,2020-02-12 16:38:46,words,42.331429,-83.045753,10mi
