## Using Tweepy
> Author: [Dawn Graham](https://dawngraham.github.io/)

Versions used:
- Python 3.6.6
- pandas 0.23.4
- tweepy 3.7.0
- twitterscraper 0.9.3

The first time you run this notebook, fill in all four credentials in the cell below so that they are saved to the pkl file. After that you can remove the secret keys from the notebook, because later runs load them from the pkl file instead.

The pkl file contains sensitive information that can be used to take control of your Twitter account. Do not share it.

In [1]:
import pickle
import os

# The credentials are cached in a local pickle so the secrets only have to be
# typed into this cell once. Keep the pickle out of version control.
if not os.path.exists('secret_twitter_credentials.pkl'):
    # First run: put the four values in here, execute the cell, then blank
    # them out again -- later runs take the `else` branch.
    Twitter = {
        'Consumer Key': '',
        'Consumer Secret': '',
        'Access Token': '',
        'Access Token Secret': '',
    }
    with open('secret_twitter_credentials.pkl', 'wb') as f:
        pickle.dump(Twitter, f)
else:
    # SECURITY: unpickling can execute arbitrary code, so only ever load a
    # credentials file that you created yourself with this cell.
    # The `with` block closes the file handle once loading is done.
    with open('secret_twitter_credentials.pkl', 'rb') as f:
        Twitter = pickle.load(f)

In [2]:
from tweepy import OAuthHandler
from tweepy import API

# Authenticate with the consumer key pair first, then attach the
# access token pair to the same handler.
auth = OAuthHandler(
    Twitter['Consumer Key'],
    Twitter['Consumer Secret'],
)
auth.set_access_token(
    Twitter['Access Token'],
    Twitter['Access Token Secret'],
)

# Client object used by every Tweepy call in the rest of the notebook
api = API(auth)

# Sanity check: when authentication worked, this prints the
# name of the authenticated account.
print(api.me().name)

Dawn Graham


In [3]:
# Get timeline tweets from a specific user ('cnn' is presumably the account's screen name).
# NOTE: `tweets` is reassigned by the two search cells that follow, so only the
# most recently executed assignment is what the extraction loop further down reads.
tweets = api.user_timeline('cnn')

In [4]:
# Get tweets matching the keyword 'poweroutage' (replaces the timeline result in `tweets`)
tweets = api.search('poweroutage')

In [5]:
# Same keyword search, additionally passing `until` to bound the results by date.
# NOTE(review): Twitter's standard search API describes `until` as "tweets created
# before the given date" and only indexes about the last week -- confirm against
# the current API documentation before relying on older dates.
tweets = api.search('poweroutage', until='2019-07-19')

In [6]:
# Column store for the Tweepy results: every column starts out as its own
# empty list and gets one entry appended per tweet.
# Field reference:
# https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html
tweets_dict = {
    column: []
    for column in (
        'created_at',
        'id_str',
        'text',
        'user-screen_name',
        'user-id_str',
        'user-location',
        'user-description',
        'user-verified',
        'user-followers_count',
        'retweet_count',
        'favorite_count',
    )
}

In [7]:
# Copy the fields of interest from every tweet into tweets_dict.
# NOTE: running this cell again appends the same tweets a second time,
# because tweets_dict is only reset by the cell that defines it.
for tweet in tweets:
    author = tweet.user
    # Read every field before touching tweets_dict: if any attribute access
    # fails, nothing has been appended yet, so all columns keep the same
    # length and pd.DataFrame(tweets_dict) still works afterwards.
    row = {
        'created_at': tweet.created_at,
        'id_str': tweet.id_str,
        'text': tweet.text,
        'user-screen_name': author.screen_name,
        'user-id_str': author.id_str,
        'user-location': author.location,
        'user-description': author.description,
        'user-verified': author.verified,
        'user-followers_count': author.followers_count,
        'retweet_count': tweet.retweet_count,
        'favorite_count': tweet.favorite_count,
    }
    for column, value in row.items():
        tweets_dict[column].append(value)

In [8]:
import pandas as pd
# Turn the collected columns into a DataFrame keyed by creation time
tweets_df = pd.DataFrame(tweets_dict).set_index('created_at')

In [9]:
# Preview the first rows of the collected tweets (head() shows five by default)
tweets_df.head()

Unnamed: 0_level_0,id_str,text,user-screen_name,user-id_str,user-location,user-description,user-verified,user-followers_count,retweet_count,favorite_count
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019-01-06 22:44:17,1082045214741725186,RT @PowerOutage_us: Over 200k customers are wi...,HurricaneCity,36161186,"Delray beach,Florida",Editor of https://t.co/mEHbHg3aiN statistics &...,False,2670,6,0
2019-01-06 22:40:33,1082044274051403776,#poweroutage struggle is real. https://t.co/Fe...,Chead10,293109355,,"Food Snob, gadgeteer, lover of all things Sports",False,184,0,0
2019-01-06 22:37:51,1082043597791232000,RT @IAFF3520: Do not ride your bike over downe...,BeckyShockMay,65078889,"Pacific Northwest, USA",,False,968,11,0
2019-01-06 22:32:57,1082042362082222080,RT @IAFF3520: Do not ride your bike over downe...,MeetTheAuthor,2654543552,"Seattle, Washington","Today's evolving technology news for readers, ...",False,50873,11,0
2019-01-06 22:18:30,1082038726023204864,RT @IAFF3520: Do not ride your bike over downe...,Lori_Knerr,331235589,,,False,40,11,0


In [10]:
# (rows, columns) of the frame; created_at is the index, so it is not counted as a column
tweets_df.shape

(15, 10)

### Get user info

In [11]:
# Fetch a single user's profile and read the `location` attribute of the result
api.get_user('dawnzillamonstr').location

'Boston, MA'

---
## Using Twitterscraper
Documentation:
- https://pypi.org/project/twitterscraper/
- https://github.com/taspinar/twitterscraper

In [12]:
from twitterscraper import query_tweets
import datetime

In [13]:
# Column store for the twitterscraper results: one independent, initially
# empty list per column. This replaces the Tweepy tweets_dict defined earlier.
tweets_dict = {
    column: []
    for column in (
        'timestamp',
        'id',
        'text',
        'user',
        'likes',
        'replies',
        'retweets',
        'user_location',
    )
}

# Search expression; Twitter's advanced search page shows the available operators:
# https://twitter.com/search-advanced
# NOTE(review): the comma after @cnn is sent to the search verbatim -- confirm it is intended.
query = '@cnn, OR @google'

# One-day window: `end` is the day after `begin`
begin = datetime.date(2015, 1, 2)
end = begin + datetime.timedelta(days=1)

def get_query(query, begin, end, limit=1):
    """Scrape tweets matching ``query`` and return them as a DataFrame.

    Every scraped tweet is appended to the notebook-level ``tweets_dict``
    (which must already hold one list per column), so repeated calls keep
    accumulating rows. The returned frame is built from that whole dict,
    not only from the tweets of the current call.

    Parameters
    ----------
    query : str
        Search expression forwarded to ``query_tweets``.
    begin, end : datetime.date
        Forwarded to ``query_tweets`` as ``begindate`` and ``enddate``.
    limit : int, optional
        Forwarded to ``query_tweets`` as ``limit``. Defaults to 1, the value
        that used to be hard-coded here.

    Returns
    -------
    pandas.DataFrame
        All rows currently held in ``tweets_dict``, indexed by ``timestamp``.

    Notes
    -----
    One ``api.get_user`` request is made per tweet to resolve the author's
    location. Any exception raised by that request propagates to the caller.
    """
    for tweet in query_tweets(query, limit=limit, begindate=begin, enddate=end):
        # Assemble the full row (including the remote user lookup) before
        # touching tweets_dict: if the lookup raises, no column has been
        # extended, so the shared dict never ends up with ragged columns.
        row = {
            'timestamp': tweet.timestamp,
            'id': tweet.id,
            'text': tweet.text,
            'user': tweet.user,
            'likes': tweet.likes,
            'replies': tweet.replies,
            'retweets': tweet.retweets,
            'user_location': api.get_user(tweet.user).location,
        }
        for column, value in row.items():
            tweets_dict[column].append(value)

    return pd.DataFrame(tweets_dict).set_index('timestamp')
        
# Run the scrape for the query and date window defined above. get_query appends to the
# notebook-level tweets_dict, and the DataFrame it returns is this cell's displayed output.
get_query(query, begin, end)

INFO: queries: ['@cnn, OR @google since:2015-01-02 until:2015-01-03']
INFO: Querying @cnn, OR @google since:2015-01-02 until:2015-01-03
INFO: Got 20 tweets for @cnn,%20OR%20@google%20since%3A2015-01-02%20until%3A2015-01-03.
INFO: Got 20 tweets (20 new).


Unnamed: 0_level_0,id,text,user,likes,replies,retweets,user_location
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02 23:58:28,551165661385613312,@cnnbrk @CNN I hope she recovers. Will be inte...,karen_alexa,0,0,0,Toronto
2015-01-02 23:58:13,551165597707272192,¿Che pasa? RT @servicedepot191: @dacw10 @Holly...,RedScareBot,0,0,0,Wisconsin
2015-01-02 23:56:30,551165168483573761,@AngelMedinaUCLA @gigizanchetta @CNN @CNNEE @t...,opsa22,0,0,0,VENEZUELA
2015-01-02 23:55:45,551164975667224576,@AngelMedinaUCLA @gigizanchetta @CNN @CNNEE @t...,mojed89,0,0,0,"CARACAS, VENEZUELA"
2015-01-02 23:55:43,551164967336960000,@langernutrition @CNN I second that! @foodbabe...,SydneyJeanCrisp,1,0,0,Bunker
2015-01-02 23:55:34,551164933573206016,@CNN i met my wife Misty on Trivia Crack and s...,iamsamriley,0,1,0,Coastal Elite
2015-01-02 23:55:30,551164913847001089,"@CNN No, migration and tribal wars were main c...",1204atlanta,0,0,0,"Deplorable, USA"
2015-01-02 23:54:25,551164644132671488,"@cnnbrk @CNN If he's in LE, I'm sure he knows ...",MixtyMotions,0,0,0,"Shenandoah Valley, Virginia"
2015-01-02 23:54:24,551164637396598784,**UPDATE**@TheRevAl @msnbc @CNN @FoxNews did y...,iteachoneofu,0,0,0,On Another Level
2015-01-02 23:54:13,551164590625538048,@In_A_Flap @dlb703 @picamerican @GOP @megynkel...,PlsHelpOurVets,0,0,1,United States


In [14]:
# Rebuild the frame straight from the accumulated tweets_dict, indexed by timestamp
tweets = pd.DataFrame(tweets_dict).set_index('timestamp')
# Bare name as the last expression so the notebook renders the table
tweets

Unnamed: 0_level_0,id,text,user,likes,replies,retweets,user_location
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-01-02 23:58:28,551165661385613312,@cnnbrk @CNN I hope she recovers. Will be inte...,karen_alexa,0,0,0,Toronto
2015-01-02 23:58:13,551165597707272192,¿Che pasa? RT @servicedepot191: @dacw10 @Holly...,RedScareBot,0,0,0,Wisconsin
2015-01-02 23:56:30,551165168483573761,@AngelMedinaUCLA @gigizanchetta @CNN @CNNEE @t...,opsa22,0,0,0,VENEZUELA
2015-01-02 23:55:45,551164975667224576,@AngelMedinaUCLA @gigizanchetta @CNN @CNNEE @t...,mojed89,0,0,0,"CARACAS, VENEZUELA"
2015-01-02 23:55:43,551164967336960000,@langernutrition @CNN I second that! @foodbabe...,SydneyJeanCrisp,1,0,0,Bunker
2015-01-02 23:55:34,551164933573206016,@CNN i met my wife Misty on Trivia Crack and s...,iamsamriley,0,1,0,Coastal Elite
2015-01-02 23:55:30,551164913847001089,"@CNN No, migration and tribal wars were main c...",1204atlanta,0,0,0,"Deplorable, USA"
2015-01-02 23:54:25,551164644132671488,"@cnnbrk @CNN If he's in LE, I'm sure he knows ...",MixtyMotions,0,0,0,"Shenandoah Valley, Virginia"
2015-01-02 23:54:24,551164637396598784,**UPDATE**@TheRevAl @msnbc @CNN @FoxNews did y...,iteachoneofu,0,0,0,On Another Level
2015-01-02 23:54:13,551164590625538048,@In_A_Flap @dlb703 @picamerican @GOP @megynkel...,PlsHelpOurVets,0,0,1,United States
