# _Gathering Verified Users Tweets_

In [1]:
import os
import csv
import pandas as pd
import tweepy
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from pathlib import Path

# establish path variable so CSV files are stored in correct location
path = Path(os.getcwd())

In [4]:
def load_env():
    '''
    Load in Twitter API keys & tokens via os environment variables.
    '''
    API_KEY = os.environ.get("API_KEY")
    API_SECRET_KEY = os.environ.get("API_SECRET_KEY")
    ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")
    ACCESS_TOKEN_SECRET = os.environ.get("ACCESS_TOKEN_SECRET")
    return API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET

# load in environment variables to access Twitter API
API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET = load_env()

In [5]:
# functions to gather Tweets
def is_retweet(tweet):
    '''
    Returns True/False if Tweet is a retweet.
    '''
    if "RT @" in tweet.full_text:
        return True
    else:
        return False
    
def get_tweets(path, user):
    '''
    Function that gathers input user's Tweets and outputs them to a CSV file.
    '''
    # open new CSV file into data folder of current directory
    csv_file = open(path/f"data/{user}.csv", "a")
    # create CSV writer
    csv_writer = csv.writer(csv_file)
    
    # write a single row with the headers of the columns
    csv_writer.writerow(
        [
            "id_str",
            "screen_name",
            "created_at",
            "lang",
            "source",
            "retweet_count",
            "favorite_count",
            "is_retweet",
            "full_text"
        ]
    )
    
    # Tweepy authorization
    auth = tweepy.OAuthHandler(API_KEY, API_SECRET_KEY)
    
    # set Tweepy access token's
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    
    # call Twitter API
    api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    
    # get Tweets
    for tweet in tweepy.Cursor(api.user_timeline, screen_name=user, tweet_mode="extended").items():
        csv_writer.writerow(
            [
                tweet.id_str,
                tweet.user.screen_name,
                tweet.created_at,
                tweet.lang,
                tweet.source,
                tweet.retweet_count,
                tweet.favorite_count,
                is_retweet(tweet),
                tweet.full_text
            ]
        )
        
    # close csv file
    csv_file.close()

In [20]:
def get_user(API_KEY, API_SECRET_KEY, ACCESS_TOKEN, ACCESS_TOKEN_SECRET):
    '''
    Gathers input for Twitter username, ensures that it is a valid account, and returns username as string.
    '''
    # Tweepy authorization
    auth = tweepy.OAuthHandler(API_KEY, API_SECRET_KEY)
    
    # set Tweepy access token's
    auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
    
    # call Twitter API
    api = tweepy.API(auth_handler=auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    
    # ask for input
    while True:
        user_input = str(input("Please enter username of Verified Twitter User: "))
        try:
            if api.get_user(user_input):
                return user_input
        except:
            print("Please enter valid username.")

In [19]:
get_user()

Please enter username of Verified Twitter User:  1


Please enter valid username.


Please enter username of Verified Twitter User:  earny_joe


'earny_joe'

## _Katy Perry: [@katyperry](https://twitter.com/katyperry?ref_src=twsrc%5Egoogle%7Ctwcamp%5Eserp%7Ctwgr%5Eauthor)_

In [7]:
%%time

get_tweets(path=path, user="katyperry")

CPU times: user 2.99 s, sys: 251 ms, total: 3.24 s
Wall time: 41.7 s


In [9]:
len(pd.read_csv(path/"data/katyperry.csv"))

3192

## _Snoop Dogg: [@SnoopDogg](https://twitter.com/SnoopDogg)_

In [10]:
%%time 

get_tweets(path=path, user="SnoopDogg")

CPU times: user 3.3 s, sys: 394 ms, total: 3.7 s
Wall time: 47.1 s


In [11]:
len(pd.read_csv(path/"data/SnoopDogg.csv"))

3213

## _Bill Gates: @BillGates_

In [12]:
%%time 

get_tweets(path=path, user="BillGates")

CPU times: user 3.1 s, sys: 228 ms, total: 3.33 s
Wall time: 43 s


In [13]:
len(pd.read_csv(path/"data/BillGates.csv"))

3235