In [17]:
import json, yaml, os, time
import pandas as pd
from searchtweets import load_credentials, gen_rule_payload, ResultStream
from tweet_parser.tweet import Tweet
from dotenv import load_dotenv
load_dotenv()

consumer_api_key = os.environ.get('Twitter_API_Key')
consumer_api_secret = os.environ.get('Twitter_API_Secret_Key')
access_token_key = os.environ.get('Twitter_Access_Token')
access_token_secret = os.environ.get('Twitter_Access_Token_secret')

# https://lucahammer.com/2019/11/05/collecting-old-tweets-with-the-twitter-premium-api-and-python/
# https://twitterdev.github.io/search-tweets-python/
# https://github.com/twitterdev/search-tweets-python/tree/master/examples
'''
Example script to collect old Tweets with the Twitter Premium Search API
Article: https://lucahammer.com/?p=350

To use this script, change the constants (UPPERCASE variables) to your needs,
and run it. For example in your CLI by executing: "python premiumapi.py".

Find your app credentials here: https://developer.twitter.com/en/apps
Find your dev environment label here: https://developer.twitter.com/en/account/environments
'''
# Variables used to be able to pull the data needed from twitter
API_SCOPE = 'fullarchive'
DEV_ENVIRONMENT_LABEL = 'injuries'
SEARCH_QUERY = '"Christian McCaffrey" Shoulder from:ProFootballDoc OR from:InsideInjuries'

RESULTS_PER_CALL = 100  # 100 for sandbox, 500 for paid tiers
TO_DATE = '2020-11-15' # format YYYY-MM-DD HH:MM (hour and minutes optional)
FROM_DATE = '2020-11-1'  # format YYYY-MM-DD HH:MM (hour and minutes optional)

# max number of tweets to collect
MAX_RESULTS = 100

# create csv
FILENAME = 'twitter_nfl_injury.csv'  # Where the Tweets should be saved

# Script prints an update to the CLI every time it collected another X Tweets
PRINT_AFTER_X = 1

#create a yaml document to hold credentials
config = dict(
    search_tweets_api=dict(
        account_type='premium',
        endpoint=f"https://api.twitter.com/1.1/tweets/search/{API_SCOPE}/{DEV_ENVIRONMENT_LABEL}.json",
        consumer_key=consumer_api_key,
        consumer_secret=consumer_api_secret
    )
)

with open('twitter_keys.yaml', 'w') as config_file:
    yaml.dump(config, config_file, default_flow_style=False)

# opening the communication
premium_search_args = load_credentials("twitter_keys.yaml",
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

# creating the payload for search
rule = gen_rule_payload(SEARCH_QUERY,
                        results_per_call=RESULTS_PER_CALL,
                        from_date=FROM_DATE,
                        to_date=TO_DATE
                        )

#returning the results from the payload and stops at max_results
rs = ResultStream(rule_payload=rule,
                  max_results=MAX_RESULTS,
                  **premium_search_args)

# working properly, need to filter out retweets by using retweeted_status to remove retweets would be easiest in a dataframe
# need to format it for a pandas DF and write into csv instead of writing the full dictionary
# find headers within the data so that the differing size rows get split up properly

# loop to open FILENAME as f and write to f the value of tweet which would be results_per_call
with open(FILENAME, 'a', encoding='utf-8') as f:
    n = 0
    for tweet in rs.stream():
        n += 1
        # once n reaches value of print_after_x function discontinues
        if n % PRINT_AFTER_X == 0:
            print('{0}: {1}'.format(str(n), tweet['created_at']))
        print(tweet)
        #df = pd.DataFrame.from_dict(tweet)
        # writing tweets to file
        json.dump(tweet, f)
        f.write('\n')
print('done')

Grabbing bearer token from OAUTH


1: Sat Nov 14 23:05:02 +0000 2020
{'created_at': 'Sat Nov 14 23:05:02 +0000 2020', 'id': 1327749411661877249, 'id_str': '1327749411661877249', 'text': 'RT @InsideInjuries: #Bengals Joe Mixon and #Seahawks Chris Carson have both been ruled OUT again this week as they recover from foot injuri‚Ä¶', 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'truncated': False, 'in_reply_to_status_id': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id': None, 'in_reply_to_user_id_str': None, 'in_reply_to_screen_name': None, 'user': {'id': 3263578639, 'id_str': '3263578639', 'name': 'Hunter Camp', 'screen_name': 'Hunter2Camp4', 'location': 'Alabama, USA', 'url': None, 'description': 'WLHS‚öæÔ∏èCoach. Umpire. WLHS‚Äô17. Special Ed. Major. CCC‚Äô19. ASU‚Äô21üìùLALüíúBraves‚öæÔ∏è, Alabama, Cowboysüèà, CavaliersüèÄ, Predatorsüèí, USA üá∫üá∏', 'translator_type': 'none', 'protected': False, 'verified': False, 'followers_count': 634, 'friends_