# Obtain Multi-Airline Tweet Data from Twitter Premium API (30-Day)

In [1]:
# Install searchtweets wrapper for the premium API

# !pip install searchtweets

Collecting searchtweets
  Downloading https://files.pythonhosted.org/packages/51/d7/7dd296ba9469e046bad23583aaa0d36b18c7d6e4df9fd2acfb433d1c7ee2/searchtweets-1.7.4-py3-none-any.whl
Collecting tweet-parser
  Downloading https://files.pythonhosted.org/packages/4b/ea/cb82efb43dbcb115ea447313fbd287ff66349b34bdfdb4a78e25d3a42cb0/tweet_parser-1.13.2-py3-none-any.whl
Installing collected packages: tweet-parser, searchtweets
Successfully installed searchtweets-1.7.4 tweet-parser-1.13.2


In [1]:
# Import libraries

import json
import os

import pandas as pd
import yaml
from searchtweets import load_credentials, gen_rule_payload, ResultStream

In [2]:
# Load the premium-API credentials from a YAML key file kept under the user's
# home directory. The location is derived from "~" instead of one machine's
# absolute path so the notebook is not tied to a single user account.
credentials_path = os.path.expanduser("~/.secret/twitter_keys_30days.yaml")
premium_search_args = load_credentials(credentials_path,
                                       yaml_key="search_tweets_api",
                                       env_overwrite=False)

# Show what was loaded with secret values masked: printing the raw dict would
# store the bearer token in the notebook's saved output.
secret_keys = {"bearer_token", "password"}
print({key: ("<REDACTED>" if key in secret_keys and value is not None else value)
       for key, value in premium_search_args.items()})

  search_creds = yaml.load(f)[yaml_key]
Grabbing bearer token from OAUTH


{'bearer_token': '<REDACTED>', 'endpoint': 'https://api.twitter.com/1.1/tweets/search/30day/flatironcapstone.json', 'extra_headers_dict': None}


In [3]:
# Set up the rule to obtain tweets for the last month, excluding the time
# period already covered by the standard Twitter API.

# Hashtags and handles for each airline, OR-ed together. The trailing operators
# exclude retweets and replies and restrict the search to lang:en.
airline_query = """(#americanairlines OR #americanair OR @AmericanAir OR
                            #southwestairlines OR #southwestair OR @SouthwestAir OR
                            #unitedairlines OR #unitedair OR @united OR
                            #deltaairlines OR #deltaair OR @delta OR 
                            #virginamerica OR #virginair OR @VirginAmerica OR 
                            #alaskaair OR #alaskaairlines OR @AlaskaAir OR
                            #jetblue OR @JetBlue OR
                            #spiritairlines OR #spiritair OR @SpiritAirlines OR
                            #flyfrontier OR #frontierairlines OR @FlyFrontier OR
                            #allegiant OR #allegiantair OR @Allegiant OR
                            #hawaiianairlines OR @HawaiianAir OR
                            #suncountryair OR @SunCountryAir) -is:retweet -is:reply -RT lang:en"""

# Results per API call and the date window, gathered in one place.
payload_options = {
    "results_per_call": 500,
    "from_date": "2020-02-04",
    "to_date": "2020-03-04",
}

rule = gen_rule_payload(airline_query, **payload_options)

In [4]:
# Create the result stream for the rule defined earlier, passing through the
# loaded credential/endpoint settings and a max_results limit of 150000.
rs = ResultStream(
    max_results=150000,
    rule_payload=rule,
    **premium_search_args,
)
print(rs)

ResultStream: 
	{
    "username": null,
    "endpoint": "https://api.twitter.com/1.1/tweets/search/30day/flatironcapstone.json",
    "rule_payload": {
        "query": "(#americanairlines OR #americanair OR @AmericanAir OR #southwestairlines OR #southwestair OR @SouthwestAir OR #unitedairlines OR #unitedair OR @united OR #deltaairlines OR #deltaair OR @delta OR #virginamerica OR #virginair OR @VirginAmerica OR #alaskaair OR #alaskaairlines OR @AlaskaAir OR #jetblue OR @JetBlue OR #spiritairlines OR #spiritair OR @SpiritAirlines OR #flyfrontier OR #frontierairlines OR @FlyFrontier OR #allegiant OR #allegiantair OR @Allegiant OR #hawaiianairlines OR @HawaiianAir OR #suncountryair OR @SunCountryAir) -is:retweet -is:reply -RT lang:en",
        "maxResults": 500,
        "toDate": "202003040000",
        "fromDate": "202002040000"
    },
    "tweetify": true,
    "max_results": 150000
}


In [5]:
# rs.stream()

In [6]:
def _tweet_text(tweet):
    """Return the fullest text available in a tweet payload.

    Lookup order: ``tweet['extended_tweet']['full_text']``, then
    ``tweet['full_text']``, then ``tweet['text']``.

    Assumes ``tweet`` behaves like a dict (supports ``.get`` and ``in``).

    Raises:
        KeyError: if none of the three fields is present.
    """
    extended = tweet.get('extended_tweet')
    if isinstance(extended, dict) and 'full_text' in extended:
        return extended['full_text']
    if 'full_text' in tweet:
        return tweet['full_text']
    return tweet['text']


# Parallel lists, one entry per tweet; a later cell reads them by these names.
# NOTE: `datetime` shadows the standard-library module name. It is kept because
# other cells refer to it; do not `import datetime` in this notebook.
iD = []
datetime = []
text = []
retweets = []

for tweet in rs.stream():
    # Read every field before appending anything, so a missing key cannot leave
    # the four lists with different lengths.
    tweet_id = tweet['id']
    created_at = tweet['created_at']
    body = _tweet_text(tweet)
    retweet_count = tweet['retweet_count']

    iD.append(tweet_id)
    datetime.append(created_at)
    text.append(body)
    retweets.append(retweet_count)

retrying request; current status code: 429
retrying request; current status code: 429


In [7]:
# Assemble the four parallel lists collected from the stream into one table.
# A column mapping names the columns at construction time and raises a
# ValueError if the lists differ in length, rather than padding with NaN.
df = pd.DataFrame({
    'id': iD,
    'datetime': datetime,
    'text': text,
    'retweets': retweets,
})
df

Unnamed: 0,id,datetime,text,retweets
0,1234991579502243843,Tue Mar 03 23:58:32 +0000 2020,Just announced: @united says for flights booke...,1
1,1234991364468690944,Tue Mar 03 23:57:40 +0000 2020,Does @Alitalia really stands on its position o...,2
2,1234991343853654017,Tue Mar 03 23:57:36 +0000 2020,Hey @Delta I need to track down a past flight ...,0
3,1234991315319848961,Tue Mar 03 23:57:29 +0000 2020,Hey @Delta now we’re stuck because you cancele...,0
4,1234991155688792066,Tue Mar 03 23:56:51 +0000 2020,OMG! I’m flying @americanair. All I can think ...,0
...,...,...,...,...
41471,1224483897196466177,Tue Feb 04 00:04:45 +0000 2020,#United aircraft deicing at #DIA. @CBSDenver @...,4
41472,1224483511861571584,Tue Feb 04 00:03:13 +0000 2020,Now I know why I never check luggage. 45 mins ...,0
41473,1224483376997945344,Tue Feb 04 00:02:41 +0000 2020,It shouldn't have taken someone suing to get #...,2
41474,1224483083761541122,Tue Feb 04 00:01:31 +0000 2020,"After nearly 3 million miles on just @United, ...",0


In [8]:
# Write df to a .csv file in the current working directory, leaving out the
# DataFrame index column.
output_csv = 'twitter_30_days_all_airlines.csv'
df.to_csv(output_csv, index=False)