# Gathering Tweet Data Using the Twitter Module tweepy
Information about tweepy's Twitter API wrapper can be found at [docs.tweepy.org](http://docs.tweepy.org/en/v3.1.0/api.html#API.search).
Additional and updated information about Twitter's `Standard Search API` can be found [here](https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets).


In [1]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import tweepy

# Read the Twitter API credentials from environment variables so that secrets
# are never saved inside the notebook. Any variable that is not set falls back
# to an empty string, which matches the previous placeholder values.
consumer_key = os.environ.get("TWITTER_CONSUMER_KEY", "")
consumer_secret = os.environ.get("TWITTER_CONSUMER_SECRET", "")
access_token = os.environ.get("TWITTER_ACCESS_TOKEN", "")
access_token_secret = os.environ.get("TWITTER_ACCESS_TOKEN_SECRET", "")

# Build the auth handler from the consumer key pair, then attach the access token pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

# authenticate the api with token and access information
api = tweepy.API(auth)

In [75]:
# API.home_timeline returns the 'n' most recent statuses, including retweets,
# posted by the authenticating user and that user's friends.

public_tweets = api.home_timeline(count=10)

# Print each status prefixed by its position in the timeline
for position, status in enumerate(public_tweets):
    print(position, status.text)

0 Our developer community is full of inspiring ideas that help the world learn from and improve our collective prepar… https://t.co/XObxbOQqmQ
1 This new streaming endpoint is available in Twitter Developer Labs and provides comprehensive access to COVID-19 re… https://t.co/wFylrCYJI8
2 The volume of conversation about COVID-19 is tremendous, which means it requires expertise and computational resour… https://t.co/Rzsa9P2Bn1
3 During these unprecedented times, what’s happening on Twitter can help the world better understand &amp; respond to the… https://t.co/X5ZVcb6m1j
4 You're up for some fun today and looking for an adventure, and... More for Pisces https://t.co/PMQHAipvly
5 New @abellfoundation report drills down on trends in Bmore neighborhoods: https://t.co/P64gzKFnyq. Leading urban an… https://t.co/w1Qr0h6ea2
6 FREE AMERICA NOW
7 Bravo Texas! https://t.co/cVkDewRqGv
8 Give people their freedom back! https://t.co/iG8OYGaVZ0
9 Fun moves to the top of your agenda today. Take this op

In [78]:
# API.search returns tweets that match a specified query
# reference: http://docs.tweepy.org/en/v3.1.0/api.html#API.search

covid_tweets = api.search('StayAtHome', lang='en')

# Preview only the first five matches
for rank, status in enumerate(covid_tweets[:5]):
    print(rank, status.text)

0 RT @MoviezAdda: #WednesdayMotivation #FitnessMotivation
🔥💪 "#Bhaijaan #Fitness Icon" 🔥💪

#Jacqueline Shot A Perfect Pic Of Dashing #SalmanK…
1 RT @Fabriziobustama: The inventor of the N95 mask Peter Tsai is coming out of retirement to help fight COVID-19

By @nowthisnews 
#COVID19…
2 RT @susialika5: Dusk teaches that happiness does not have to come earlier,i hope you life you're proud of.if you find that you're not,I hop…
3 So love these six #weeblewobbleblobclan #stayathome #artistsathome #reshedency #WIP for #raveninghamsculpturetrail… https://t.co/m95Cf0UVby
4 RT @FoniLunboy: Better days are ahead of us. Believe me. #StayAtHome 🇲🇻🙏♥️


In [83]:
# Convert twitter results to a pandas dataframe for other analysis
results = api.search(q='StayAtHome', count=10)

# Each result keeps the raw API payload in `_json`
json_data = [r._json for r in results]

# pd.json_normalize is the supported entry point; the pd.io.json.json_normalize
# alias used previously has been deprecated since pandas 1.0.
df = pd.json_normalize(json_data)

# Show every flattened field of the first tweet
print(df.loc[0, :])

contributors                                                                            None
coordinates                                                                             None
created_at                                                    Wed Apr 29 21:25:01 +0000 2020
entities.hashtags                          [{'text': 'QuedateEnCasa', 'indices': [23, 37]...
entities.media                             [{'id': 1255609050483564544, 'id_str': '125560...
entities.symbols                                                                          []
entities.urls                                                                             []
entities.user_mentions                                                                    []
extended_entities.media                    [{'id': 1255609050483564544, 'id_str': '125560...
favorite_count                                                                             0
favorited                                                             

## Create a function to pull Twitter text given a query

In [2]:
def get_twitter_data(api, query='StayAtHome', num_result = 100):
    '''
    Collect up to `num_result` English-language tweets matching `query`.

    Results are fetched page by page. After each page the search is moved
    back in time with `max_id`, so successive requests return older tweets
    instead of repeating the same most-recent ones.

    input:
        api - authenticated twitter api object exposing `search`
        query - words to use for query
        num_result - maximum number of tweets to collect
    output:
        a data frame with the columns id, created_at, text, favorite_count,
        retweet_count and truncated; it has fewer than `num_result` rows
        (possibly zero) when the search runs out of matching tweets
    '''
    columns = ['id', 'created_at', 'text', 'favorite_count', 'retweet_count', 'truncated']
    page_limit = 100  # the standard search endpoint returns at most 100 tweets per request

    json_data = []
    max_id = None  # no upper bound on the first request
    while len(json_data) < num_result:
        paging = {} if max_id is None else {'max_id': max_id}
        count = min(num_result - len(json_data), page_limit)
        page = api.search(query, lang='en', count=count, **paging)
        if not page:
            # Nothing (more) to fetch: stop instead of looping forever
            break
        for search_result in page:
            if len(json_data) >= num_result:
                break
            json_data.append(search_result._json)
        # Continue strictly below the oldest tweet seen to avoid duplicates
        max_id = min(status._json['id'] for status in page) - 1

    if not json_data:
        # Keep the return shape stable even when there is nothing to normalize
        return pd.DataFrame(columns=columns)

    df = pd.json_normalize(json_data)

    return df[columns]

### Potential queries related to COVID-19: `flattenthecurve, staysafe, covid19`

In [3]:
# (query, number of tweets) pairs to pull
queries = [('StayAtHome', 1000), ('COVID19', 1000), ('Flatten', 500)]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the per-query
# frames the same way and, like append, keeps each frame's original index.
twit_data = pd.concat([get_twitter_data(api, q, n) for q, n in queries])

twit_data.tail(3)

Unnamed: 0,id,created_at,text,favorite_count,retweet_count,truncated
497,1255632936491089920,Wed Apr 29 22:59:55 +0000 2020,"RT @DiamondandSilk: We are the United States, ...",0,4852,False
498,1255632910243303429,Wed Apr 29 22:59:48 +0000 2020,@JoeySalads “fLaTtEn tHe cUrVe”,0,0,False
499,1255632906019471360,Wed Apr 29 22:59:47 +0000 2020,I am currently practicing social distancing fr...,12,3,False


In [6]:
# Load to CSV File
# to_csv does not create missing parent folders, so make sure data/ exists first
os.makedirs("data", exist_ok=True)
twit_data.to_csv("data/covid19.csv", index=False, encoding="utf-8")

In [117]:
# Pull tweets matching 'Flatten' and save them to their own CSV file
flatten_curve = get_twitter_data(api, 'Flatten', 500)
flatten_curve.to_csv("flatten_curve.csv", index=False, encoding="utf-8")

# Preview the last rows; only the final expression of a cell is displayed,
# so the preview has to come after the export.
flatten_curve.tail(3)

['created_at', 'id', 'id_str', 'text', 'truncated', 'entities', 'metadata', 'source', 'in_reply_to_status_id', 'in_reply_to_status_id_str', 'in_reply_to_user_id', 'in_reply_to_user_id_str', 'in_reply_to_screen_name', 'user', 'geo', 'coordinates', 'place', 'contributors', 'retweeted_status', 'is_quote_status', 'retweet_count', 'favorite_count', 'favorited', 'retweeted', 'lang']


In [42]:
# `search_result` is not assigned at top level by any earlier cell shown here
# (inside get_twitter_data it is only a local variable), so on a fresh kernel
# this cell raised NameError. Fetch one matching status explicitly first.
search_result = api.search('StayAtHome', lang='en', count=1)[0]

# Inspect the nested user object of the raw payload
print(search_result._json['user'])

{'id': 1017767895290535936, 'id_str': '1017767895290535936', 'name': 'FmNdlozi', 'screen_name': 'ForsterNdlozi', 'location': 'South Africa', 'description': 'Jesus Christ is Lord and I love Him because He first loved me.', 'url': None, 'entities': {'description': {'urls': []}}, 'protected': False, 'followers_count': 169, 'friends_count': 968, 'listed_count': 0, 'created_at': 'Fri Jul 13 13:49:10 +0000 2018', 'favourites_count': 2969, 'utc_offset': None, 'time_zone': None, 'geo_enabled': False, 'verified': False, 'statuses_count': 434, 'lang': None, 'contributors_enabled': False, 'is_translator': False, 'is_translation_enabled': False, 'profile_background_color': 'F5F8FA', 'profile_background_image_url': None, 'profile_background_image_url_https': None, 'profile_background_tile': False, 'profile_image_url': 'http://pbs.twimg.com/profile_images/1198683183757307904/VuOL11U8_normal.jpg', 'profile_image_url_https': 'https://pbs.twimg.com/profile_images/1198683183757307904/VuOL11U8_normal.jpg

In [34]:
# Print the text field of the raw payload held in `search_result`.
# NOTE(review): relies on `search_result` having been defined by a previously
# run cell; confirm this still works after Restart Kernel -> Run All.
print(search_result._json['text'])

RT @GovernmentZA: South Africa will shift to #Level4Lockdown to prevent and combat the spread of #COVID19SA #StayAtHome https://t.co/NWEOWJ…
