In [None]:
import os

import pandas as pd
import tweepy

## Sign-up & Authentication

In [None]:
# Read the Twitter credentials from environment variables so that no secret is
# ever stored in the notebook. Indexing os.environ (rather than .get) makes a
# missing variable fail right here with a KeyError that names it, instead of
# surfacing later as an opaque authentication error.
api_key = os.environ["TWITTER_API_KEY"]
api_key_secret = os.environ["TWITTER_API_KEY_SECRET"]

access_token = os.environ["TWITTER_ACCESS_TOKEN"]
access_token_secret = os.environ["TWITTER_ACCESS_TOKEN_SECRET"]

In [None]:
# Create the OAuth handler from the API key pair, then attach the access
# token pair to it.
auth = tweepy.OAuthHandler(api_key, api_key_secret)
auth.set_access_token(access_token, access_token_secret)

In [None]:
# Client object, built on the auth handler, through which the REST calls
# below are made.
api = tweepy.API(auth)

In [None]:
# List the attribute and method names available on the API client.
dir(api)

In [None]:
# Post a status with the given text to the authenticated account.
# NOTE: this is a real side effect and happens every time the cell is run.
api.update_status('Twitter API trial')

In [None]:
# Delete the status with the given ID.
# NOTE(review): the ID is hard-coded — presumably the tweet posted in an
# earlier session. Replace it with the ID of a status you own before
# re-running, otherwise the call is expected to fail.
api.destroy_status(id = 1502097617140203520)

## JSON

Official doc: https://www.json.org/json-en.html

- A data format designed for data exchange between systems and languages

- Essentially a tree-like structure built from two kinds of structure found in most programming languages: objects (collections of name/value pairs, akin to a Python dictionary) and arrays (ordered lists of values, akin to a Python list)

## Put tweets into a DataFrame

In [None]:
# Fetch the home timeline of the authenticated account. No `count` is passed,
# so the API's default number of tweets applies.
my_timeline = api.home_timeline()

In [None]:
# Display the raw result returned by home_timeline().
my_timeline

In [None]:
# Pick the fifth item (index 4) of the timeline for closer inspection.
# NOTE(review): raises IndexError if fewer than five tweets were returned.
tweet_5 = my_timeline[4]
tweet_5

In [None]:
# Fields of a tweet are exposed as attributes: here, its text.
tweet_5.text

In [None]:
# Nested attribute access: the screen name of the `user` object attached to
# the tweet.
tweet_5.user.screen_name

In [None]:
# vars() exposes the object's instance attributes as a plain dict.
vars(tweet_5)

In [None]:
# Same value as tweet_5.text, read through the attribute dict instead.
vars(tweet_5)['text']

In [None]:
# The 'user' entry is itself an object, not a dict, so its fields are still
# read with attribute access.
vars(tweet_5)['user'].screen_name

In [None]:
# Collect every scalar (str/int) attribute of each tweet into one record per
# tweet, remembering which attribute names were seen along the way.
# A set is used for the names because, unlike a list, it ignores duplicates.
headers = set()
tweets_data = []

for tweet in my_timeline:
    # Every record starts with the screen name of the tweet's user.
    record = {"user": tweet.user.screen_name}
    for name, value in vars(tweet).items():
        # Exact type match: nested objects, lists, dicts, bools, None, ...
        # are left out.
        if type(value) is str or type(value) is int:
            record[name] = value
            headers.add(name)
    tweets_data.append(record)

# "user" is not an entry of `headers`, so it is added as the last column.
columns = [*headers, "user"]

In [None]:
# The collected attribute names, with "user" appended last.
columns

In [None]:
# One dict per tweet; not every dict necessarily has the same keys.
tweets_data

In [None]:
# Build the frame from the list of dicts. `columns=` fixes which columns
# appear and in what order; a key missing from a tweet's dict becomes NaN.
df = pd.DataFrame(tweets_data, columns=columns)
df.head()

In [None]:
def extract_timeline_df(timeline):
    """Flatten an iterable of tweet objects into a DataFrame of scalar fields.

    For every tweet, each instance attribute whose value is exactly a ``str``
    or an ``int`` becomes a column; anything else (nested objects, lists,
    dicts, ``bool``, ``None``, ...) is skipped. The screen name of
    ``tweet.user`` is stored in an extra ``"user"`` column.

    Args:
        timeline: Any iterable (a list, a generator, the result of
            ``tweepy.Cursor(...).items()``) of objects that support ``vars()``
            and expose ``.user.screen_name``. It is iterated exactly once.

    Returns:
        pandas.DataFrame: One row per tweet. Columns appear in the order in
        which their attribute was first seen, followed by ``"user"`` as the
        last column. Attributes missing from a given tweet are filled with
        NaN. An empty ``timeline`` yields an empty frame whose only column
        is ``"user"``.

    Raises:
        AttributeError: If an item has no ``user.screen_name``.
        TypeError: If an item does not support ``vars()``.
    """
    records = []
    # A dict serves as an insertion-ordered set: unlike ``set``, it yields the
    # same column order on every run.
    seen_columns = {}

    for tweet in timeline:
        record = {}
        for name, value in vars(tweet).items():
            # Exact type match on purpose: ``bool`` is a subclass of ``int``
            # but is not collected.
            if type(value) in (str, int):
                record[name] = value
                seen_columns[name] = None
        # Assigned last so the screen name always fills the "user" column.
        record["user"] = tweet.user.screen_name
        records.append(record)

    # "user" is appended explicitly below; dropping it here prevents a
    # duplicated column if a tweet carried a scalar attribute of that name.
    seen_columns.pop("user", None)
    columns = [*seen_columns, "user"]

    return pd.DataFrame(records, columns=columns)

## Tweets from other users and hashtags

In [None]:
# Let's check the latest tweets of the author of our textbook — Matthew Salganik.
# `count = 10` asks for ten tweets from this account's timeline.

user = 'msalganik'
user_timeline = api.user_timeline(screen_name = user, count = 10)

df_2 = extract_timeline_df(user_timeline)
df_2

In [None]:
# Full text of the row labelled 6 (the seventh tweet, given the default index).
# NOTE(review): raises KeyError if fewer than seven tweets were returned.
df_2['text'][6]

In [None]:
# Ask for 300 tweets in a single call, to show that one request is capped
# (see the note that follows this cell).
user_timeline = api.user_timeline(screen_name = user, count = 300)

df_3 = extract_timeline_df(user_timeline)
df_3

There is a maximum number of tweets that a single `user_timeline` call can return, so asking for 300 at once does not work. To extract more tweets, we page through the results with `Cursor`.

In [None]:
# Cursor issues as many user_timeline requests as needed; .items(300) stops
# after at most 300 tweets. The result is an iterator that is consumed once
# by extract_timeline_df.
user_timeline = tweepy.Cursor(api.user_timeline, screen_name = user).items(300)
df_3 = extract_timeline_df(user_timeline)
df_3

In [None]:
# Search tweets matching any of the keywords.
#
# `q` has to be a single query string, so the keywords are joined explicitly
# rather than passed as a Python list. "OR" matches tweets containing either
# term; joining with a plain space instead would require every term to be
# present.

keywords = ['China', '#EconTwitter']
query = ' OR '.join(keywords)
search_timeline = tweepy.Cursor(api.search_tweets, q = query).items(10)

df_4 = extract_timeline_df(search_timeline)
df_4

## Streaming

In [None]:
# create a subclass of tweepy.Stream

class Linstener(tweepy.Stream):
    """Stream that buffers incoming statuses and disconnects at a limit.

    The (misspelt) class name is kept as-is because the code that
    instantiates the stream refers to it.

    Attributes:
        tweets: Per-instance list of the status objects received so far.
        limit: Number of buffered statuses at which the stream disconnects.
    """

    # Default limit; override per instance with the ``limit=`` keyword.
    limit = 30

    def __init__(self, *args, limit=None, **kwargs):
        """Create the stream.

        Args:
            *args: Passed unchanged to ``tweepy.Stream`` (the four credentials).
            limit: Optional per-instance override of the class default.
            **kwargs: Passed unchanged to ``tweepy.Stream``.
        """
        super().__init__(*args, **kwargs)
        # The buffer lives on the instance. A class-level list would be shared
        # by every instance, so a second stream would start out already full.
        self.tweets = []
        if limit is not None:
            self.limit = limit

    def on_status(self, status):
        """Store ``status`` and disconnect once ``limit`` statuses are held."""
        self.tweets.append(status)

        # print(status.user.screen_name + ": " + status.text)

        # `>=` rather than `==`: if the buffer already holds `limit` or more
        # statuses (e.g. filter() is run again on the same instance), an
        # equality test would never fire and the stream would never stop.
        if len(self.tweets) >= self.limit:
            self.disconnect()

# The stream is a separate object from the `api` client created earlier, so
# the four credentials have to be passed again here.
stream_tweet = Linstener(api_key, api_key_secret, access_token, access_token_secret)

In [None]:
# Stream tweets by keyword: `track` takes the list of keywords to follow.
# The listener disconnects itself once it has collected `limit` statuses.

keywords = ['Ukraine']

stream_tweet.filter(track=keywords)

In [None]:
# Turn the buffered statuses into a two-column frame: who tweeted, and what.
# Note that this rebinds the `columns` and `df` names used earlier.

columns = ['User', 'Tweet']
data = [
    [status.user.screen_name, status.text]
    for status in stream_tweet.tweets
]

df = pd.DataFrame(data, columns=columns)
df

For more on Twitter Streaming, see https://developer.twitter.com/en/docs/tutorials/consuming-streaming-data