# Quick view with Python
(This example uses the Twitter API and [tweepy](http://docs.tweepy.org/en/v3.5.0/index.html).)

In [1]:
#!pip install "tweepy<4"  # the code below uses the tweepy 3.x API (TweepError, StreamListener)

### Sample Data

In [2]:
import os
from tweepy import OAuthHandler, API, TweepError
from tweepy.parsers import JSONParser


class TwitterClient:
    '''
    Small wrapper around the tweepy REST client.

    Credentials are read from the environment variables
    TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET, TWITTER_ACCESS_TOKEN and
    TWITTER_ACCESS_TOKEN_SECRET.
    '''

    def __init__(self):
        '''
        authenticate API

        Raises KeyError when one of the four environment variables is not set.
        When building the OAuth handler fails, an error is printed and
        self.auth is left as None.
        '''
        consumer_key = os.environ['TWITTER_CONSUMER_KEY']
        consumer_secret = os.environ['TWITTER_CONSUMER_SECRET']
        access_token = os.environ['TWITTER_ACCESS_TOKEN']
        access_token_secret = os.environ['TWITTER_ACCESS_TOKEN_SECRET']

        # always define the attribute, so later calls can test it instead of
        # failing with AttributeError after a swallowed authentication error
        self.auth = None
        try:
            auth = OAuthHandler(consumer_key, consumer_secret)
            auth.set_access_token(access_token, access_token_secret)
        except Exception:  # not a bare except: let KeyboardInterrupt/SystemExit through
            print('Error: Authentication Failure')
        else:
            self.auth = auth


    def sample(self, query, count = 10, geo = False):
        '''
        get sample data

        query: search string passed to the search endpoint as <q>
        count: number of tweets requested
        geo:   accepted for interface compatibility, currently not used

        Returns the JSON-parsed search response, or an empty list when the
        client is not authenticated or the request raises TweepError.
        '''
        if self.auth is None:
            print('Error: Authentication Failure')
            return []

        api = API(self.auth, parser = JSONParser())
        try:
            return api.search(q = query, count = count)
        except TweepError as e:
            print('Error: {}'.format(e))
            return []


In [3]:
# Create the client; its constructor reads the credentials from the environment.
twitter = TwitterClient()
# ask the search endpoint for 5 tweets matching the query string
sample = twitter.sample('machine learning,AI', count = 5)

In [4]:
# List the top-level keys of the search response.
for key in sample:
    print(key)

search_metadata
statuses


In [5]:
import re

def quick_look(obj):
    '''
    Print a one-line summary per key of a dict-like object.

    Keys are listed in sorted order together with the type name of their
    value; a value of None is shown as '? (missing)'. Keys whose value is a
    dict are listed once more at the end under 'Nested objects:'.

    obj: mapping to inspect (e.g. a JSON-parsed tweet)
    Returns None; the summary is written to stdout.
    '''
    nested = []
    for key in sorted(obj):
        value = obj[key]
        # ask the value for its type name directly instead of parsing
        # the text of str(type(value)) with a regular expression
        label = '? (missing)' if value is None else type(value).__name__
        print('{:<35} {}'.format(key, label))
        if isinstance(value, dict):
            nested.append(key)
    if nested:
        print('\nNested objects:')
        for key in nested:
            print('\t{}'.format(key))

In [6]:
# Field names and value types of the metadata that accompanies the search results.
quick_look(sample['search_metadata'])

completed_in                        float
count                               int
max_id                              int
max_id_str                          str
next_results                        str
query                               str
refresh_url                         str
since_id                            int
since_id_str                        str


In [7]:
# Field names and value types of one returned status (index 1).
quick_look(sample['statuses'][1])

contributors                        ? (missing)
coordinates                         ? (missing)
created_at                          str
entities                            dict
favorite_count                      int
favorited                           bool
geo                                 ? (missing)
id                                  int
id_str                              str
in_reply_to_screen_name             ? (missing)
in_reply_to_status_id               ? (missing)
in_reply_to_status_id_str           ? (missing)
in_reply_to_user_id                 ? (missing)
in_reply_to_user_id_str             ? (missing)
is_quote_status                     bool
lang                                str
metadata                            dict
place                               ? (missing)
retweet_count                       int
retweeted                           bool
retweeted_status                    dict
source                              str
text                                str
tr

In [8]:
# The <user> object nested inside that status.
quick_look(sample['statuses'][1]['user'])

contributors_enabled                bool
created_at                          str
default_profile                     bool
default_profile_image               bool
description                         str
entities                            dict
favourites_count                    int
follow_request_sent                 bool
followers_count                     int
following                           bool
friends_count                       int
geo_enabled                         bool
has_extended_profile                bool
id                                  int
id_str                              str
is_translation_enabled              bool
is_translator                       bool
lang                                str
listed_count                        int
location                            str
name                                str
notifications                       bool
profile_background_color            str
profile_background_image_url        str
profile_background_image_url_

In [9]:
# The <retweeted_status> object nested inside that status.
quick_look(sample['statuses'][1]['retweeted_status'])

contributors                        ? (missing)
coordinates                         ? (missing)
created_at                          str
entities                            dict
favorite_count                      int
favorited                           bool
geo                                 ? (missing)
id                                  int
id_str                              str
in_reply_to_screen_name             ? (missing)
in_reply_to_status_id               ? (missing)
in_reply_to_status_id_str           ? (missing)
in_reply_to_user_id                 ? (missing)
in_reply_to_user_id_str             ? (missing)
is_quote_status                     bool
lang                                str
metadata                            dict
place                               ? (missing)
possibly_sensitive                  bool
retweet_count                       int
retweeted                           bool
source                              str
text                                str
tr

In [10]:
import pandas

# Load the list of statuses into a DataFrame for tabular inspection.
data = pandas.DataFrame(sample['statuses'])

In [11]:
# First rows of the flag and counter columns.
data[['truncated', 'favorited', 'retweeted', 'contributors', 'retweet_count', 'favorite_count']].head()

Unnamed: 0,truncated,favorited,retweeted,contributors,retweet_count,favorite_count
0,False,False,False,,19,0
1,False,False,False,,25,0
2,False,False,False,,1,0
3,False,False,False,,33,0
4,False,False,False,,0,0


In [12]:
# First rows of the location-related columns (all empty in this search sample).
data[['place', 'geo', 'coordinates']].head()

Unnamed: 0,place,geo,coordinates
0,,,
1,,,
2,,,
3,,,
4,,,


### Data Stream

In [13]:
from time import time, sleep 
from tweepy import Stream
from tweepy.streaming import StreamListener


class TwitterStream(TwitterClient):
    '''
    TwitterClient that can additionally consume the filtered tweet stream.
    '''

    # bounding box used for the geo filter: the whole globe
    WORLD = [-180, -90, 180, 90]

    class TweetListener(StreamListener):
        def __init__(self, broadcast, handle, count, timer):
            '''
            define broadcast and error-handle methods
            set count: how many tweets before disconnect (falsy: no limit)
            set timer: seconds run before disconnect
            '''
            # let the tweepy base class initialise itself
            super().__init__()
            self.broadcast = broadcast
            self.handle = handle
            self.limit = count
            self.count = 0
            # absolute point in time after which the stream is closed
            self.timer = time() + timer

        def on_data(self, data):
            '''
            pass the raw message to <broadcast>
            return False (disconnect) once the count or the time limit is reached
            '''
            self.broadcast(data)
            self.count += 1
            # ">=" so that exactly <limit> messages are broadcast, not limit + 1
            if self.limit and self.count >= self.limit:
                return False  # disconnect: reached conf count
            if self.timer < time():
                return False  # disconnect: exceeded time limit
            return True

        def on_error(self, status):
            '''
            pass the error status to <handle>
            return False (disconnect) on status 420, True otherwise
            '''
            self.handle(status)
            if status == 420: # disconnect: exceeded API connection failure limit
                return False
            return True


    def stream(self,
               query, geo = False,
               broadcast = print, handle = print,
               count = False, timer = 60,
               is_async = True,
               **kwargs):
        '''
        stream tweets matching query with/or geo-filter

        query:     comma-separated keywords; when no keyword is given the
                   geo filter is switched on
        geo:       also filter by location (whole-world bounding box)
        broadcast: called with every raw message received
        handle:    called with the status code on stream errors
        count:     number of messages before disconnect (falsy: no limit)
        timer:     seconds before disconnect
        is_async:  passed on to tweepy's Stream.filter under the keyword it
                   supports (<async> or <is_async>)

        The historical keyword <async> is still accepted (e.g. through
        **{'async': False}); as a literal keyword it does not parse any more
        because <async> is a reserved word since Python 3.7.
        Raises TypeError on any other unknown keyword argument.
        '''
        import inspect

        if 'async' in kwargs:
            is_async = kwargs.pop('async')
        if kwargs:
            raise TypeError('stream() got an unexpected keyword argument {!r}'
                            .format(sorted(kwargs)[0]))

        keywords = [word.strip() for word in query.split(',') if word.strip()]
        if not keywords: # get anything with geo
            geo = True

        tweets = Stream(self.auth, self.TweetListener(broadcast, handle, count, timer))

        # the keyword name depends on the installed tweepy release
        accepted = inspect.signature(tweets.filter).parameters
        async_keyword = 'is_async' if 'is_async' in accepted else 'async'

        options = {'track': keywords, async_keyword: is_async}
        if geo:
            options['locations'] = list(self.WORLD)
        tweets.filter(**options)


In [14]:
import json

# save arriving tweets in variable <sample>
# (the stream call disconnects once the requested number of messages arrived)
sample = []
def collect(data):
    '''
    Parse one raw stream message, print its text and keep it in <sample>.

    data: JSON string as delivered by the stream
    Messages without a <text> field are skipped, so a non-tweet message does
    not abort the stream with a KeyError.
    '''
    tweet = json.loads(data)
    if not isinstance(tweet, dict) or 'text' not in tweet:
        return
    print('\n----------------------------------------------\n{}'.format(tweet['text']))
    sample.append(tweet)

twitter = TwitterStream()
# <async> is a reserved word since Python 3.7 and cannot be written as a literal
# keyword argument any more; dict-unpacking passes the very same argument
twitter.stream('', geo = True, broadcast = collect, count = 10, **{'async': False})


----------------------------------------------
Can you recommend anyone for this #job? barista - Store# 02303, CHAGRIN &amp; GREEN - https://t.co/Bb35XS0tis #CustomerService #Beachwood, OH

----------------------------------------------
I wish it hadn't taken me this long to realise that I made the ring decision, I would of gone through all the tough… https://t.co/4LL3kwQvI0

----------------------------------------------
أنا من حُبي لك تسألني حتى الروح هو قلبي أنا أوحضرتك راعيه" تخييييل؟.

----------------------------------------------
If I’m not at COM or at work I’m with tha homies lol busy life

----------------------------------------------
All the Police Chiefs in Anderson County in one photo to help us with the Drug Take Back day at Clinton Police Depa… https://t.co/jIHsORoYhP

----------------------------------------------
@ShowTV @GkhnAlkanOnline @kalpatisitv Bu ne bicim soru TABİKİ HAYIR DALGAMI GECİYIRSUN BİZİMLE https://t.co/Nd4xvNIuOd

------------------------------------

In [15]:
# geo = True filters tweets with nested object <place> populated;
# summarise that object for the first collected tweet
quick_look(sample[0]['place'])

attributes                          dict
bounding_box                        dict
country                             str
country_code                        str
full_name                           str
id                                  str
name                                str
place_type                          str
url                                 str

Nested objects:
	attributes
	bounding_box


In [16]:
# Drill into the nested <bounding_box> object of that place.
quick_look(sample[0]['place']['bounding_box'])

coordinates                         list
type                                str


In [17]:
# Show the raw bounding box value.
sample[0]['place']['bounding_box']

{'coordinates': [[[-81.532635, 41.449669],
   [-81.532635, 41.501448],
   [-81.48037, 41.501448],
   [-81.48037, 41.449669]]],
 'type': 'Polygon'}

### Processing
For a simple stream-visualization app we are going to use some [data aggregation](Aggregation.ipynb) and some [basic sentiment analysis](Sentiment.ipynb).