### Instagram

In [None]:
%pylab inline

client_id = ''
client_secret = ''

from instagram.client import InstagramAPI
api = InstagramAPI(client_id=client_id, client_secret=client_secret)

In [None]:
tag = 'NationalKaleDay'

# Total number of posts Instagram reports for the tag.
# (print with a single parenthesised argument behaves the same on Python 2 and 3.)
print(api.tag(tag).media_count)

# Collect Instagram media for the tag, one page at a time.
max_tag_id = 0
media_per_query = 33
MAX_ITEMS = 10000

# First page: the call returns the media list and a "next" value that is used
# below to request the following page.
all_media, next_ = api.tag_recent_media(media_per_query, max_tag_id, tag)

# Keep following the "next" value until it is falsy or the cap is reached.
while next_ and len(all_media) < MAX_ITEMS:
    more_media, next_ = api.tag_recent_media(count=media_per_query, tag_name=tag, with_next_url=next_)
    all_media.extend(more_media)

# The final page can push the total past the cap; trim so that MAX_ITEMS is a
# true upper bound on the number of collected items.
all_media = all_media[:MAX_ITEMS]

In [None]:
# create a dataframe and add the Instagram data we just collected
import pandas as pd

# One row per collected post: its creation time and the posting user's name.
df = pd.DataFrame(dict(
    times=[media.created_time for media in all_media],
    users=[media.user.username for media in all_media],
))

In [None]:
# since we need hourly counts, let's create a function that sets the second/minute value to zero
def make_date(d):
    """Truncate a datetime to the start of its hour.

    Parameters
    ----------
    d : datetime-like
        Any object with a ``datetime``-style ``replace`` method.

    Returns
    -------
    The same moment with minute, second and microsecond set to zero, so that
    every timestamp within one hour maps to the same value.
    """
    # microsecond must be cleared too; otherwise two timestamps in the same
    # hour could still compare unequal and end up in different groups.
    return d.replace(minute=0, second=0, microsecond=0)

# Bucket every post into its hour. Series.apply calls make_date once per value,
# which avoids building a one-element row Series for every record the way a
# row-wise DataFrame.apply(axis=1) does.
df['dt'] = df['times'].apply(make_date)

# Index by the hour bucket so later cells can group and slice by time.
ig_df = df.set_index('dt')

In [None]:
# now we have everything we need in our dataframe:
# ig_df is indexed by the hour bucket ('dt'); preview its first rows
ig_df.head()

In [None]:
# Plot Instagram volume over time. 'dt' already holds the hour bucket of each
# post, so counting rows per distinct 'dt' value gives the posts per hour.
ig_hourly = df.groupby('dt').size()
ig_hourly.plot(figsize=(20, 6))
title('#%s - IG posts over time' % tag)

In [None]:
# Keep only the hourly counts from 2015-10-01 onward (roughly the past week),
# since Twitter only lets us search back for a week.
ig_recent = ig_df.groupby(level=0).size().loc['2015-10-01':]
ig_recent.plot(figsize=(20, 6))

### Twitter

In [None]:
# pip install twitter
import twitter

import os

# Twitter API keys. They are read from the environment so that real secrets
# never have to be typed into (and saved with) the notebook; when a variable
# is unset the value falls back to the empty-string placeholder.
CONSUMER_KEY = os.environ.get('TWITTER_CONSUMER_KEY', '')
CONSUMER_SECRET = os.environ.get('TWITTER_CONSUMER_SECRET', '')

OAUTH_TOKEN = os.environ.get('TWITTER_OAUTH_TOKEN', '')
OAUTH_TOKEN_SECRET = os.environ.get('TWITTER_OAUTH_TOKEN_SECRET', '')


# Note the argument order: token and token secret first, then the consumer pair.
auth = twitter.oauth.OAuth(OAUTH_TOKEN, OAUTH_TOKEN_SECRET,
                           CONSUMER_KEY, CONSUMER_SECRET)

# Client object used by the following cells for every Twitter request.
twitter_api = twitter.Twitter(auth=auth)

In [None]:
# Number of results requested per search call.
count = 100

# A basic Twitter search: the query is simply the hashtag built from `tag`.
hashtag_query = '#' + tag
search_results = twitter_api.search.tweets(q=hashtag_query, count=count)

In [None]:
# the results have a 'search_metadata' portion which gives us all the information we need for further pagination
# (its 'next_results' item is what parse_twitter_next reads to find the next page)
search_results['search_metadata']

In [None]:
# we need to parse the 'max_id' value from the 'next_results' item (in order to paginate through the next results)

def parse_twitter_next(_metadata):
    """Extract the ``max_id`` pagination cursor from Twitter search metadata.

    Parameters
    ----------
    _metadata : dict
        The ``search_metadata`` part of a search response. Its
        ``next_results`` item is treated as a query string whose first
        character is a prefix to skip, e.g. ``'?max_id=123&q=%23tag'``.

    Returns
    -------
    int or None
        The ``max_id`` value as an integer, or ``None`` when ``next_results``
        or ``max_id`` is missing or the value cannot be parsed.
    """
    try:
        query_string = _metadata['next_results'][1:]  # drop the leading prefix character
        params = {}
        for pair in query_string.split('&'):
            # partition never fails to unpack: a parameter with no '=' gets an
            # empty value, and one containing extra '=' keeps them in the value.
            key, _sep, value = pair.partition('=')
            params[key] = value
        return int(params['max_id'])
    except (KeyError, TypeError, ValueError, AttributeError):
        # Missing keys, non-dict/non-string input, or a non-numeric max_id all
        # mean "no usable cursor". Other exceptions (e.g. KeyboardInterrupt)
        # are deliberately allowed to propagate.
        return None

In [None]:
# our newly created function works!! (extracts the value of the max_id variable in the 'next_results' url)
# a None result here would mean the metadata had no parsable 'next_results' / max_id
parse_twitter_next(search_results['search_metadata'])

In [None]:
# paginate through Twitter results -> GET ALL THE DATA!
# Upper bound on the number of follow-up pages requested after the first one.
num_iterations = 30

# First page of results; its statuses seed the accumulated list.
search_results = twitter_api.search.tweets(q='#'+tag, count=count)
statuses = search_results['statuses']

for i in range(num_iterations):
    max_id = parse_twitter_next(search_results['search_metadata'])
    if max_id is None:
        # No cursor for a following page: stop instead of issuing further
        # requests with max_id=None for the remaining iterations.
        break
    search_results = twitter_api.search.tweets(q='#'+tag, count=count, max_id=max_id)
    statuses += search_results['statuses']

# (print with a single parenthesised argument behaves the same on Python 2 and 3.)
print(len(statuses))

In [None]:
# Build the tweets DataFrame: one row per status, holding the raw
# 'created_at' value and the author's screen name.
tw_df = pd.DataFrame(dict(
    times=[status['created_at'] for status in statuses],
    users=[status['user']['screen_name'] for status in statuses],
))

In [None]:
# Preview the first rows of the tweets frame ('times' and 'users' columns).
tw_df.head()

In [None]:
# Here it's a bit more complicated - we need to parse the date string into a datetime object, which we can then
# use to set the second/minute values to zero (just like we did above for Instagram)

from datetime import datetime

def make_tw_date(dt_str):
    """Parse a Twitter timestamp string and truncate it to the hour.

    The string must match ``'%a %b %d %H:%M:%S +0000 %Y'`` (the ``+0000``
    offset is matched literally); ``datetime.strptime`` raises ``ValueError``
    otherwise. The returned datetime has minute and second set to zero.
    """
    parsed = datetime.strptime(dt_str, '%a %b %d %H:%M:%S +0000 %Y')
    return parsed.replace(minute=0, second=0)

# Parse each raw timestamp string into its hour bucket. Series.apply calls
# make_tw_date once per value, which avoids building a one-element row Series
# for every record the way a row-wise DataFrame.apply(axis=1) does.
tw_df['dt'] = tw_df['times'].apply(make_tw_date)

# Index by the hour bucket so later cells can group and slice by time.
tw_df = tw_df.set_index('dt')

### Merge Two DataFrames

In [None]:
# Plot the hourly post counts of both sources from 2015-10-07 onward, each
# with its own legend label.
for hourly_frame, series_label in ((tw_df, 'tweets'), (ig_df, 'instagram')):
    hourly_frame.groupby(level=0).size()['2015-10-07':].plot(figsize=(20, 6), label=series_label)

legend()
title('#%s - tweets and instagram posts' % tag)
xlabel('hourly counts')
ylabel('number of posts')