In [1]:
import json
import datetime
import cPickle as pickle

import numpy as np

from astropy.time import Time
from astropy import units as u

# Accessing twitter/downloading tweets 

Note that Twitter's search API only keeps tweets for a limited time, so you'll probably want to use the saved version if you're actually looking for the data on LGAstat during the conference.

In [2]:
import tweepy
# Display the installed tweepy version as the cell output, to record which
# version of the library this notebook was run with.
tweepy.__version__

'3.3.0'

In [None]:
def search_all(query, api, maxcount):
    """
    Get up to `maxcount` tweets from the search query `query`, using the `api` object.

    Results are fetched in pages of up to 100. After the first page, each
    request passes `max_id` set to one less than the id of the last tweet
    already collected, so pages do not overlap. Paging stops when a request
    comes back empty or when at least `maxcount` tweets have been collected.

    Parameters
    ----------
    query : str
        Search string handed to `api.search`.
    api : object
        Must provide `search(query, count=..., max_id=...)` returning a
        sequence of items that have `id` and `created_at` attributes.
    maxcount : int
        Maximum number of tweets to return.

    Returns
    -------
    list
        At most `maxcount` items, in the order the API returned them.
    """
    res = []
    lastid=None
    while len(res) < maxcount:
        if lastid is None:
            s = api.search(query, count=100)
        else:
            # only ask for ids below the last one we already have
            s = api.search(query, count=100, max_id=lastid-1)
        if len(s)<1:
            print('finished search')
            break
        res.extend(s)
        lastid = s[-1].id
        print('with',len(res),'we go back to',s[-1].created_at)
    # The final page can push the total past `maxcount`; trim so the
    # documented "up to `maxcount`" contract actually holds.
    return res[:maxcount]
def write_tweets(tws, fname):
    """
    Write out the tweets to JSON and pickle files

    Two files are produced from the base path `fname`:

    * ``fname + '.json'``   -- a JSON list built from each tweet's `_json`
      attribute.
    * ``fname + '.pickle'`` -- a pickle of the `tws` sequence itself, written
      with protocol -1 (the highest available).

    Parameters
    ----------
    tws : sequence
        Tweet objects; each must expose a JSON-serialisable `_json` attribute
        and be picklable.
    fname : str
        Output path without extension.
    """
    with open(fname+'.json','w') as f:
        json.dump([si._json for si in tws], f)
    # Protocol -1 is a binary pickle format, so the file must be opened in
    # binary mode; text mode fails on Python 3 and can corrupt the data on
    # platforms that translate line endings.
    with open(fname+'.pickle','wb') as f:
        pickle.dump(tws, f, -1)

You'll need to create a Twitter app, generate an access token, and then set the `consumer_key`, `consumer_secret`, `acess_token`, and `access_token_secret` variables (note that `acess_token` is spelled with a single "c", matching the code in the next cell).

In [None]:
# Build the OAuth handler from the app's consumer credentials. The four
# credential variables are not defined in this notebook and must be set
# beforehand.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
# Attach the access token (the variable really is spelled `acess_token`).
auth.set_access_token(acess_token, access_token_secret)
# `api` is the client object passed to search_all() in the next cell.
api = tweepy.API(auth)

In [None]:
# Collect up to 10000 tweets matching the conference hashtag.
tweets = search_all(query='#LGAstat', api=api, maxcount=10000)

In [None]:
# Take the creation timestamp of every tweet and convert the whole list
# to `.jd` values through astropy's Time.
dates = [tweet.created_at for tweet in tweets]
jds = Time(dates).jd

In [None]:
#this cell writes them out to local files
# write_tweets produces 'tweets.json' and 'tweets.pickle'; the `jds` array is
# saved separately with np.save under the name 'tweets_jd'.
write_tweets(tweets, 'tweets')
np.save('tweets_jd', jds)

# Or load from local file 

Loading the tweets isn't necessary if you only care about the timestamps, so you can skip to the last cell of this section if that's the case.

In [None]:
import tweepy  # must be installed to use the pickled tweepy objects

# NOTE: unpickling can execute arbitrary code -- only load a pickle file that
# you created yourself or that comes from a source you trust.
# Pickle data is binary, so the file is opened in binary mode ('rb'); text
# mode fails on Python 3 and can corrupt the data on platforms that translate
# line endings.
with open('tweetstothnight.pickle', 'rb') as f:
    tweets = pickle.load(f)
# Same conversion as in the download section: timestamps -> `.jd` values.
dates = [e.created_at for e in tweets]
jds = Time(dates).jd

In [None]:
# Alternative to the pickle: rebuild the timestamps from the raw JSON dump.
with open('tweetstothnight.json') as f:
    tweetsjson = json.load(f)

dates = []
for e in tweetsjson:
    # strip the '+0000 ' field so the string matches the strptime format
    stamp = e['created_at'].replace('+0000 ','')
    dates.append(datetime.datetime.strptime(stamp, '%a %b %d %H:%M:%S %Y'))
jds = Time(dates).jd

In [3]:
# Load the precomputed `jds` array straight from the saved .npy file; the
# tweets themselves are not needed for this.
jds = np.load('tweets_jd_tothnight.npy')

# Plots 

In [4]:
%matplotlib inline
import mpld3
from matplotlib import pyplot as plt

In [5]:
# Reference instant for the plots: 9am eastern, June 1st (13:00 as entered
# here -- presumably interpreted as UTC by Time; confirm).
startjd = Time(datetime.datetime(year=2015, month=6, day=1, hour=13)).jd
# offsets of every tweet from that reference, in the same units as `jds`
djds = jds - startjd

In [6]:
# Cumulative tweet count against time offset, drawn as a step curve and also
# exported as mpld3 JSON.
fig, ax = plt.subplots()
ax.step(np.sort(djds), np.arange(len(djds)))
ax.set_xlabel('jd - LGAjd')
ax.set_ylabel('Cumulative tweets')
mpld3.save_json(fig, 'tweet_cumulative_jds.json')
mpld3.display()

In [7]:
# Sliding-window tweet rate: for each sample point in `xs`, count the tweets
# whose offset lies inside a 15-minute window centred on that point.
width = (15*u.min).to(u.day)
hwval = width.value/2  # half-width of the window, in days

xs = np.linspace(-4,3.5,4000)
# strict inequalities: a tweet exactly on a window edge is not counted
rates = [np.sum((djds > dj - hwval) & (djds < dj + hwval)) for dj in xs]

fig, ax = plt.subplots()
ax.plot(xs, rates)
ax.set_xlabel('jd - LGAjd')
ax.set_ylabel('Tweets per {0}'.format(width.to(u.min)))
mpld3.save_json(fig, 'tweetrates_jds.json')
mpld3.display()