Permalink
Fetching contributors…
Cannot retrieve contributors at this time
executable file 389 lines (321 sloc) 12.7 KB
#!/usr/bin/env python
import humanize
import math
import datetime
import bitly_api
from optparse import OptionParser
import os, re, yaml, json
import tweepy
from random import choice
from json import load
from urllib2 import urlopen
from urllib import urlencode
from pandas import DataFrame
from requests import get
######################################
# HELPERS
######################################
T_IO = "http://treasury.io"
def load_options():
parser = OptionParser()
parser.add_option("-t", "--tweet-type", dest="tweet_type", default="total_debt",
help="write report to FILE", metavar="FILE")
(options, args) = parser.parse_args()
return options
# Helpers to humanize numbers / dates
def human_number(num):
n = humanize.intword(int(math.ceil(num))).lower()
if re.search(r"^(\d+)\.0 ([A-Za-z]+)$", n):
m = re.search(r"^(\d+)\.0 ([A-Za-z]+)$", n)
n = m.group(1) + " " + m.group(2)
return n
def human_date(date):
def style_day(n):
n = int(n)
return str(n)+("th" if 4<=n%100<=20 else {1:"st",2:"nd",3:"rd"}.get(n%10, "th"))
# apply humanization fx
h = humanize.naturalday(datetime.datetime.strptime(date, "%Y-%m-%d")).title()
# remove zeros
m0 = re.search(r"([A-Za-z]+) 0([0-9])", h)
if m0: h = "%s %s" % ( m0.group(1), m0.group(2) )
# style day
m_day = re.search(r"([A-Za-z]+) (\d+)", h)
if m_day: h = "%s %s" % ( m_day.group(1), style_day(m_day.group(2)) )
# lowercase yesterday and today
if h in ['Yesterday', 'Today']: h = h.lower()
return h
def gen_bitly_link(long_url):
try:
access_token = yaml.safe_load(open("%s/%s" % (os.getenv("HOME"), 'bitly.yml')))['access_token']
except:
return long_url
else:
if access_token is None:
return long_url
else:
btly = bitly_api.Connection(access_token = access_token)
blob = btly.shorten(long_url)
return re.sub("http://", "", str(blob['url']))
######################################
# DATA
######################################
def query(sql):
'''
Submit an `sql` query (string) to treasury.io and return a pandas DataFrame.
For example::
print('Operating cash balances for May 22, 2013')
print(treasury.io('SELECT * FROM "t1" WHERE "date" = \'2013-05-22\';'))
'''
url = 'https://premium.scraperwiki.com/cc7znvq/47d80ae900e04f2/sql/'
query_string = urlencode({'q':sql})
handle = urlopen(url + '?' + query_string)
if handle.code == 200:
d = load(handle)
return DataFrame(d)
else:
raise ValueError(handle.read())
######################################
# TWITTER
######################################
def connect_to_twitter(config = os.path.expanduser("~/.twitter.yml")):
conf = yaml.safe_load(open(config))
auth = tweepy.OAuthHandler(conf['consumer_key'], conf['consumer_secret'])
auth.set_access_token(conf['access_token'], conf['access_token_secret'])
api = tweepy.API(auth)
return api
def tweet(tweet_text_func):
'''
A decorator to make a function Tweet
Parameters
- `tweet_text_func` is a function that takes no parameters and returns a tweetable string
For example::
@tweet
def total_deposits_this_week():
# ...
@tweet
def not_an_interesting_tweet():
return 'This tweet is not data-driven.'
'''
def tweet_func():
api = connect_to_twitter()
tweet = tweet_text_func()
print "Tweeting: %s" % tweet
try:
api.update_status(tweet)
except tweepy.error.TweepError as e:
print e
pass
else:
return tweet
return tweet_func
######################################
# TWEETS
######################################
T2_ITEM_DICT = {
"Unemployment": "Unemployment",
"Education Department programs": "the Education Dept.",
"Energy Department programs": "the Energy Dept.",
"Medicaid": "Medicaid",
"Medicare": "Medicare",
"Social Security Benefits ( EFT )": "social security benefits",
"NASA programs": "NASA",
"Housing and Urban Development programs": "housing and urban development programs",
"Justice Department programs": "justice dept. programs",
"Postal Service": "the postal service",
"Defense Vendor Payments ( EFT )": "military contractors",
"Federal Employees Insurance Payments": "fed. employees ins. payments",
"Fed Highway Administration programs": "the federal hwy admin",
"Federal Salaries ( EFT )": "federal salaries",
"Food Stamps": "food stamps",
"Postal Service Money Orders and Other": "postal service money orders",
"Interest on Treasury Securities": "interest on treasury securities",
"Temporary Assistance for Needy Families ( HHS )": "Welfare",
"Veterans Affairs Programs": "veterans affairs programs",
"Air Transport Security Fees": "air transport security fees",
"Railroad Unemployment Ins": "railroad unemployement insurance",
"FSA Tobacco Assessments": "Tobacco Taxes",
"Agency for International Development": "USAID",
"Securities and Exchange Commission": "the SEC",
"Natl Railroad Retirement Inv Trust": "the Nat'l Railroad Retirement Inv. Trust",
"Federal Communications Commission": "the FCC",
"SEC: Stock Exchange Fees": "stock exchange fees",
"Environmental Protection Agency": "the EPA",
"IRS Tax Refunds Business ( EFT )": "tax refunds for businesses",
"IRS Tax Refunds Individual ( EFT )": "tax refunds for individuals",
"Military Active Duty Pay ( EFT )": "military active duty pay",
"Veterans Benefits ( EFT )": "veterans benefits",
"State Department": "the State dept.",
"Library of Congress": "the Lib. of Congress",
"Federal Trade Commission": "the FTC",
"Transportation Security Admin ( DHS )": "the TSA",
"TARP": "TARP",
"Interior": "Interior",
"USDA: Forest Service": "the forest service"
}
@tweet
def random_comparison_tweet():
df = query('''SELECT date, item, fytd, url
FROM t2
WHERE transaction_type = 'withdrawal' AND date = (SELECT max(date) FROM t2)''')
# get two random items to compare
item_1_df = df[df.item==choice([i for i in df.item if i in set(T2_ITEM_DICT.keys())])]
item_2_df = item_1_df
while item_2_df.item == item_1_df.item:
item_2_df = df[df.item==choice([i for i in df.item if i in set(T2_ITEM_DICT.keys())])]
item_1 = T2_ITEM_DICT[str([i for i in item_1_df.item][0])]
item_2 = T2_ITEM_DICT[str([i for i in item_2_df.item][0])]
# detmine diff and value
if int(item_1_df.fytd) > int(item_2_df.fytd):
per_diff = 100*(float(item_1_df.fytd) / float(item_2_df.fytd))
else:
per_diff = 100*(float(item_2_df.fytd) / float(item_1_df.fytd))
# switch item 1 and 2
item_3 = item_1
item_1 = item_2
item_2 = item_3
per = str(int(math.ceil(per_diff))) + "%"
btly = gen_bitly_link(str(df['url'][0]))
vals = (per, item_1, item_2, btly)
return "The US Gov has spent %s more on %s than on %s this fiscal year - %s" % vals
@tweet
def random_item_tweet():
df = query('''SELECT date, item, today, transaction_type, url
FROM t2
WHERE date = (SELECT max(date) FROM t2)''')
the_df = df[df.item==choice([i for i in df.item if i in set(T2_ITEM_DICT.keys())])]
# determine change
if len(the_df) == 1:
if the_df['transaction_type'] == "deposit":
change = "took in"
preposition = "from"
elif the_df['transaction_type'] == "withdrawal":
change = "spent"
preposition = "on"
val = int(the_df['today'])
else:
val = sum(the_df[the_df.transaction_type == 'deposit']['today']) - sum(the_df[the_df.transaction_type == 'withdrawal']['today'])
if val > 0:
change = "took in"
preposition = "from"
else:
change = "spent"
preposition = "on"
# gen values
btly = gen_bitly_link(df['url'][0])
the_date = human_date(df['date'][0])
if the_date in ["Yesterday", "Today"]:
intro = ""
else:
intro = "On "
the_val = human_number(abs(val*1e6))
the_item = T2_ITEM_DICT[str([i for i in the_df.item][0])]
vals = (intro, the_date, change, the_val, preposition, the_item, btly, T_IO)
return "%s%s, the US Gov %s $%s %s %s %s \r\n - %s" % vals
@tweet
def total_debt_tweet():
df = query('''SELECT date, close_today, url
FROM t3c
WHERE (item LIKE \'%subject to limit%\' AND year = 2013 AND month >=1)
ORDER BY date DESC''')
# determine length of DataFrame
end = len(df)-1
# extract current amount and amount at the beginning of the year
current_amt = df['close_today'][0]*1e6
previous_amt = df['close_today'][end]*1e6
# calculate change
delta = abs(current_amt - previous_amt)
# generate word to represnet the direction of change
if current_amt > previous_amt:
change = "increased"
elif current_amt < previous_amt:
change = "decreased"
# humanize values
# Notice the included ``human_date`` and ``human_number`` functions which simplify these values for you
btly = gen_bitly_link(df['url'][0])
current_date = human_date(df['date'][0])
amt = human_number(current_amt)
delta = human_number(delta)
previous_date = human_date(df['date'][end])
# generate tweet
vals = (current_date, amt, btly, T_IO)
return "Think you're in debt? As of %s, the US Gov is $%s in the hole! %s \r\n - %s" % vals
def dist_to_debt_ceiling_tweet():
df = query('''SELECT a.date, a.close_today AS debt_ceiling,
b.close_today AS debt_subject_to_ceiling,
a.close_today - b.close_today as distance_from_debt_ceiling
FROM t3c a
INNER JOIN t3c b ON a.date = b.date
WHERE a.item = "Statutory Debt Limit" AND b.item = "Total Public Debt Subject to Limit"
AND a.year = "2008"
ORDER BY a.date DESC
LIMIT 100
''')
@tweet
def change_in_balance_tweet():
df = query('''SELECT close_today - open_today AS change, date, weekday, url
FROM t1
WHERE account = 'Total Operating Balance'
ORDER BY date DESC
LIMIT 1''')
# calculate change
raw_amt = df['change'][0]
if raw_amt < 0:
change = "dropped"
elif raw_amt > 0:
change = "rose"
# humanize number and date
amt = human_number(abs(raw_amt)*1e6)
btly = gen_bitly_link(df['url'][0])
the_date = human_date(df['date'][0])
# generate tweet
vals = (change, amt, the_date, btly, T_IO)
return "The US Gov's total operating balance %s $%s on %s %s \r\n - %s" % vals
@tweet
def is_it_running_tweet():
def date_pair(date_date):
return {
'days': (datetime.date.today() - date_date).days,
'date': date_date,
}
def observed_data():
url = 'https://premium.scraperwiki.com/cc7znvq/47d80ae900e04f2/sql'
sql = '''SELECT MAX(date) AS max_date FROM t1;'''
r = get(url, params = {'q': sql})
date_string = json.loads(r.text)[0]['max_date']
date_date = datetime.datetime.strptime(date_string, '%Y-%m-%d').date()
return date_pair(date_date)
def expected_data():
'The date when the script should have last run'
adate = datetime.date.today()
adate -= datetime.timedelta(days=1)
while adate.weekday() >= 4: # Mon-Fri are 0-4
adate -= datetime.timedelta(days=1)
return date_pair(adate)
def gen_test_tweet():
peeps = "@brianabelson @mhkeller @jbialer @thomaslevine @bdewilde @Cezary"
current_date = datetime.datetime.now()
observed = observed_data()
expected = expected_data()
if observed['days'] > 7:
return "Yo %s! Something is probably wrong - @%s" % (peeps, current_date.date().strftime("%S"))
elif observed['days'] - expected['days'] > 3:
return "Hey %s, something might be wrong unless %s is a holiday! " % (peeps, expected['date'].strftime("%B %d"))
else:
return None
return gen_test_tweet()
######################################
# SELECTOR
######################################
if __name__ == '__main__':
options = load_options()
t = options.tweet_type
if t == 'total_debt':
total_debt_tweet()
elif t == 'change_in_balance':
change_in_balance_tweet()
elif t == 'is_it_running':
is_it_running_tweet()
elif t == 'random_item':
random_item_tweet()
elif t == 'random_comparison':
random_comparison_tweet()