In [None]:
#Adapted from Marc Joffe, 2016 

In [2]:
import os
import sys
import json
from backend import *

from datetime import date
import dateutil.parser as date_parser

from itertools import chain

In [3]:
tables_path = "./static/ug/muni_bonds/ER588705-ER457598-ER860368.pdf.txt.tables.json"
with codecs.open(tables_path, "r", "utf-8") as file:
    tables = json.load(file)

funds_table = tables['572']
schedule_table = tables['606']
#print json.dumps(schedule_table)

#subtypes = Counter(c[0]['subtype'] for c in schedule_table['data'] if c[0]['subtype'] != "none") 
#print subtypes
print schedule_table['subtypes']



[u'date', u'dollar', u'dollar', u'dollar', u'dollar', u'dollar']


In [4]:
def get_fuzzy_date(string):
    today = date.today()
    v_ascii = string.encode("ascii", errors="ignore")
    try: 
        dt = date_parser.parse(v_ascii, fuzzy=True, default=today)
        if dt != today:
            return dt
    except:
        return None
    
def get_first_date(lines, query_string, threshold = 0.4):
    for i, l in enumerate(lines):
        if fuzzy_str_match(query_string, l) > threshold: 
            dt = get_fuzzy_date(l)
            if dt:
                return dt, i, l
                   
def find_row(table, query_string):
    #Find first 'other' typed row
    try:
        index = table['types'].index('other')
    except ValueError:
        print "no column with mainly string data found"
        return None
    
    strings = (s[index]['value'] for s in table['data'])
    scores_indices = ((val, idx) for (idx, val) in enumerate(fuzzy_str_match(query_string, s) for s in strings ) )
    
    return table['data'][max(scores_indices)[1]]

def find_column(table, query_string, types=None, subtypes=None, threshold = 0.4):
    #Find first column with specific types
    
    columns = []
    for i, t in enumerate(zip(table['types'], table['subtypes'])):
        t, st = t[0], t[1]
        if t in (types or t) and st in (subtypes or st):
            if fuzzy_str_match(query_string, table['captions'][i]) > threshold: return i


def closest_row_numeric_value(table, query_string):
    row = find_row(table, query_string)
    if row:
        for c in row:
            if c['type'] in ('integer'): 
                return int(c['value'])
            elif c['type'] in ('large_num', 'small_float'):
                return float(c['value'].replace(",", ""))

In [5]:
def filter_time_series(table, query_string, subtypes = ['dollar'], treshold = 0.4):
    time_index = find_column(table, "", subtypes=['date'])
    value_index = find_column(schedule_table, query_string, subtypes = subtypes)

    for r in table['data']:
        dt = get_fuzzy_date(r[time_index]['value'])
        if dt:
            c = r[value_index]
            v = None
            if c['type'] in ('integer'): 
                v = int(c['value'])
            elif c['type'] in ('large_num', 'small_float'):
                v = float(c['value'].replace(",", ""))
            if v: yield dt, v

In [11]:
def get_key_values(table, key_queries):
    return { k : closest_row_numeric_value(table, kk) for k, kk in key_queries.iteritems() }

first_cf_dict = {'face_value' : 'Principal Amount', 'premium_or_discount' : 'Issue Premium',
                'underwriter_discount' : 'Underwriter Discount', 'cost_of_issuance' : 'Costs of Issuance'}

def calc_net_proceeds(table):
    v = get_key_values(table, first_cf_dict)
    net_proceeds_calc = v['face_value'] + v['premium_or_discount'] - v['underwriter_discount'] - v['cost_of_issuance']   
    return net_proceeds_calc

In [28]:
debug_each_guess = True  # Change to True for verbose output
guess = .05
guess_num = 1
guesses = []


def newton(func, x0, fprime=None, args=(), tol=1.48e-8, maxiter=50):
    """Given a function of a single variable and a starting point,
    find a nearby zero using Newton-Raphson.

    fprime is the derivative of the function.  If not given, the
    Secant method is used.

    # Source: http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.newton.html
    # File:   scipy.optimize.minpack.py
    # License: BSD: http://www.scipy.org/License_Compatibility
    """

    if fprime is not None:
        p0 = x0
        for iter in range(maxiter):
            myargs = (p0,)+args
            fval = func(*myargs)
            fpval = fprime(*myargs)
            if fpval == 0:
                print "Warning: zero-derivative encountered."
                return p0
            p = p0 - func(*myargs)/fprime(*myargs)
            if abs(p-p0) < tol:
                return p
            p0 = p
    else: # Secant method
        p0 = x0
        p1 = x0*(1+1e-4)
        q0 = func(*((p0,)+args))
        q1 = func(*((p1,)+args))
        for iter in range(maxiter):
            if q1 == q0:
                if p1 != p0:
                    print "Tolerance of %s reached" % (p1-p0)
                return (p1+p0)/2.0
            else:
                p = p1 - q1*(p1-p0)/(q1-q0)
            if abs(p-p1) < tol:
                return p
            p0 = p1
            q0 = q1
            p1 = p
            q1 = func(*((p1,)+args))
    raise RuntimeError, "Failed to converge after %d iterations, value is %s" % (maxiter,p)


def eir_func(rate, pmts, dates):
    """Loop through the dates and calculate a discounted cashflow total

    This is a simple process, but the debug messages clutter it up to
    make it seem more complex than it is.  With the debug messages removed,
    it is very similar to eir_derivative_func, but with the EIR formula,
    rather than f'rate.

    Credit: http://mail.scipy.org/pipermail/numpy-discussion/2009-May/042736.html
    """

    print_debug_messages = False
    
    # Globals used for debug printing
    global guess_num
    global debug_each_guess
    global guesses
    
    if rate not in guesses:
        print_debug_messages = debug_each_guess
        guesses.append(rate)
        if print_debug_messages:
            print "-----------------------------------------------------------------------------------------------"
            print "Guess #%s:  %s" % (guess_num, rate)
            print ""
            print "   # DATE          # DAYS  CASHFLOW      DISCOUNTED    Formula: cf * (rate + 1)^(-days/365)"
            print "   --------------------------------------------------------------------------------------------"
        guess_num +=1

    dcf=[]
    for i, cf in enumerate(pmts):
        d = dates[i] - dates[0]
        discounted_period = cf * (rate + 1)**(-d.days / 365.)
        dcf.append( discounted_period )

        if print_debug_messages:
            cf = "%.2f" % cf
            cf = cf.rjust(9, " ")
            discounted_period = '%.8f' % discounted_period
            formula = '%s * ((%0.10f + 1)^(-%d /365)) ' % (cf, rate, d.days)
            discounted_period = discounted_period.rjust(15, " ")
            print "  %2i %s  %3.0d days %s %s =%s"  % \
            (i, dates[i], d.days, cf, discounted_period, formula )

    discounted_cashflow = sum(dcf)

    if print_debug_messages:
        discounted_cashflow = "%.8f" % discounted_cashflow
        total = "total:".rjust(35, " ")
        print "%s %s" % (total, discounted_cashflow.rjust(15, " "))
        print ""

    return discounted_cashflow

def eir_derivative_func(rate, pmts, dates):
    """Find the derivative or the EIR function, used for calculating
    Newton's method:

    http://en.wikipedia.org/wiki/Newton's_method

    EIR = cf*(1+rate)^d
    f'rate = cf*d*(rate+1)^(d-1)

    Credit: http://mail.scipy.org/pipermail/numpy-discussion/2009-May/042736.html
    """

    dcf=[]
    for i, cf in enumerate(pmts):
        d = dates[i] - dates[0]
        n = (-d.days / 365.)
        dcf.append( cf * n * (rate + 1)**(n - 1) )
    return sum(dcf)

In [29]:
txt_path = "./static/ug/muni_bonds/ER588705-ER457598-ER860368.pdf.txt"
with codecs.open(txt_path, 'r', 'utf-8', errors='replace') as file:
    due_date, date_linenr, line_str = get_first_date(file, 'deliver') 

net_proceeds = calc_net_proceeds(funds_table)


In [33]:
cnt = 0 
rate = None
dates = {}
payments = []


cf_time = chain( ((due_date, net_proceeds),) , 
                ((d, -v) for d,v in filter_time_series(schedule_table, "Debt Service")))

# Convert our sequence of dates and cashflows into random access iterables
for i, cf_dt in enumerate(cf_time):
    date, cf = cf_dt[0], cf_dt[1]
    dates[i]=date
    payments.append(cf)


import time
# Begin Main Calculation
timer_start = time.clock()
if len(dates) > 1:
    f = lambda x: eir_func(x, payments, dates)
    derivative = lambda x: eir_derivative_func(x, payments, dates)
    try:
        rate = newton(f, guess, fprime=derivative, args=(),
            tol=0.00000000001, maxiter=100)
    except RuntimeError:
        pass # failed to converge after maxiterations

timer_end = time.clock()
# End Main Calculation

elapsed_time = timer_end - timer_start
final_rate = rate * 100

if not debug_each_guess:
    print ""
    print "Cashflow Dates: "
    print "-------------------------"
    for i, dte in enumerate(dates.values()):
        print "%s %s " % (dte, str(payments[i]).rjust(5," ") )

print """
Guesses Summary
------------------"""

for i, guess in enumerate(guesses):
  print i +1, "%0.10f" % guess

print """
Final Rate: %.5f %%
""" % final_rate

print """Calculation time: %s seconds
""" % elapsed_time

-----------------------------------------------------------------------------------------------
Guess #2:  0.0518759744569

   # DATE          # DAYS  CASHFLOW      DISCOUNTED    Formula: cf * (rate + 1)^(-days/365)
   --------------------------------------------------------------------------------------------
   0 2012-04-26      days 2157204.11 2157204.11000000 =2157204.11 * ((0.0518759745 + 1)^(-0 /365)) 
   1 2013-02-01  281 days -57033.85 -54855.85714578 =-57033.85 * ((0.0518759745 + 1)^(-281 /365)) 
   2 2013-08-01  462 days -37331.25 -35016.34768817 =-37331.25 * ((0.0518759745 + 1)^(-462 /365)) 
   3 2014-02-01  646 days -37331.25 -34134.87468580 =-37331.25 * ((0.0518759745 + 1)^(-646 /365)) 
   4 2014-08-01  827 days -37331.25 -33289.42626174 =-37331.25 * ((0.0518759745 + 1)^(-827 /365)) 
   5 2015-02-01  1011 days -37331.25 -32451.42537211 =-37331.25 * ((0.0518759745 + 1)^(-1011 /365)) 
   6 2015-08-01  1192 days -37331.25 -31647.67241587 =-37331.25 * ((0.0518759745 + 1)^(-119