In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import requests

from dataloader import COLDataLoader
from colgeocoder import COLGeoUtil
import pdb




In [10]:
class COLCalculator():
    #Cost of Living Calculator
    #Instantiate with the path to the Zillow city csvs and Consumer Expenditure xlsxs
    def __init__(self, zillow_path, ce_path, state_tax_path):
        self.data = COLDataLoader(zillow_path, ce_path, state_tax_path)
        self.data.load()
        self.gc = COLGeoUtil()
        
    def calculate(self, gross_income, city, state, married=False):
        #Calculation function: returns a dict (or json) with the following values
        #income_taxes (Tuple): (Float: Approx amount of taxes paid, Float: Approx. Effective Tax Rate)
        #housing_rent (Tuple): (Int: sqft affordable at the housing % ratio, Float: Ratio of housing/income)
        #housing_own (Tuple): (Int: sqft affordable at the housing % ratio, Float: Ratio of housing/income)
        geocode = self.gc.geocode_one([city,state])
        tax_amount, tax_rate, net_income = self._get_taxes(gross_income, city, state, married)
        ratio_dict = self._get_ratios(gross_income, geocode)
        housing_rent, housing_own = self._get_housing(gross_income, ratio_dict, geocode)

        return {'taxes': tuple((tax_amount, tax_rate, net_income)), 'geocode' : geocode,
               'ratios': ratio_dict, }
    
    # Tax Calculation  - Federal and State
    def _get_taxes(self, gross_income, city, state, married):
        #returns tax amount, tax %, and net income
        fed_tax, fed_rate = self._calc_fed_tax(gross_income, married)
        state_tax, state_rate = self._calc_state_tax(gross_income, state, married)
        total_tax = fed_tax + state_tax
        net_income = gross_income - total_tax
        effective_tax = total_tax/gross_income

        return total_tax, effective_tax, net_income


    
    def _calc_fed_tax(self, gross_income, married):
        #tax withholding from https://www.irs.gov/pub/irs-pdf/n1036.pdf
        #Returns Tax Amount and rate used to calculate withholding
        single_rate = np.array([[3800, 0, .1],
                                [13500, 970,  .12],
                                [43275, 4543,  .22],
                                [88000, 14382, .24],
                                [164525, 32748,  .32],
                                [207900, 46628, .35],
                                [514100, 153799, .37]]
                                )

        single_rate = np.hstack((single_rate, 
                                np.append(single_rate[1:,0],np.inf).reshape(-1,1)
                                ))

        married_rate = np.array([[11800, 0, .1],
                                 [31200, 1940, .12],
                                 [90750, 9086, .22],
                                 [180200, 28765, .24],
                                 [333250, 65497, .32],
                                 [420000, 93257, .35],
                                 [624150, 164710, .37]
                                ])

        married_rate = np.hstack((married_rate, 
                                np.append(married_rate[1:,0],np.inf).reshape(-1,1)
                                ))
        rate = single_rate
        if married:
            rate = married_rate

        mask = np.logical_and(rate[:,0]<=gross_income, rate[:,-1]>gross_income)

        mask = np.logical_and(rate[:,0]<=gross_income, rate[:,-1]>gross_income)
        if mask.any():
            threshold, base_tax, rate = rate[mask][0][0], rate[mask][0][1], rate[mask][0][2]
            return ((base_tax + (gross_income-threshold)*rate), rate)
        else:
            return (0, 0)

        
    def _calc_state_tax(self,gross_income, state, married):
        #Returns State Tax Amount and Rate used to calculate the amount
        income = gross_income - 12000
        if married:
            income = gross_income - 24000
        tax_rates = self.data.df_state_tax.loc[self.data.df_state_tax.stateAbbr == state]
        mask = np.logical_and(tax_rates.iloc[:,-2].values<gross_income, tax_rates.iloc[:,-1].values>=gross_income)
        rate = tax_rates.loc[mask,'incomeTaxRate'].values[0]
        return (income*rate, rate)
    
    def _get_ratios(self, gross_income, geocode):
        #get closest city
        ce_idx = self.gc.get_closest_index(self.data.ce_geocodes, geocode)
        df = self.data.df_ce.iloc[:,ce_idx]
        df.dropna(axis=0)
        
        #Make % ratio of gross income spent on living items
        ratios = df / df['Income before taxes']
        
        #Hard-code list of spending items (in order of importance) that should total 
        items_expenditure = ['Housing', 'Food', 'Transportation', 'Healthcare',
                             'Apparel and services', 'Entertainment', 'Personal care products and services',
                             'Reading', 'Education', 'Tobacco products and smooking supplies', 'Miscellaneous',
                             'Cash contributions', 'Personal insurance and pensions']
        quality_of_life = ['Apparel and services', 'Entertainment', 'Personal care products and services',
                             'Reading', 'Education', 'Tobacco products and smooking supplies', 'Miscellaneous',
                             'Cash contributions', 'Personal insurance and pensions']
        
        out_dict = {}
        for key in items_expenditure:
            ratio = ratios[key]
            value = ratio*gross_income
            out_dict[key] = tuple((value, ratio))
        return out_dict
    
    def _get_housing(self, gross_income, ratio_dict, geocode):
        #Returns two tuples
        #housing_own (Tuple): (Int: sqft affordable at the housing % ratio, Float: $/sqft, max # people)
        #housing_rent (Tuple): (Int: sqft affordable at the housing % ratio, Float: $/sqft, max # people)
        
        city_own_idx = self.gc.get_closest_index(self.data.df_zil_own.lat_lng.values, geocode)
        city_rent_idx = self.gc.get_closest_index(self.data.df_zil_rent.lat_lng.values, geocode)
        
        own_cost_per_sqft = self.data.df_zil_own.iloc[city_own_idx, -1]
        rent_cost_per_sqft = self.data.df_zil_rent.iloc[city_rent_idx, -1]
        
        #Need a mortgage calculator
        
        #Calc rent sqft
        

In [11]:
base_ce_path = '../data/bls_ce/msa'
base_zil_path = '../data/zillow/city'
base_tax_path = '../data/state_tax'

calculator = COLCalculator(base_zil_path,base_ce_path,base_tax_path)

In [12]:
calculator.calculate(100000, 'San Francisco', 'CA')

getting place id for San Francisco, CA
getting geocode for ChIJIQBpAG2ahYAR_6128GcTUEo


{'taxes': (25446.0, 0.25446, 74554.0),
 'geocode': array([  37.7749295, -122.4194155])}

In [14]:
np.vstack(calculator.data.ce_geocodes)

array([[  41.8781136,  -87.6297982],
       [  42.331427 ,  -83.0457538],
       [  44.977753 ,  -93.2650108],
       [  38.6270025,  -90.1994042],
       [  38.9071923,  -77.0368707],
       [  39.2903848,  -76.6121893],
       [  33.7489954,  -84.3879824],
       [  25.7616798,  -80.1917902],
       [  32.7554883,  -97.3307658],
       [  29.7604267,  -95.3698028],
       [  27.950575 ,  -82.4571776],
       [  40.7127753,  -74.0059728],
       [  39.9525839,  -75.1652215],
       [  42.3600825,  -71.0588801],
       [  34.0522342, -118.2436849],
       [  37.7749295, -122.4194155],
       [  32.715738 , -117.1610838],
       [  47.6062095, -122.3320708],
       [  33.4483771, -112.0740373],
       [  39.7392358, -104.990251 ],
       [  21.3069444, -157.8583333],
       [  61.2180556, -149.9002778]])

In [34]:
df_ce = calculator.data.df_ce

In [35]:
df = df_ce.iloc[:,1].dropna(axis=0)

In [36]:
df.loc['Income before taxes']

80928.0

In [42]:
ratios = df / df['Income before taxes']

In [43]:
ratios

Adults 65 and older                        0.000005
Age of reference person                    0.000654
Alcoholic beverages                        0.008131
Apparel and services                       0.025974
Average annual expenditures                0.743661
Cash contributions                         0.018856
Cereals and bakery products                0.006957
Children under 18                          0.000006
Dairy products                             0.004844
Earners                                    0.000016
Education                                  0.018918
Entertainment                              0.037095
Food                                       0.091402
Food at home                               0.050823
Food away from home                        0.040579
Fruits and vegetables                      0.009997
Gasoline, other fuels, and motor oil       0.024009
Healthcare                                 0.062759
Household furnishings and equipment        0.024787
Household op

AttributeError: 'Series' object has no attribute 'info'