## xml_recapitulation

E. Quinn 7/3/2021

Read the XML recapitulation summary and export the valuation and tax history by year to a CSV file.

To do:

Get the 2012 photo and update the XML file

## Import standard Python data science packages

In [24]:
import sys
import math
import re
import copy
import numpy as np
import scipy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
%matplotlib inline

In [25]:
from datetime import datetime, timedelta, date
from datascience import *
import uuid
import random

### Show the directory we are running in

In [26]:
!pwd

/home/gquinn/EG/RI_Div_of_Municipal_Finance/notebooks


In [27]:
# ElementTree (standard library) does the XML parsing
import xml.etree.ElementTree as ET

# Load the document; the path is relative to the current working directory
tree = ET.parse('../recapitulation.xml')

# The root element wraps the whole document
root = tree.getroot()

# Show the root element's repr (tag name and memory address)
print(root)

<Element 'recapitulations' at 0x7f777b7aaea0>


### Decode recapitulation xml file

In [28]:
def get_amt(elem):
    """Return the numeric amount held by an XML element.

    If ``elem`` has child elements, the amount is the sum of the float
    values of the children's text; otherwise it is the float value of the
    element's own text.

    Text that is missing (``None``, e.g. an empty element) or that cannot be
    parsed as a float is reported with ``print`` and contributes 0.0, so a
    single bad entry does not abort the whole run.

    Parameters
    ----------
    elem : xml.etree.ElementTree.Element
        Element whose amount is wanted.

    Returns
    -------
    float
        The total amount; 0.0 when nothing could be parsed.
    """
    children = list(elem)
    # A parent element is summed over its children; a leaf stands for itself.
    sources = children if children else [elem]

    amt = 0.0
    for src in sources:
        try:
            amt += float(src.text)
        except (TypeError, ValueError) as err:
            # TypeError: text is None (empty element); ValueError: non-numeric text.
            print('get_amt ' + type(err).__name__ + ' ', src, src.text)
    return amt

In [29]:
def get_element(year,cat,elem,key,dd):
    """Record in ``dd`` the element tagged ``key`` found at or below ``elem``.

    The subtree rooted at ``elem`` is searched depth-first.  An element whose
    tag equals ``key`` is stored at ``dd[year][cat][key]['element']``, with
    the intermediate ``cat`` and ``key`` dictionaries created as needed.  The
    search does not descend into a matching element.  If several elements
    match, the last one visited overwrites the earlier ones.

    Parameters
    ----------
    year : hashable
        Key of an entry that must already exist in ``dd``.
    cat : str
        Category name used as the second-level key.
    elem : xml.etree.ElementTree.Element or None
        Root of the subtree to search.  ``None`` (for example the result of
        an unsuccessful ``find``) is accepted and records nothing.
    key : str
        Tag name to look for.
    dd : dict
        Nested dictionary updated in place.

    Returns
    -------
    None
    """
    if elem is None:
        # Nothing to search, e.g. the category is absent for this year.
        return

    if elem.tag == key:
        dd[year].setdefault(cat, {}).setdefault(key, {})['element'] = elem
        return

    for child in elem:
        get_element(year, cat, child, key, dd)
    return

In [30]:
# Categories and the line items looked up within each category.  They are
# defined once, before the loops, so they also exist when the document
# contains no recapitulation entries.
cats = ['valuation','tax']
sums = ['real_estate','personal_property','motor_vehicle','commercial_industrial']

# rcd[year] holds 'valuation_date' and, per category, {item: {'element': ..., 'amt': ...}}
rcd = {}

# Pass 1: locate the XML element for every category/item of every year
for recap in root:
    year = int(recap.find('year').text)                       #'year' is always present
    if year not in rcd:
        rcd[year] = {}                                        #year is high level index into dictionary
    valuation_date = recap.find('valuation_date').text        #'valuation_date' is always present
    rcd[year]['valuation_date'] = valuation_date
    for cat in cats:
        e = recap.find(cat)
        if e is None:
            # this year has no such category; nothing to record
            continue
        for s in sums:
            get_element(year,cat,e,s,rcd)

# Pass 2: convert each located element into a numeric amount
for year in rcd:
    for cat in cats:
        for s in sums:
            try:
                elem = rcd[year][cat][s]['element']
                amt = get_amt(elem)
                rcd[year][cat][s]['amt'] = amt
            except KeyError:
                # item (or whole category) not present for this year
                continue

rcd

{1919: {'valuation_date': '9/19/1919',
  'valuation': {'real_estate': {'element': <Element 'real_estate' at 0x7f777b70b4a0>,
    'amt': 2137200.0},
   'personal_property': {'element': <Element 'personal_property' at 0x7f777b70b270>,
    'amt': 1057850.0}},
  'tax': {'real_estate': {'element': <Element 'real_estate' at 0x7f777b70b310>,
    'amt': 38469.6},
   'personal_property': {'element': <Element 'personal_property' at 0x7f777b70b4f0>,
    'amt': 10999.7}}},
 1920: {'valuation_date': '9/20/1920',
  'valuation': {'real_estate': {'element': <Element 'real_estate' at 0x7f777b70b090>,
    'amt': 2160550.0},
   'personal_property': {'element': <Element 'personal_property' at 0x7f777bf8ab30>,
    'amt': 1132210.0}},
  'tax': {'real_estate': {'element': <Element 'real_estate' at 0x7f777bf8acc0>,
    'amt': 38889.9},
   'personal_property': {'element': <Element 'personal_property' at 0x7f777b70d900>,
    'amt': 11831.38}}},
 1921: {'valuation_date': '6/15/1921',
  'valuation': {'real_estate

In [31]:
# Column order of the CSV: for each line item (cols), valuation then tax (catcols)
cols = {1:sums[0],2:sums[1],3:sums[2],4:sums[3]}
catcols = {1:cats[0],2:cats[1]}

# The output file name carries today's date as month_day_year
current_date = date.today()
fname = '../EG_valuation_and_tax_history_' + str(current_date.month) + '_' + \
    str(current_date.day) + '_' + str(current_date.year) + '.csv'

hstr = '"Year","Valuation Date","Real Estate Valuation","Real Estate Tax","Personal Property Valuation"'\
    + ',"Personal Property Tax","Motor Vehicle Valuation","Motor Vehicle Tax"'\
    + ',"Commercial Valuation","Commercial Tax"\n'

# The context manager closes the file even if formatting a row raises
with open(fname,'w') as file1:
    file1.write(hstr)

    for year in sorted(rcd.keys()):

        # one row per year: quoted year and valuation date, then the amounts
        hstr = '"' + str(year) + '","' + rcd[year]['valuation_date'] + '"'
        for s in sorted(cols.keys()):
            for cat in sorted(catcols.keys()):
                try:
                    amt = str(round(rcd[year][catcols[cat]][cols[s]]['amt'],2))
                except KeyError:
                    # no value for this item/category in this year: leave the cell empty
                    amt = ''
                hstr += ',' + amt
        file1.write(hstr + '\n')