In [1]:
from IPython.display import Markdown, display

# Show the project notes at the top of the notebook.
# NOTE(review): the argument is a bare string; IPython appears to read it as a
# file only when a file of that name exists, otherwise the literal text
# "checkbook.md" is rendered. Passing filename="checkbook.md" would make the
# intent explicit — confirm the desired behaviour when the file is missing.
notes = Markdown("checkbook.md")
display(notes)

## ArlingtonMA [Open Checkbook](https://www.arlingtonma.gov/departments/finance/financial-budget-information/open-checkbook)

Data files are downloaded manually, by funding type.

The extract step could be automated to support periodic updates.

## Set-up

In [None]:
from os import environ
from urllib.parse import quote

from dotenv import load_dotenv, find_dotenv
from sqlalchemy import create_engine

# Read the local .env file (searched for starting at the current working
# directory) and let its values override any variables already set.
_ = load_dotenv(find_dotenv(usecwd=True), override=True)

# Connection settings; each falls back to a local default when unset.
username     =  environ.get("POSTGRES_USERNAME", "postgres")
password     =  environ.get("POSTGRES_PASSWORD", "postgres")
ipaddress    =  environ.get("POSTGRES_IPADDRESS", "localhost")
port         =  environ.get("POSTGRES_PORT", "5432")
dbname       =  environ.get("POSTGRES_DBNAME", "ArlingtonMA")

# Establish the database connection used by the Transform queries and Loads.
# The credentials are percent-encoded because characters such as '@', ':',
# '/' or '%' in a password would otherwise corrupt the URL when it is parsed.
# safe='' also encodes '/', and spaces become %20 rather than '+'.
cnx = create_engine(
    f"postgresql://{quote(username, safe='')}:{quote(password, safe='')}"
    f"@{ipaddress}:{port}/{dbname}"
)


## Extract

In [None]:
import pandas as pd
from glob import glob

# Original note: "gzip downloads from source" (reads like a TODO — confirm).
data_dir = 'checkbook/'


def load_checkbook_files(data_dir):
    """Read every file matching ``data_dir + '*'`` into a single DataFrame.

    Files are read in sorted path order as tab-separated text. Each file's
    rows get a ``year`` column taken from characters -8 to -4 of its path,
    which presumes names ending in a four-digit year followed by a
    four-character extension such as ``.tsv`` — TODO confirm against the
    real download names.

    Parameters
    ----------
    data_dir : str
        Directory prefix, including the trailing path separator.

    Returns
    -------
    pandas.DataFrame
        All rows from all files, with each file's original row index kept.
        An empty DataFrame is returned when no files match.
    """
    frames = []
    for filename in sorted(glob(data_dir + '*')):
        frame = pd.read_csv(filename, sep='\t')
        frame['year'] = filename[-8:-4]
        frames.append(frame)

    # pd.concat raises on an empty list, so keep the "no files" result an
    # empty frame just as the accumulate-in-a-loop version produced.
    if not frames:
        return pd.DataFrame()

    # One concat at the end avoids re-copying the accumulated rows per file.
    return pd.concat(frames)


df = load_checkbook_files(data_dir)

## Transform

In [None]:
# Each entry is an [id column, name column] pair in the raw extract; its
# distinct combinations become one lookup set in int_value_pairs.
lookup_column_pairs = [
    ['department_category_id','department_category_name'],
    ['description_id','description_name'],
    ['department_id','department_name'],
    ['vendor_id','vendor_name'],
    ['fund_type_id','fund_type_name'],
]

lookup_frames = []
for id_col, name_col in lookup_column_pairs:
    # Grouping by both columns and counting nothing leaves just the distinct
    # (id, name) combinations, sorted by key. With groupby's default settings
    # rows with a missing id or name are left out.
    pairs = (
        df[[id_col, name_col]]
        .groupby([id_col, name_col])
        .count()
        .reset_index()
        .rename(columns={id_col: 'key', name_col: 'value'})
    )
    # Tag each row with its lookup set, e.g. 'checkbook_vendor'.
    pairs['item'] = 'checkbook_' + id_col.replace('_id', '')
    lookup_frames.append(pairs)

# Concatenate once instead of growing the frame inside the loop.
int_value_pairs = pd.concat(lookup_frames)


# Columns copied through unchanged.
fact_cols = ['year', 'date', 'check_number', 'amount']
# Id columns: '_id' is dropped and 'checkbook_' prefixed, so the names match
# the 'item' values used for the int_value_pairs lookups.
id_cols = ['vendor_id', 'description_id',
           'fund_type_id', 'department_category_id', 'department_id']
cols = fact_cols + id_cols

# NOTE(review): 'date' is sorted exactly as read from the files; if it is an
# unparsed string the order is lexicographic, not chronological — confirm.
checkbook = (
    df[cols]
    .sort_values(['date', 'check_number'])
    .reset_index(drop=True)
    .rename(columns={name: 'checkbook_' + name.replace('_id', '') for name in id_cols})
)

# NOTE(review): keep=False flags every member of a duplicated group, so this
# removes all copies of a repeated row rather than keeping one. Behaviour is
# preserved from the original; confirm it is intended (drop_duplicates()
# would keep the first occurrence).
# .copy() makes the filtered result an independent frame, so the assignment
# below does not raise SettingWithCopyWarning.
checkbook = checkbook[~checkbook.duplicated(keep=False)].copy()

# Constant identifier stored with every row; presumably the MA Department of
# Revenue municipality code for Arlington — TODO confirm.
checkbook['dor'] = 10
 

## Load

In [None]:
from sqlalchemy import text

# DDL that rebuilds governance.checkbook from scratch and indexes it on the
# payment date and on the department id.
# NOTE(review): "amount" is REAL (single-precision float), which cannot hold
# cents exactly for large payments — consider NUMERIC. The id columns are
# SMALLINT, which caps ids at 32767 — confirm vendor ids fit.
table_create_checkbook_query = \
"""
        DROP TABLE IF EXISTS governance.checkbook;
        CREATE TABLE governance.checkbook (
            "dor" SMALLINT,
            "year" SMALLINT,
            "date" DATE,
            "check_number" INT,
            "amount" REAL,
            "checkbook_vendor" SMALLINT ,
            "checkbook_description" SMALLINT ,
            "checkbook_fund_type" SMALLINT ,
            "checkbook_department_category" SMALLINT ,
            "checkbook_department" SMALLINT 
        );
        
    CREATE INDEX governance_checkbook_idx 
        ON governance.checkbook(date);
    CREATE INDEX checkbook_department_idx 
        ON governance.checkbook(checkbook_department);
    """

# Engine.execute() with a raw string no longer exists in SQLAlchemy 2.0.
# Running the statements through text() on a connection from cnx.begin()
# works on both 1.x and 2.x, and commits the DDL when the block exits
# (or rolls it back if a statement fails).
with cnx.begin() as connection:
    connection.execute(text(table_create_checkbook_query))

In [None]:
# Append the transformed payments to governance.checkbook.
checkbook.to_sql(
    name='checkbook',
    con=cnx,
    schema='governance',
    if_exists='append',
    index=False,
)

# Append the id -> name lookups to the common.int_value_pairs table.
# NOTE(review): nothing here clears earlier 'checkbook_*' rows first, so
# re-running the notebook appends the same lookups again — confirm whether a
# prior delete (or an upsert) is needed.
int_value_pairs.to_sql(
    name='int_value_pairs',
    con=cnx,
    schema='common',
    if_exists='append',
    index=False,
)