## Build earnings reports (Bayesian OO version)

E. Quinn 1/16/2021

Build people data structure from PDF earnings reports

## Import standard Python data science packages

In [1]:
import sys
import math
import re
import copy as cp
import numpy as np
import scipy as sc
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
%matplotlib inline

In [2]:
from datetime import datetime, timedelta, date
from datascience import *
import uuid
import random

In [3]:
# NOTE(review): hard-coded absolute path, so this cell only works on a machine with
# this directory layout. Presumably this is where the custom SC classes imported in
# the next cell live - confirm, and consider making the location configurable.
sys.path.append("/home/gquinn/EG/school_committee/finance_subcommittee/py_egsc")

### Import custom SC classes - Bayesian versions

In [4]:
from person import Person
from ucoa_labels import UCOA_labels
from eg_acct_codes import EG_acct_codes
from pay_check import Pay_check, Check_lineitem
from roles_Bayes import *                          #import Bayesian roles.py
from scenario import Scenario
from salaries import *
from payperiod_Bayes import Payperiod

### Show the directory we are running in

In [5]:
!pwd

/home/gquinn/EG/school_committee/finance_subcommittee/notebooks


### Load RIDE UCOA labels 

In [6]:
# Module-level UCOA label lookup; add_roles() reads this global and calls
# ucoa_labels.get_label('Obj', obj) to describe object codes.
ucoa_labels = UCOA_labels()
#help(ucoa_labels)

### EG accounting codes class

Provides descriptions for EG accounting codes and their mapping to UCOA codes.

In [7]:
# Module-level EG account-code lookup; add_roles() reads this global to obtain
# account descriptions via eg_acct_codes.get_eg_acct_desc(acct).
eg_acct_codes = EG_acct_codes()
#help(eg_acct_codes)

### Read PDF structure from pickle

In [8]:
#Load data (deserialize)
# NOTE(review): pickle.load can execute arbitrary code while deserializing, so only
# open pickle files that come from a trusted source.
# The path is relative to the notebook's working directory.
with open('../../finance_subcommittee/ll_1_1_2021.pkl', 'rb') as handle:
    ll = pickle.load(handle)
    
# Number of top-level entries in the loaded structure (iterated as `year` keys below).
len(ll)

6

### Build a dictionary of people

In [9]:
def build_people(ll):
    """Create one Person per distinct name found in the parsed earnings reports.

    Args:
        ll: nested mapping indexed as ll[year][page][tb]; each innermost entry is
            a mapping that holds a 'names' sequence. Only 'names' is read here.

    Returns:
        dict mapping each name to a Person(name) instance. The Person is created
        the first time a name is seen; later occurrences reuse the existing entry.
    """
    people = {}
    for pages in ll.values():
        for tables in pages.values():
            for table in tables.values():
                for name in table['names']:
                    # Only construct a Person on first sight of a name.
                    if name not in people:
                        people[name] = Person(name)
    return people

# Build the name -> Person dictionary, then display how many distinct names it holds.
people = build_people(ll)
len(people)

898

### Add payperiods, checks, and lineitems

In [10]:
def _make_role(person, position):
    """Create the role object that corresponds to a position label.

    The checks are evaluated in this exact order; the first match wins and any
    unrecognised label falls back to the generic Role.

    NOTE(review): several labels look truncated to 10 characters ('PARAPROFES',
    'MAINTENANC', 'YR SECRETA'). If the report truncates every label that way,
    'ELECTRICAN' and the 11-character 'FACILITY DR' may never match - confirm
    against the data.
    """
    if position == 'TEACHER':
        return Teacher(person, position)
    if position in ('PARAPROFES', 'SPED PARAS'):
        # NOTE: a set_empirical_priors(...) call used to be commented out here.
        return Para(person, position)
    if 'T/S' in position:
        return Substitute_teacher(person, position)      # teacher substitute
    if 'P/S' in position:
        return Substitute_para(person, position)         # para substitute
    if position in ('YR SECRETA', 'SECRETARY'):
        return Office(person, position)
    if position in ('CUSTODIAN', 'MAINTENANC', 'ELECTRICAN',
                    'CUST PT', 'DIR MAINT', 'FACILITY DR'):
        # NOTE: a set_empirical_priors(...) call used to be commented out here.
        return Facilities(person, position)
    if position == 'COACH':
        return Coach(person, position)
    return Role(person, position)                        # generic role


def _parse_check_date(date_str):
    """Convert a 'month/day/year' string into a datetime.date."""
    parts = date_str.split('/')
    return date(int(parts[2]), int(parts[0]), int(parts[1]))


def add_roles(ll, people):
    """Attach roles, payperiods, checks and line items to the people dictionary.

    Every column of every earnings-report table becomes one Check_lineitem. The
    role, payperiod and check that the line item belongs to are created on first
    use and reused afterwards.

    Relies on the module-level `ucoa_labels` and `eg_acct_codes` objects.

    Args:
        ll: nested mapping indexed as ll[year][page][tb]. Each innermost entry
            provides the per-table values 'fund', 'acct' and 'obj' plus the
            parallel per-column sequences 'checks', 'names', 'dates',
            'positions', 'rates' and 'earnings'.
        people: dict mapping name -> Person. It must already contain every name
            in `ll` (a missing name raises KeyError). Modified in place.

    Returns:
        None.
    """
    for pages in ll.values():
        for tables in pages.values():
            for table in tables.values():
                check_numbers = table['checks']
                names         = table['names']
                check_dates   = table['dates']
                fund          = table['fund']
                acct          = table['acct']
                obj           = table['obj']
                positions     = table['positions']
                rates         = table['rates']
                earnings      = table['earnings']
                # Descriptions are per table, so look them up once outside the column loop.
                obj_desc      = ucoa_labels.get_label('Obj', obj)
                acct_desc     = eg_acct_codes.get_eg_acct_desc(acct)

                for i, name in enumerate(names):               # loop through columns
                    date_str     = check_dates[i]
                    check_number = check_numbers[i]
                    position     = positions[i]                # position determines role
                    rate         = rates[i]
                    earned       = earnings[i]

                    person = people[name]
                    roles  = person.get_roles()                # roles we already know about
                    if position in roles:
                        role = roles[position]
                    else:
                        role = _make_role(person, position)
                        person.add_role(position, role)

                    check_date = _parse_check_date(date_str)

                    # A throwaway Payperiod is built first because it knows how to map
                    # the check date onto (school year, sequence number).
                    pperiod = Payperiod(role, check_date)
                    schyr = pperiod.get_school_year()
                    syseq = pperiod.get_school_year_seq()

                    if role.has_payperiod(schyr, syseq):
                        pperiod = role.get_payperiod(schyr, syseq)          # reuse the stored one
                    else:
                        role.add_payperiod_by_index(schyr, syseq, pperiod)  # keep the new one

                    chk = pperiod.get_check(check_number)
                    if chk is None:                            # first line item of this check
                        chk = Pay_check(pperiod, check_number, name, check_date)
                        pperiod.add_check(check_number, chk)

                    # One line item per column of the earnings report.
                    litem = Check_lineitem(chk, fund, acct, obj,
                                           position, rate, earned, acct_desc, obj_desc)
                    chk.add_item(litem)

########################################################################################

# Populate roles, payperiods, checks and line items in place; add_roles returns nothing.
add_roles(ll,people)
# add_roles only looks up existing names, so the person count should be unchanged.
len(people)

898

### Build chronological payperiod chain and priors

In [11]:
# For every role of every person, walk the role's payperiods in sorted
# (school year, sequence) order, record the earliest one on the role, and seed each
# payperiod's priors.
# NOTE(review): the previous-payperiod links are only read here (get_prev_payperiod);
# presumably they are established inside the Payperiod/role classes - confirm.
for name in people.keys():
    roles = people[name].get_roles()
    for role_name in roles.keys():
        role = roles[role_name]
        pps = role.get_payperiods()
        role.first_payperiod = None                  # reset so a re-run recomputes it
        for syear in sorted(pps.keys()):
            for syseq in sorted(pps[syear].keys()):
                pp = pps[syear][syseq]
                # The first payperiod reached in sorted order is the role's earliest.
                if (role.first_payperiod is None):
                    role.first_payperiod = pp
                    role.first_school_year = pp.get_school_year()
                    role.first_school_year_seq = pp.get_school_year_seq()
                prevpp = pp.get_prev_payperiod()
                if (prevpp is not None):
                    # Deep copies so later changes to this payperiod's priors do not
                    # alter the previous payperiod's objects.
                    pp.priors = cp.deepcopy(prevpp.get_priors())
                    pp.fte_priors = cp.deepcopy(prevpp.get_fte_priors())
                else:
                    # No predecessor: start from the role's empirical priors and
                    # an empty FTE prior dictionary.
                    pp.priors = cp.deepcopy(role.get_empirical_priors())
                    pp.fte_priors = {}
                # Project method; presumably pushes this payperiod's priors on to the
                # following payperiod - confirm in payperiod_Bayes.
                pp.copy_priors_forward()
len(people)

898

### Decode step values, FTE, and payments from rate and earnings

In [12]:
# Visit every line item of every check, payperiod by payperiod in sorted
# (school year, sequence) order, and let the owning role decode it.
for name in people.keys():
    roles = people[name].get_roles()
    for role_name in roles.keys():
        role = roles[role_name]
        pps = role.get_payperiods()
        for syear in sorted(pps.keys()):
            for syseq in sorted(pps[syear].keys()):
                pp = pps[syear][syseq]
                checks = pp.get_checks()
                for cknum in checks.keys():
                    chk = checks[cknum]
                    items = chk.get_items()
                    for i in items.keys():
                        # Project method; per the section heading it derives step, FTE
                        # and payment values from the item's rate and earnings.
                        role.decode_earnings(items[i])
                # Called once per payperiod, after all of its checks are decoded;
                # presumably carries the updated priors forward - confirm.
                pp.copy_priors_forward()
len(people)

898

In [13]:
# Stamp the output file name with today's date as month_day_year (no zero padding).
current_date = date.today()
date_tag = '_'.join(str(part) for part in
                    (current_date.month, current_date.day, current_date.year))
fname = '../../finance_subcommittee/Bayesian_people_' + date_tag + '.pkl'

# Serialize the fully populated people dictionary for use by other notebooks.
with open(fname, 'wb') as handle:
    pickle.dump(people, handle)