In [9]:
#--clears all variables from environment before running program
%reset -f 

#=====================================
#--Import Packages--
#=====================================
import subprocess
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil import relativedelta
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import glob, os, sys
import json
import re

In [10]:
#=======================================
#--Read in Data and put in Usable Format
#=======================================

#--Read in each excel sheet from 'Service_fee_MI_2018.xlsx'--
service_fee_sheet = pd.read_excel('Service_fee_MI_2018.xlsx', sheet_name='service_fee')
tax_exempt_sheet = pd.read_excel('Service_fee_MI_2018.xlsx', sheet_name='tax_exempt')

#--Create dataframes for information from each Excel Sheet from the Service_fee_MI_2018.xlsx file--
#--.copy() makes each frame independent of its source sheet, so adding the
#--'address_and_zipcode' column below does not trigger SettingWithCopyWarning
tax_exempt_df = tax_exempt_sheet[['zip code', 'address']].copy()
tax_exempt_df['address_and_zipcode'] = tax_exempt_df['address'] + ' ' + tax_exempt_df['zip code'].map(str)
service_fee_df = service_fee_sheet[['zip code', 'address']].copy()
service_fee_df['address_and_zipcode'] = service_fee_df['address'] + ' ' + service_fee_df['zip code'].map(str)

#--Obtain all tax file names and put in list to run code on each file--
parent_dir = '2019_Returns'
taxfile_lst = glob.glob(os.path.join(parent_dir, '*.pdf'))

#--Fix file names, remove \\ and replace with / for Windows compatibility (file won't run in Windows without it)
#--The list is sorted so the files are always processed in the same order
taxfile_lst_2 = sorted(elem.replace('\\', '/') for elem in taxfile_lst)
taxfile_lst_2

['2019_Returns/2019_LiveSoftware_1040SR.pdf',
 '2019_Returns/2019_LiveSoftware_1040_MI_SCH1_ADDITIONS.pdf',
 '2019_Returns/2019_LiveSoftware_1040_NR.pdf',
 '2019_Returns/2019_Rule17c_FAIL_4apartments.pdf']

In [4]:
#=====================================
#--Functions--
#=====================================

#--Original Code for pdftotext from poppler:
#--Resource 1: https://stackoverflow.com/questions/52683133/text-scraping-a-pdf-with-python-pdfquery
#--Resource 2: https://kaijento.github.io/2017/03/27/pdf-scraping-gwinnetttaxcommissioner.publicaccessnow.com/
def pdftotext(pdf, page=None):
    """Extract the text of a PDF by calling the `pdftotext` command-line tool.

    Arguments:
        pdf: Path of the file to read.
        page: Number of the page to read. If None, read all the pages.
    Returns:
        A list of lines of text. The list is empty when the `pdftotext`
        command exits with a non-zero status.
    """
    #--Restrict the conversion to a single page only when one was requested
    page_range = [] if page is None else ['-f', str(page), '-l', str(page)]
    command = ['pdftotext'] + page_range + ['-layout', '-q', pdf, '-']

    try:
        #--check_output returns everything the command wrote to stdout as one string;
        #--the explicit encoding is what makes this work in Windows
        #--(the original Mac version passed universal_newlines=True only)
        raw_text = subprocess.check_output(command, encoding='utf-8', universal_newlines=True)
    except subprocess.CalledProcessError:
        return []
    return raw_text.splitlines()

#--1. Convert from numerical (1,2) to (False,True)
def bool_conv(val):
    """Translate a token count into the string flags used by the rules.

    Arguments:
        val: Number to convert.
    Returns:
        'False' when val equals 1, 'True' when val is greater than 1,
        and 'Error, check value' for anything else.
    """
    if val == 1:
        return 'False'
    if val > 1:
        return 'True'
    return 'Error, check value'


#--FINDING SUBSTRINGS IN FILES--
#--2. Find substring within file or sublist with 1 boundary--
def find_string_1bounds(bound1, file):
    """Find the last entry of `file` that contains `bound1`.

    Arguments:
        bound1: Substring to look for.
        file: List of strings to search (a whole text file or a sublist of it).
    Returns:
        The last matching entry, or None when no entry contains `bound1`.
    """
    matches = [line for line in file if bound1 in line]
    return matches[-1] if matches else None


#--3. Find substring within file or sublist with 2 boundaries--
def find_string_2bounds(bound1, bound2, file):
    """Find the last entry of `file` that contains both `bound1` and `bound2`.

    Arguments:
        bound1: First substring that must be present.
        bound2: Second substring that must be present.
        file: List of strings to search (a whole text file or a sublist of it).
    Returns:
        The last entry containing both substrings, or None when there is no
        such entry (same convention as find_string_1bounds).
    """
    output = None
    for elem in file:
        #--Each bound needs its own membership test: `bound1 and bound2 in elem`
        #--is parsed as `bound1 and (bound2 in elem)`, which never looks for bound1
        if bound1 in elem and bound2 in elem:
            output = elem
    return output


#--4. Find substring within file or sublist with 3 boundaries--
def find_string_3bounds(bound1, bound2, bound3, file):
    """Find the last entry of `file` that contains all three bounds.

    Arguments:
        bound1: First substring that must be present.
        bound2: Second substring that must be present.
        bound3: Third substring that must be present.
        file: List of strings to search (a whole text file or a sublist of it).
    Returns:
        The last entry containing all three substrings, or None when there is
        no such entry (same convention as find_string_1bounds).
    """
    output = None
    for elem in file:
        #--Each bound needs its own membership test: `a and b and c in elem`
        #--is parsed as `a and b and (c in elem)`, which only looks for the last bound
        if bound1 in elem and bound2 in elem and bound3 in elem:
            output = elem
    return output

#--5.Check if a form exists in the Tax File--
def check_form_exist_1bound(bound1,file):
    """Report whether any entry of `file` contains `bound1`.

    Arguments:
        bound1: Substring that identifies the form.
        file: List of strings to search.
    Returns:
        The string 'True' when at least one entry contains `bound1`,
        otherwise the string 'False'.
    """
    #--Test every line (no short-circuit), then collapse the results into one flag
    hits = [bound1 in line for line in file]
    return str(any(hits))

#--Create Sublist with upper and lower bounds--
def sublist_1up_1low(exist, file, upper1, lower1):
    """Slice `file` between an upper-bound line and a lower-bound line.

    The slice starts at the last entry containing `upper1` (included) and
    stops at the last entry containing `lower1` (excluded).

    Arguments:
        exist: The string 'True' when the form is present in `file`.
        file: List of strings to slice.
        upper1: Substring marking the first line of the sublist.
        lower1: Substring marking the line just after the sublist.
    Returns:
        The sublist, or an empty list (after printing a notice) when `exist`
        is not 'True'.
    Raises:
        SystemExit: when a bound is not found or the input cannot be processed.
    """
    try:
        if exist != 'True':
            print(upper1 +  ' form does not exist. Please check correct form is being run.')
            #--Previously this path fell through to `return sublist` with sublist unbound
            return []

        upper = lower = None
        for index, elem in enumerate(file):
            if upper1 in elem:
                upper = index
            if lower1 in elem:
                lower = index
        if upper is None or lower is None:
            raise ValueError('upper or lower bound not found')
        return file[upper:lower]
    except Exception:
        #--`except Exception` (not a bare except) so KeyboardInterrupt is not converted
        sys.exit('Warning: PDF file is not in correct format. You may need to export it in a different format to the disk. Check if file has page with Listing of Forms for This Return. Fix PDF file, then try to re-run it')

#--num1=-1 includes upper1, num1=0 does not include upper1 in sublist result
#--num2=-1 doesn't include lower1, num2=0 includes lower1 in sublist result
def sublist_1up_1low_ch_count(exist, file, upper1, lower1, num1, num2): #--can change the number of lines after the final string that you want included
    """Slice `file` between two marker lines, with adjustable offsets.

    The slice is file[u + num1 + 1 : l + num2 + 1], where u and l are the
    indexes of the last entries containing `upper1` and `lower1`.

    Arguments:
        exist: The string 'True' when the form is present in `file`.
        file: List of strings to slice.
        upper1: Substring marking the start of the sublist.
        lower1: Substring marking the end of the sublist.
        num1: Start offset; -1 includes the upper1 line, 0 starts on the line after it.
        num2: End offset; -1 excludes the lower1 line, 0 includes it,
            larger values include that many extra lines after it.
    Returns:
        The sublist, or an empty list (after printing a notice) when `exist`
        is not 'True'.
    Raises:
        ValueError: when `upper1` or `lower1` is not found in `file`.
    """
    if exist != 'True':
        print(upper1 +  ' form does not exist. Please check correct form is being run.')
        #--Previously this path fell through to `return sublist` with sublist unbound
        return []

    upper = lower = None
    for index, elem in enumerate(file):
        if upper1 in elem:
            upper = index + num1 + 1
        if lower1 in elem:
            lower = index + num2 + 1

    if upper is None or lower is None:
        #--Explicit error instead of the accidental UnboundLocalError
        raise ValueError('Could not find both bounds in file: ' + repr(upper1) + ', ' + repr(lower1))
    return file[upper:lower]

#--Create Sublist with 2 upper and 1 lower bounds--
def sublist_2up_1low(exist, file, upper1, upper2, lower1):
    """Slice `file` from a line matching two upper bounds to a lower-bound line.

    The slice starts at the last entry containing both `upper1` and `upper2`
    (included) and stops at the last entry containing `lower1` (excluded).

    Arguments:
        exist: The string 'True' when the form is present in `file`.
        file: List of strings to slice.
        upper1: First substring the starting line must contain.
        upper2: Second substring the starting line must contain.
        lower1: Substring marking the line just after the sublist.
    Returns:
        The sublist, or an empty list when `exist` is not 'True'.
    Raises:
        ValueError: when no starting line or no `lower1` line is found.
    """
    if exist != 'True':
        #--Previously this path fell through to `return sublist` with sublist unbound
        return []

    upper = lower = None
    for index, elem in enumerate(file):
        #--Each bound needs its own membership test: `upper1 and upper2 in elem`
        #--is parsed as `upper1 and (upper2 in elem)`, which never looks for upper1
        if upper1 in elem and upper2 in elem:
            upper = index
        if lower1 in elem:
            lower = index

    if upper is None or lower is None:
        #--Explicit error instead of the accidental UnboundLocalError
        raise ValueError('Could not find both bounds in file: ' + repr(upper1) + ' + ' + repr(upper2) + ', ' + repr(lower1))
    return file[upper:lower]

In [141]:
#--EXAMPLE FILE TO RUN: TESTING RULE 17 CODE-- Remove when complete
# text_file = pdftotext(taxfile_lst_2[1], page=None) #--run Sch1_Additions.pdf--
# mi1040CR_sublist = sublist_1up_1low_ch_count(mi1040CR_exist, text_file, mi1040CR_text, mi1040CR_text2, -1, 5)
# mi1040CR_58str = sublist_1up_1low_ch_count(mi1040CR_exist, mi1040CR_sublist, mi1040CR_text2, mi1040CR_text3, 0, -1)

In [5]:
#--CODE FOR TESTING--don't use in final file
#--Rebinds taxfile_lst_2 to a sorted copy of itself, then shows it as the cell output
taxfile_lst_2 = sorted(taxfile_lst_2)
taxfile_lst_2

['2019_Returns/2019_LiveSoftware_1040SR.pdf',
 '2019_Returns/2019_LiveSoftware_1040_MI_SCH1_ADDITIONS.pdf',
 '2019_Returns/2019_LiveSoftware_1040_NR.pdf',
 '2019_Returns/2019_Rule17c_FAIL_4apartments.pdf']

In [143]:
# mi1040CR_sublist

In [6]:
# #--CODE TO TEST VARIABLES FOR RULE 17 -- when complete, add to main code block below
# #--TODO: Match '602 SO TWENTY-SIXTH ST 100' from the service_fee sheet to '702 SO TWENTY SIXTH' in the 1040CR sublist
# #-----methods to try: fuzzy matching, try including '-' in place of spaces, etc. 


# #--Code for RULE 17: Address at line 58 in mi1040CR-- 
# #--mi1040CR CODE--

# # for file in taxfile_lst_2: #--when uncommented, indent code below to run all files
# file = taxfile_lst_2[3]
# print(file)
# text_file = pdftotext(file, page=None)

# #--6. MI 1040 CR--
# mi1040CR_text = '2019 MICHIGAN Homestead Property Tax Credit Claim MI-1040CR'
# mi1040CR_text2 = '58. Name and Address (including City, State and ZIP Code) of Housing Facility, Landowner, or Care Facility if you completed lines 54 through 57.'
# mi1040CR_text3 = 'DIRECT DEPOSIT'
# mi1040CR_exist = check_form_exist_1bound(mi1040CR_text, text_file)

# if mi1040CR_exist =='True':
# #         mi1040CR_sublist = sublist_1up_1low(mi1040CR_exist, text_file, mi1040CR_text, mi1040CR_text2)
#     mi1040CR_sublist = sublist_1up_1low_ch_count(mi1040CR_exist, text_file, mi1040CR_text, mi1040CR_text2, -1, 5) 

# else:
#     mi1040CR_sublist = 'False'

    
# #--Extract address string at 1040CR line 52a; create street and zipcode variables--
# mi1040CR52a_str1 = '(Number, Street, Apt. #, City, State, ZIP Code)'
# mi1040CR52a_str2 = '53. Total rent you paid (not more than 12 months)'
# mi1040CR_52str = sublist_1up_1low_ch_count(mi1040CR_exist, mi1040CR_sublist, mi1040CR52a_str1, mi1040CR52a_str2, 0, 0)
# # mi1040CR_52str[0].split('   ')
# # mi1040CR_52str = ['aa']

# #--if address string at 1040CR 52a is not empty, extract address-
# # NEED TO ACCOUNT FOR WHEN THERE WILL BE MORE THAN ONE ADDRESS OPTION
# #--try different approach other than using "if len(mi1040CR_52str) <= 6:" for discerning between 1 or 2 addresses being present
# try:
# #     if -- create rule for when there is only one address for 52a
#     if len(mi1040CR_52str) <= 6:
#         mi1040CR_52a_street1 = re.split(r'\s{4,}', mi1040CR_52str[0])[0]
#         mi1040CR_52astr_zip1 = re.split(r'\s{4,}', mi1040CR_52str[1])[0]
#         mi1040CR_52a_zip1 = re.findall(r'\d{5}', mi1040CR_52astr_zip1)[0]
#         mi1040CR_52a_street2 = 'False'; mi1040CR_52a_zip2 = 'False'
#         print('mi1040CR_52a_street1 if '+ mi1040CR_52a_street1)

#     else:
#         #--address 1:
#         mi1040CR_52a_street1 = re.split(r'\s{4,}', mi1040CR_52str[0])[0]
#         mi1040CR_52astr_zip1 = re.split(r'\s{4,}', mi1040CR_52str[1])[0]
#         mi1040CR_52a_zip1 = re.findall(r'\d{5}', mi1040CR_52astr_zip1)[0]

#         #--address 2:
#         mi1040CR_52a_street2 = re.split(r'\s{4,}', mi1040CR_52str[3])[0]
#         mi1040CR_52astr_zip2 = re.split(r'\s{4,}', mi1040CR_52str[4])[0]
#         mi1040CR_52a_zip2 = re.findall(r'\d{5}', mi1040CR_52astr_zip2)[0]
#         print('mi1040CR_52a_street1 else '+ mi1040CR_52a_street1)

#         #--TODO: account for number of addresses over 2

# #--otherwise, return False--
# except: 
#     mi1040CR_52a_street1 = 'False'; mi1040CR_52a_zip1 = 'False'; mi1040CR_52a_street2 = 'False'; mi1040CR_52a_zip2 = 'False'

# # #--Extract line 58 string; create street and zipcode variables--
# mi1040CR_text3 = 'DIRECT DEPOSIT'
# mi1040CR_58str = sublist_1up_1low_ch_count(mi1040CR_exist, mi1040CR_sublist, mi1040CR_text2, mi1040CR_text3, 0, -1)

# #--If Address string at 1040CR line 58 is not empty, do the following--
# try: 
#     mi1040CR_58str = mi1040CR_58str[0].lstrip().rstrip().upper().split(',')
#     mi1040CR_58street = mi1040CR_58str[0]
#     mi1040CR_58zip = re.findall(r'\d{5}', mi1040CR_58str[1])[0]
# #--otherwise, return False--
# except:
#     mi1040CR_58street = 'False'
#     mi1040CR_58zip = 'False'

# #--combine street and zipcodes--    
# mi1040CR_52add_zip1 = mi1040CR_52a_street1 + " " + str(mi1040CR_52a_zip1)
# mi1040CR_52add_zip2 = mi1040CR_52a_street2 + " " + str(mi1040CR_52a_zip2)
# mi1040CR_58add_zip = mi1040CR_58street + " " + str(mi1040CR_58zip)

# print(mi1040CR_52add_zip1)
# print(mi1040CR_52add_zip2)
# print(mi1040CR_58add_zip)
# # print('\n\n')

2019_Returns/2019_Rule17c_FAIL_4apartments.pdf
mi1040CR_52a_street1 else 702 SO TWENTY SIXTH
702 SO TWENTY SIXTH 49829
904 B GREENWOOD AVE 48162
False False


In [7]:
#--Preview the first rows of the tax-exempt dataframe (zip code, address, address_and_zipcode)
tax_exempt_df.head()

Unnamed: 0,zip code,address,address_and_zipcode
0,48013,3515 N WOODWARD AVE,3515 N WOODWARD AVE 48013
1,48015,DUNN FAMILY HOME,DUNN FAMILY HOME 48015
2,48015,8400 ENGLEMAN,8400 ENGLEMAN 48015
3,48017,CLAWSON MANOR,CLAWSON MANOR 48017
4,48017,255 WEST FOURTEEN MILE ROAD,255 WEST FOURTEEN MILE ROAD 48017


In [8]:
# #--MATCHING for RULE 17--
# #--TODO: Match '602 SO TWENTY-SIXTH ST 100' from the service_fee sheet to '702 SO TWENTY SIXTH' in the 1040CR sublist
# #--TODO: if returns "yes", don't raise a flag, if it returns "no", tell the volunteer to check manually

# #-----methods to try: fuzzy matching, try including '-' in place of spaces, etc. 
# #--Resource: https://www.rdegges.com/2013/easy-fuzzy-text-searching-with-postgresql/

# #--Example Addresses to try with
# ex1 = '603 FRANCES COURT 100' #--is in service_fee tab of spreadsheet
# ex2 = '602 SO TWENTY-SIXTH ST 100' #--is in service_fee tab of spreadsheet



# print(mi1040CR_52a_street1)
# print(mi1040CR_52a_zip1)
# print(mi1040CR_52a_street2)
# print(mi1040CR_52a_zip2)
# print(mi1040CR_58street)
# print(mi1040CR_58zip)

# address_lst = [mi1040CR_52a_street1, mi1040CR_52a_street2, mi1040CR_58street]
# zipcode_lst = [mi1040CR_52a_zip1, mi1040CR_52a_zip2, mi1040CR_58zip]
# #--Find address within TAX EXEMPT sheet: returns True/False--
# for address in address_lst:
#     if address != 'False':
#         find_address_te = str(tax_exempt_df['address'].eq(mi1040CR_58street).any()) #--matches exact address
#         print(address + ': ' + find_address_te) #--use for TESTING ONLY
        
        
#         find_address_te = str(tax_exempt_df['address'].eq(mi1040CR_58street).any()) #--matches exact address
#         print(address + ': ' + find_address_te)

# # Method of accounting for synonyms
# #--create synonym dictionary, check for value if key exists

702 SO TWENTY SIXTH
49829
904 B GREENWOOD AVE
48162
False
False
702 SO TWENTY SIXTH: False
702 SO TWENTY SIXTH: False
904 B GREENWOOD AVE: False
904 B GREENWOOD AVE: False


In [7]:
# mi1040CR_58street = 'CLEMENS TOWERS' #--example address

In [8]:
# def find_address_taxexempt(df_name, street, zipcode=None):
#     find_address_te = str(df_name['address'].eq(street).any())
#     return find_address_te

In [23]:
# #--Find address within TAX EXEMPT sheet: returns True/False--
# find_address_te = str(tax_exempt_df['address'].eq(mi1040CR_58street).any()) #--matches exact address
# print(find_address_te)
# #--Find address within SERVICE FEE sheet: returns True/False--
# find_address_sf = str(service_fee_df['address'].eq(mi1040CR_58street).any()) #--matches exact address
# print(find_address_sf)

False
False


In [10]:
#--Code for Rule 17a--

# error_message_17a = 'ERROR: Property is service fee.  Line 54b should be checked and 54a should not be checked.'
# error_message_17b = 'ERROR: Property is tax exempt.  Homestead credit not allowed.'
# error_message_17c_parta = 'ERROR: a) if service fee:  Property is service fee.  Must be entered in MI1040CR Part 5'
# error_message_17c_partb = 'ERROR: b)  if tax exempt:  Property is tax exempt.  Credit is not allowed.'

# if mi1040CR_exist == 'True' and mi1040CR_55 > 0 and mi1040CR_44 > 0 and find_address_sf =='True':
#     rule17a = 'True'
# else:
#     rule17a = 'False'
    
# if rule17a == 'True':
#     if mi1040CR_54b == 'True':
#         rule17a_df = no_errors
#     else:
#         rule17a_df = error_message_17a
#         final_message = final_message + rule17a_df + '\n'
# else:
#     rule17a_df = no_errors
# print(rule17a_df)        

# #--Code for Rule 17b--
# if mi1040CR_exist == 'True' and mi1040CR_55 > 0 and mi1040CR_44 > 0:
#     rule17b = 'True'
# else:
#     rule17b = 'False'
    
    
    
    
    
    
    
    
    
# # if rule17b == 'True':
# #     if find_address_te == 'True':
        





# #--Code for Rule 17c--

# #--CODE FOR GETTING ADDRESS AT 52a--

# #--CODE FOR 17c RULE:
# if rule17c =='True':
#     if mi1040CR_52a_street in servicefee:
#         address52a_servicefee = 'True'
#     else:
#         address52a_servicefee = 'False'
        
    #--Check if Address on Form_MI_1040CR line 52 column A is NOT included on "Service.fee.MI.2018" tab "service fee" OR "tax exempt"
        # address52a_servicefee = True/False
        # address52a_taxexempt = True/False
    #--else:
        #--

In [135]:
#--Create empty dataframe and empty string for all client data--
all_tax_txt_file = ''

#--Column names for the all-clients dataframe, grouped by where each value comes from
col_lst = [
    #--run information, client and spouse--
    'Filename', 'DateTime_tool_was_run', 'TaxFormProcessDate',
    'client_name', 'client_age', 'client_birthyr',
    'spouse_exist', 'spouse_age', 'spouse_birthyr',
    #--Federal 1040 / 1040-SR--
    'Fed1040SR_exist', 'Fed1040_exist', 'Fed1040_4b', 'Fed1040_4d', 'Fed1040_5a',
    #--MI-1040--
    'mi1040exist',
    'mi1040_7a', 'mi1040_7b', 'mi1040_7c',
    'mi1040_8a', 'mi1040_8b', 'mi1040_8c',
    'mi1040_9b', 'mi1040_12',
    #--MI Schedule 1 and Schedule NR--
    'miSched1_exist_subtractions', 'miSched1_24',
    'miSchedNR_exist', 'miSchNR_c1', 'miSchNR_c2', 'miSchNR_s1', 'miSchNR_s2',
    'month_residence_client', 'month_residence_spouse',
    #--MI-1040CR--
    'mi1040CR_exist',
    'mi1040CR_11', 'mi1040CR_31', 'mi1040CR_33', 'mi1040CR_44',
    'mi1040CR_53', 'mi1040CR_54a', 'mi1040CR_54b', 'mi1040CR_55', 'mi1040CR_57',
    #--rule results--
    'Rule_1', 'Rule_2', 'Rule_4', 'Rule_5',
    'Rule_10', 'Rule_11', 'Rule_12', 'Rule_13', 'Rule_14', 'Rule_15', 'Rule_16',
]

all_tax_df = pd.DataFrame(columns=col_lst)

In [82]:
formcount = 0

for file in taxfile_lst_2:
    try:
        final_message = '' #--print out for volunteers
        text_file = pdftotext(file, page=None)

        #=====================================
        #--Check if forms exist in Tax File--
        #=====================================
        #--1. Federal 1040--
        fed1040_str = '1040 U.S. Individual Income Tax Return 2019'
        Fed1040_exist = check_form_exist_1bound(fed1040_str, text_file)

        #--2. Federal 1040-SR--
        fed1040SR_str = '1040-SR U.S. Tax Return for Seniors'
        Fed1040SR_exist = check_form_exist_1bound(fed1040SR_str, text_file)

        #--3. MI-1040--
        mi1040text = 'MICHIGAN Individual Income Tax Return MI-1040'
        mi1040text_page2 = '2019 MI-1040, Page 2 of 2'
        mi1040exist = check_form_exist_1bound(mi1040text, text_file)

        #--4. MI Schedule 1--
        str_sch1 = 'MICHIGAN Schedule 1 Additions and Subtractions'
        str1_sched1_24 = 'Pension Schedule. Include Form 4884'
        miSched1_exist = check_form_exist_1bound(str_sch1, text_file) #--checks for Sched 1 Additions and Subtractions pages (2 pages)
        miSched1_exist2 = check_form_exist_1bound(str1_sched1_24, text_file) #--Checks for Sche 1 Subtractions page only
        if miSched1_exist == 'True' and miSched1_exist2 == 'True':
            miSched1_exist_subtractions = 'True'
        else:
            miSched1_exist_subtractions = 'False'

        #--5. MI Schedule NR--
        schedNR_upper = 'MICHIGAN Nonresident and Part-Year Resident Schedule'
        schedNR_lower = 'here and on MI-1040, line 15........................................................................................................................   19.'
        miSchedNR_exist = check_form_exist_1bound(schedNR_upper, text_file)

        #--6. MI 1040 CR--
        mi1040CR_text = '2019 MICHIGAN Homestead Property Tax Credit Claim MI-1040CR'
        mi1040CR_text2 = '58. Name and Address (including City, State and ZIP Code) of Housing Facility, Landowner, or Care Facility if you completed lines 54 through 57.'
        mi1040CR_text3 = 'DIRECT DEPOSIT'
        mi1040CR_exist = check_form_exist_1bound(mi1040CR_text, text_file)

        #--7. Client and Spouse age information--
        age_upperbound1 = 'TAX YEAR: 2019'
        age_upperbound2 = 'PROCESS DATE: '
        age_lowerbound = 'LISTING OF FORMS FOR THIS RETURN'
        age_exist = 'True'
    #     print('Complete: Check if forms exist in Tax File') #--Used for debugging--

        #=====================================
        #-----Create Sublists for Forms------
        #=====================================

        #---MI-1040---
        if mi1040exist == 'True':
            mi1040sublist = sublist_1up_1low(mi1040exist, text_file, mi1040text, mi1040text_page2) #--NOTE: only gives page 1 of the MI-1040
        else:
            mi1040sublist = 'False'

        #---MI Schedule NR---
        if miSchedNR_exist == 'True':
            miSchedNR_sublist = sublist_1up_1low(miSchedNR_exist, text_file, schedNR_upper, schedNR_lower)
        else:
            miSchedNR_sublist = 'False'

        #--Client Age Sublist--
    #     age_sublist = sublist_2up_1low(age_exist, text_file, age_upperbound1, age_upperbound2, age_lowerbound)
        age_sublist = sublist_1up_1low(age_exist, text_file, age_upperbound2, age_lowerbound)

        #--1040CR--
        if mi1040CR_exist =='True':
    #         mi1040CR_sublist = sublist_1up_1low(mi1040CR_exist, text_file, mi1040CR_text, mi1040CR_text2)
            mi1040CR_sublist = sublist_1up_1low_ch_count(mi1040CR_exist, text_file, mi1040CR_text, mi1040CR_text2, -1, 5) 

        else:
            mi1040CR_sublist = 'False'
    #     print('Complete: Create Sublists for Forms') #--Used for debugging--

        #=====================================
        #------ Date/Time Tool was Run -------
        #=====================================
        # Returns a datetime object containing the local date and time
        dateTimeObj = datetime.now()
        run_code_datetime = dateTimeObj.strftime("%d-%b-%Y (%H:%M:%S.%f)")[:-4]

        #=====================================
        #------------timestamp---------------
        #=====================================
        tax_yr_text = 'TAX YEAR: 2019'
        process_date = 'PROCESS DATE: '

        #--Find and extract substring with timestamp information: labeled 'PROCESS DATE'
        timestamp_fullstr = find_string_2bounds(tax_yr_text, process_date, age_sublist)
        timestamp_str = timestamp_fullstr.split(process_date)[1] #--get date string after process_date text; add this one to dataframe
        match = re.search('\d{2}/\d{2}/\d{4}', timestamp_str) #--use RegEx to separate date in case there is any other information in the string
        timestamp = datetime.strptime(match.group(), '%m/%d/%Y').date() #--create datetime in case there is a need for getting extra info from process date

        #=====================================
        #-------Client & Spouse Info----------
        #=====================================
        client_text1 = 'CLIENT'
        client_text2 = 'BIRTH DATE'
        client_text3 = 'Age:'
        spouse_text1 = 'SPOUSE'

        #--1. Extract text that includes the age and birth_yr of the client and spouse (if spouse exists) from age_sublist--
        client_age_lst = []
        spouse_age_lst = []
        spouse_bool = ''

        for elem in age_sublist:
            if client_text1 in elem:
                client_age_lst.append(elem)      

        for elem in age_sublist: #--create string for elements in age_sublist, see if SPOUSE exists--
            if spouse_text1 in elem:
                spouse_bool = spouse_bool + 'True '
                spouse_age_lst.append(elem)
            else:
                spouse_bool = spouse_bool + 'False '

        #--2. Extract Client Name--        
        m = client_age_lst[0].split('BIRTH')[0]
        pattern2 = '\d{3}-\d{2}-\d{4}' #--remove social security number
        str_name = re.sub(pattern2, '', m)
        str_name1 = re.sub('CLIENT', '', str_name)
        str_name2 = re.sub(':', '', str_name1)
        client_name = str_name2.lstrip().rstrip()

        #--3a. Find age and birth_yr for client --
        client_age = int(client_age_lst[0].split('Age:')[1]) #--split client_age_lst on 'Age:' to extract age number and convert to int

        match = re.search('\d{2}/\d{2}/\d{4}', client_age_lst[0]) #--use regex to extract birthdate from birth info string, then convert to datetime to extract birth year
        client_birthyr = datetime.strptime(match.group(), '%m/%d/%Y').date()
        client_birthyr = client_birthyr.year

        #--3b. Find if spouse exists, if spouse exists, extract age and birth_yr for spouse --
        if 'True' in spouse_bool: #--check if spouse information is in sublist, if it is, find age and birth_yr, if not, set those variables to 0
            spouse = True
            spouse_age = int(spouse_age_lst[0].split('Age:')[1]) #--split client_age_lst on 'Age:' to extract age number and convert to int

            match = re.search('\d{2}/\d{2}/\d{4}', spouse_age_lst[0]) #--use regex to extract birthdate from birth info string, then convert to datetime to extract birth year
            spouse_birthyr = datetime.strptime(match.group(), '%m/%d/%Y').date()
            spouse_birthyr = spouse_birthyr.year
        else:
            spouse = False
            spouse_age = 0
            spouse_birthyr = 0

    #     print('Complete: Time and Client/Spouse Information') #--Used for debugging--

        #=====================================
        #-----Federal 1040 & 1040-SR ---------
        #----------Variables------------------
        #=====================================

        #======== 5a ========
        str1_5a = '5a'
        str2_5a = 'Social security benefits' 

        #--Locate string in file with information for 1040 4b
        string = find_string_2bounds(str1_5a, str2_5a, text_file)
        string.split(' b ') #--CHECK CODE HERE, HOW DOES SPLIT CHANGE IT? TAKE IT OUT?
        match = re.findall(r"Social security benefits(.+?)b Taxable amount", string) #--Identifying appropriate range for numerical value of 5a (given as a string within a list)
        match = match[0].split('5a')[1] #--extracts string from list, then removes unnecessary characters, so only blank spaces and number are remaining
        try:
            Fed1040_5a = int(match.lstrip().rstrip()) #--strips blank spaces and converts to integer
        except:
            Fed1040_5a = 0 

        #======== 4b ========
        str1_4b = '4a'
        str2_4b = 'IRA distributions'

        #--Locate and extract string in file with information for 1040 4b
        string = find_string_2bounds(str1_4b, str2_4b, text_file)
        Fed1040_4b = string.partition('4b')[2] #--partition based on 4b, then get string to the right of it

        try:
            if Fed1040_4b == '':
                Fed1040_4b = 0
            else:
                Fed1040_4b = int(Fed1040_4b.lstrip())
        except:
            print('Please look at value for 1040 4b')
            final_message = final_message + 'Please look at value for 1040 4b /n'


        #======== 4d from 1040-SR ========
        #--Accounting for 1040 and 1040-SR, 4d appears differently in each form--
        str1_4d = 'Pensions and annuities'
        str2_4d = '4c'
        str3_4d = '• Single or Married'

        #======== NOTE: KEEP CODE FOR 1040SR above code for 1040: Dependencies for Fed1040_4d value ========
        if Fed1040SR_exist == 'True':
            string = find_string_2bounds(str1_4d, str2_4d, text_file)
            Fed1040_4d = string.partition('4d')[2] #--partition string on 4d, get value needed
            if len(Fed1040_4d) == 0:
                Fed1040_4d = 0
            else:
                Fed1040_4d = int(Fed1040_4d.lstrip())
        else:
            Fed1040_4d = 0

        #--Gathers Taxable AMT Totals value for 4b + 4d on 1099-R Income Forms Summary--
        #--This is a work around for when there is a Fed 1040 with only a value for 4c or 4d--
        str1_1099 = '* 1099-R INCOME FORMS SUMMARY *'
        str2_1099 = 'TOTALS......'
        text1099_exist = check_form_exist_1bound(str1_1099, text_file)
        if text1099_exist == 'True':
    #         text_1099 = sublist_1up_1low_plus1(text1099_exist, text_file, str1_1099, str2_1099)
            text_1099 = sublist_1up_1low_ch_count(text1099_exist, text_file, str1_1099, str2_1099, -1, 0)        
            text_1099_totals = find_string_1bounds(str2_1099, text_1099).split()
            taxable_amt_totals = int(text_1099_totals[2])
        else:
            text_1099 = 'False'
            taxable_amt_totals = 0


        #======== 4d from 1040 ========
        if Fed1040_exist == 'True':
            for elem in text_file:
                if str3_4d in elem:
                    string = elem

            clean_string = string.replace(str3_4d, "TEST")
            clean_lst = clean_string.split()
            if len(clean_lst) == 1:
                if "TEST" in clean_lst:
                    Fed1040_4d = 0
                else:
                    Fed1040_4d = 'Error len1'
            elif len(clean_lst) == 2:
                if "TEST" in clean_lst:
        #             Fed1040_4d = 'Error len2a: Please check form for Federal 4d value.'
                    Fed1040_4c_or_4d = int(clean_lst[1])

                    #--Gathers Taxable AMT Totals value for 4b + 4d on 1099-R Income Forms Summary--
                    #--This is a work around for when there is a Fed 1040 with only a value for 4c or 4d--
                    str1_1099 = '* 1099-R INCOME FORMS SUMMARY *'
                    str2_1099 = 'TOTALS......'
                    text1099_exist = check_form_exist_1bound(str1_1099, text_file)
                    if text1099_exist == 'True':
                        text_1099 = sublist_1up_1low_plus1(text1099_exist, text_file, str1_1099, str2_1099)
                        text_1099_totals = find_string_1bounds(str2_1099, text_1099).split()
                        taxable_amt_totals = int(text_1099_totals[2])
                    else:
                        text_1099 = 'False'
                        taxable_amt_totals = 0

                    #--This subtracts the number in Federal 1040 4c, 4d string from Taxable AMT Totals to get value for 4d
                    if Fed1040_4c_or_4d + Fed1040_4b == taxable_amt_totals: #--if the string from 1040 + Fed1040_4b = tax_amt, then it's 4d
                        Fed1040_4d = int(Fed1040_4c_or_4d)
                    elif Fed1040_4c_or_4d + Fed1040_4b != taxable_amt_totals:
                        Fed1040_4d = 0
                    else:
                        Fed1040_4d = 'Error: Please check form'

                else:
                    Fed1040_4d = 'Error: Please check form for Federal 4d value.'
                    final_message = final_message + 'Error: Please check form for Federal 4d value. \n'
            elif len(clean_lst) == 3:
                if "TEST" in clean_lst:
                    Fed1040_4d = int(clean_lst[2])
                else:
                    Fed1040_4d = 'Error len 3'
            else:
                final_message = final_message + "Please check 1040 4d by hand \n"
                print("Please check 1040 4d by hand")
        else:
            pass
    #         print("Federal 1040 not in this Tax Form, 4d value comes from 1040-SR")
    #     print('Complete: 1040 values for 4b, 4d, 5a') #--Used for debugging--

        #================================
        #---------MI-1040----------------
        #---------Variables FINAL------
        #================================
        #--Extracts MI-1040 lines 7a-7c (filing status), 8a-8c (residency status), 9b and 12.
        #--Checkbox lines are read by token count: once the box letter is split off, a segment
        #--with 1 token is an unchecked box and a segment with 2 tokens contains an X.
        #--bool_conv turns that count into the flag value that the rules compare against 'True'.
        if mi1040exist == 'True':

            #======== 7a, 8a ========
            str_7a = 'Single'
            str_8a = 'If you check box “c,” complete'

            str7a8a = find_string_2bounds(str_7a, str_8a, text_file)
            remove1 = '* If you check box “c,” complete'
            str7a8a = str7a8a.replace(remove1, "") #--remove unnecessary words from string
            str7a8a_lst = str7a8a.strip().split('a. ')  #--remove whitespace and split on a. to separate elements

            mi1040_7a_num = len(str7a8a_lst[1].strip().split()) #--Filing Status Single: 1 = no X in 7a, 2 = X in 7a
            mi1040_8a_num = len(str7a8a_lst[2].strip().split()) #--Residency Status Resident: 1 = no X in 8a, 2 = X in 8a
            mi1040_7a = bool_conv(mi1040_7a_num)
            mi1040_8a = bool_conv(mi1040_8a_num)

            #======== 7b, 8b ========
            str7b = 'Nonresident'
            str7b_2 = 'below:'
            str8b = '“c,” you must complete'
            remove1 = '*'

            #--Find the row that holds boxes 7b/8b. Each marker needs its own `in` test:
            #--`str7b and str7b_2 and str8b in elem` only tests str8b, because a non-empty
            #--string is always truthy. No break: the last matching row wins, as before.
            count = -1
            for elem in mi1040sublist:
                count += 1
                if str7b in elem and str7b_2 in elem and str8b in elem:
                    #--Information for whether 7b has a checkmark is in the string after the one with the 7b garbled text, or count + 1
                    str7b_only = count + 1
                    str8b_only = elem

            #---7b.---: The X for the mi1040_7b box is not in the same string as the text, but in the string after it--
            if 'X' in mi1040sublist[str7b_only]:
                mi1040_7b_num = 2 #--same convention as 7a: 2 means an X is present in the box
            else:
                mi1040_7b_num = 1 #--1 means no X is present in the box

            #---8b.---
            str8b_only = str8b_only.replace(str8b, "").replace(remove1, "") #--remove unnecessary words from Nonresident part of string
            str8b_lst = str8b_only.strip().split('b. ')  #--remove whitespace and split on b. to separate elements
            mi1040_8b_num = len(str8b_lst[2].strip().split())

            mi1040_7b = bool_conv(mi1040_7b_num)
            mi1040_8b = bool_conv(mi1040_8b_num)

            #======== 7c, 8c ========
            str_7c_8c = 'Part-Year Resident *'
            remove1 = 'Resident'
            remove2 = ' *'

            str7c8c = find_string_1bounds(str_7c_8c, text_file)
            str7c8c = str7c8c.replace(remove1, "").replace(remove2, "") #--remove unnecessary words from string
            str7c8c_lst = str7c8c.strip().split('c. ')  #--remove whitespace and split on c. to separate elements
            mi1040_7c_num = len(str7c8c_lst[1].strip().split()) #--Filing Status (Married Filing Separately): 1 = no X in 7c, 2 = X in 7c
            mi1040_8c_num = len(str7c8c_lst[2].strip().split()) #--Residency Status (Part-Year Resident): 1 = no X in 8c, 2 = X in 8c

            mi1040_7c = bool_conv(mi1040_7c_num)
            mi1040_8c = bool_conv(mi1040_8c_num)

            #======== 9b ========
            str1_9b = 'blind, hemiplegic, paraplegic, quadriplegic, or totally and permanently disabled'
            str2_9b = ' 9b.' #--keep space in front of 9b. or will return other text--

            string = find_string_2bounds(str1_9b, str2_9b, text_file)
            try:
                s = string.split('x')[0]
                mi1040_9b = int(s.replace(str1_9b, '').replace(str2_9b, '').strip())
            except Exception:
                #--Anything that does not parse to an integer counts as 0 exemptions.
                #--`except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
                mi1040_9b = 0

            #======== 12 ========
            str_12  = 'Total. Add lines 10 and 11'

            mi1040_12_str = find_string_1bounds(str_12, text_file)
            mi1040_12 = mi1040_12_str.split(' 12. ')[2] #--split on 12. and get element with numerical value for line 12
            mi1040_12 = mi1040_12.split()  #--split again to separate 2 trailing 0's

            if len(mi1040_12) > 1: #--more than one token: the first token is the line 12 amount
                mi1040_12 = int(mi1040_12[0])
            else:                  #--one token or none: line 12 is taken as 0
                mi1040_12 = 0

        else:
            #--No MI-1040 in this return: every checkbox reads 'False' and the amounts read 0
            dne = 'False'
            mi1040_7a = dne; mi1040_7b = dne; mi1040_7c = dne; mi1040_8a = dne; mi1040_8b = dne; mi1040_8c = dne
            mi1040_9b = 0; mi1040_12 = 0
    #     print('Complete: 1040 variables 7-12') #--Used for debugging--


        #================================
        #---------MI-1040CR-------------
        #---------Variables-------------
        #================================
        mi1040CR_11str = '11. Renters: Enter rent you paid for 2019 from line 53 and/or 55'
        mi1040CR_31str = '..................................................................................... 31'
        mi1040CR_33str = 'If more than $60,000, STOP; you are not eligible for this credit'
        mi1040CR_44str = '....................................................... 44.'
        mi1040CR_53str = '53. Total rent you paid (not more than 12 months). Add total rent for each period'
        mi1040CR_54stra = 'a.   X    Subsidized Housing: complete line 55'
        mi1040CR_54strb = 'b.     X     Service Fee Housing: complete lines 55 and 56.'
        mi1040CR_55str = '.................................... 55.'
        mi1040CR_57str = 'Enter your prorated share of taxes from the type of facility checked on line 57 here and on line 10.'

        #--Parses the MI-1040CR amounts (lines 11, 31, 33, 44, 53, 55, 57), the 54a/54b checkboxes
        #--and the addresses on lines 52a and 58. When the form is absent, every result gets a default.
        def _mi1040CR_amount(tokens):
            """Turn the whitespace-split text that follows a line number into that line's value.

            tokens: list of strings found after the line-number marker.
            Returns int(tokens[0]) when there is more than one token, 0 when there is exactly
            one token, and a warning string when there are no tokens at all.
            Raises ValueError when the first of several tokens is not an integer.
            """
            if len(tokens) > 1:
                return int(tokens[0])
            if len(tokens) == 1:
                return 0
            return 'Warning: please check value by hand'

        if mi1040CR_exist == 'True':
            #======== 11 ========
            mi1040CR_11text = find_string_1bounds(mi1040CR_11str, mi1040CR_sublist)
            mi1040CR_11split_amt = mi1040CR_11text.split('11.')[2].split()
            mi1040CR_11 = _mi1040CR_amount(mi1040CR_11split_amt)

            #======== 31 ========
            mi1040CR_31text =  find_string_1bounds(mi1040CR_31str, mi1040CR_sublist).split('31.')[1].split()
            mi1040CR_31 = _mi1040CR_amount(mi1040CR_31text)

            #======== 33 ========
            mi1040CR_33text =  find_string_1bounds(mi1040CR_33str, mi1040CR_sublist).split('33.')[1].split()
            mi1040CR_33 = _mi1040CR_amount(mi1040CR_33text)

            #======== 44 ========
            #--(the earlier check_form_exist_1bound result stored in mi1040CR_44 was overwritten
            #--straight away, so that dead assignment has been dropped)
            mi1040CR_44text = find_string_1bounds(mi1040CR_44str, mi1040CR_sublist)
            mi1040CR_44split_amt = mi1040CR_44text.split('44. ')[1].split()
            mi1040CR_44 = _mi1040CR_amount(mi1040CR_44split_amt)

            #======== 53 ========
            mi1040CR_53text = find_string_1bounds(mi1040CR_53str, mi1040CR_sublist)
            mi1040CR_53split_amt = mi1040CR_53text.split('53.')[2].split()
            mi1040CR_53 = _mi1040CR_amount(mi1040CR_53split_amt)

            #======== 54a, 54b ========
            mi1040CR_54a = check_form_exist_1bound(mi1040CR_54stra, mi1040CR_sublist)
            mi1040CR_54b = check_form_exist_1bound(mi1040CR_54strb, mi1040CR_sublist)

            #======== 55 ========
            mi1040CR_55text = find_string_1bounds(mi1040CR_55str, mi1040CR_sublist)
            mi1040CR_55split_amt = mi1040CR_55text.split('55.')[1].split()
            mi1040CR_55 = _mi1040CR_amount(mi1040CR_55split_amt)

            #======== 57 ========
            mi1040CR_57text = find_string_1bounds(mi1040CR_57str, mi1040CR_sublist)
            mi1040CR_57split_amt = mi1040CR_57text.split('57.')[1].split()
            mi1040CR_57 = _mi1040CR_amount(mi1040CR_57split_amt)

            #======== 52a & 58 ========
            #---52a.---
            #--Extract address string at 1040CR line 52a; create street and zipcode variables--
            mi1040CR52a_str1 = '(Number, Street, Apt. #, City, State, ZIP Code)'
            mi1040CR52a_str2 = '53. Total rent you paid (not more than 12 months)'
            mi1040CR_52str = sublist_1up_1low_ch_count(mi1040CR_exist, mi1040CR_sublist, mi1040CR52a_str1, mi1040CR52a_str2, 0, -4)

            #--if address string at 1040CR 52a is not empty, extract address-
            try:
                #--address 1 is read the same way whether one or two addresses are present:
                #--the text before the first run of 4+ spaces is the street / the zip-bearing text
                mi1040CR_52a_street1 = re.split(r'\s{4,}', mi1040CR_52str[0])[0]
                mi1040CR_52astr_zip1 = re.split(r'\s{4,}', mi1040CR_52str[1])[0]
                mi1040CR_52a_zip1 = re.findall(r'\d{5}', mi1040CR_52astr_zip1)[0]

                if len(mi1040CR_52str) <= 6:
                    #--6 rows or fewer: only one address on line 52a
                    mi1040CR_52a_street2 = 'False'; mi1040CR_52a_zip2 = 'False'
                else:
                    #--address 2:
                    mi1040CR_52a_street2 = re.split(r'\s{4,}', mi1040CR_52str[3])[0]
                    mi1040CR_52astr_zip2 = re.split(r'\s{4,}', mi1040CR_52str[4])[0]
                    mi1040CR_52a_zip2 = re.findall(r'\d{5}', mi1040CR_52astr_zip2)[0]

                    #--TODO: account for number of addresses over 2

            #--otherwise, return False--
            except Exception:
                mi1040CR_52a_street1 = 'False'; mi1040CR_52a_zip1 = 'False'; mi1040CR_52a_street2 = 'False'; mi1040CR_52a_zip2 = 'False'

            #---58.---
            #--Extract line 58 string; create street and zipcode variables--
            mi1040CR_58str = sublist_1up_1low_ch_count(mi1040CR_exist, mi1040CR_sublist, mi1040CR_text2, mi1040CR_text3, 0, -1)

            #--If Address string at 1040CR line 58 is not empty, do the following--
            try:
                mi1040CR_58str = mi1040CR_58str[0].strip().upper().split(',')
                mi1040CR_58street = mi1040CR_58str[0]
                mi1040CR_58zip = re.findall(r'\d{5}', mi1040CR_58str[1])[0]
            #--otherwise, return False--
            except Exception:
                mi1040CR_58street = 'False'
                mi1040CR_58zip = 'False'

        else:
            mi1040CR_11 = 0; mi1040CR_31 = 0; mi1040CR_33 = 0; mi1040CR_44 = 0; mi1040CR_53=0; mi1040CR_54a = 0; mi1040CR_54b = 0
            mi1040CR_55 = 0; mi1040CR_57 = 0; mi1040CR_52a_street = 'False'; mi1040CR_52a_zip = 'False'
            #--The parsed branch defines the numbered 52a names, so define them here as well;
            #--otherwise they are undefined (or stale) whenever the form is absent.
            mi1040CR_52a_street1 = 'False'; mi1040CR_52a_zip1 = 'False'; mi1040CR_52a_street2 = 'False'; mi1040CR_52a_zip2 = 'False'
            mi1040CR_58street = 'False'; mi1040CR_58zip = 'False'

    #     print('Complete: mi1040CR variables') #--Used for debugging--

        #================================
        #---------MI Schedule 1----------
        #---------Variables--------------
        #===============================

        #======== 24 ========
        #--miSched1_24 is the Schedule 1 line 24 amount. It stays 0 when the subtractions page
        #--is absent, when the line cannot be found/parsed, or when no amount precedes the cents.
        str1_sched1_24 = 'Pension Schedule. Include Form 4884'

        miSched1_24 = 0
        if miSched1_exist_subtractions == 'True':
            try:
                sched24 = find_string_1bounds(str1_sched1_24, text_file) #--Find and extract substring with Schedule 1 line 24
                sched1_line24 = sched24.split(' 24. ')[1].split() #--Text after 24., split so the amount is separated from the trailing 0's

                if len(sched1_line24) > 1:
                    miSched1_24 = int(sched1_line24[0])
            except Exception:
                #--Parsing failed; `except Exception` (not a bare except) lets
                #--KeyboardInterrupt/SystemExit propagate instead of being swallowed.
                print('MI Schedule 1: only additions sheet')
    #     print('Complete: miSchedule 1 variables') #--Used for debugging--

        #=====================================================================
        #---------MI Schedule NR: Loop 1---------
        #--Checks if Nonresident or Part-Year Resident checked
        #--Extracts strings with dates; Removes excess text and 2019 from strings
        #--Sets variables to 0's if no dates in boxes or Schedule NR doesn't exist
        #=====================================================================

        from_str1 = 'FROM: '
        to_str1 = 'Enter dates of Michigan residency in 2019*'
        non_res_str = 'X     Nonresident'
        partyr_res_str = 'b.    X     Part-Year Resident of Michigan'

        #--Default every Schedule NR result to 0 up front so that all paths leave them defined.
        #--Before, they stayed undefined here whenever dates were found on the form.
        miSchNR_c1 = 0; miSchNR_c2 = 0; miSchNR_s1 = 0; miSchNR_s2 = 0; month_residence_client = 0; month_residence_spouse = 0

        if miSchedNR_exist == 'True':
            #--Check if individual was non_resident or part_yr_resident--
            non_resident = check_form_exist_1bound(non_res_str, miSchedNR_sublist) #--returns string
            part_yr_resident = check_form_exist_1bound(partyr_res_str, miSchedNR_sublist)

            #--Extract FROM: and TO: Residency Date strings--
            from_text = find_string_1bounds(from_str1, miSchedNR_sublist) #--From Dates
            to_text = find_string_1bounds(to_str1, miSchedNR_sublist) #--To Dates

            #--Check for dates (raw strings so that \d is a regex class, not an invalid string escape)--
            from_match = re.search(r'\d{2}-\d{2}-', from_text) #--returns <class 're.Match'> or None
            to_match = re.search(r'\d{2}-\d{2}-', to_text)

            #--if the match exists, there's a date in the string
            if from_match is not None:
                #--YES dates in Schedule NR: strip the year (and the TO: label text) here,
                #--the dates themselves are parsed in Loop 2
                remove_yr = '2019'
                from_str_no_yr = from_text.replace(remove_yr, "")
                to_str_no_yr = to_text.replace(to_str1, "").replace(remove_yr, "") #--removes excess text and year
            else:
                #--NO dates in Schedule NR: results keep their 0 defaults
                from_str_no_yr = 'False'; to_str_no_yr = 'False'

        else:
            non_resident = 'False'; part_yr_resident = 'False'; from_text = 'False'; to_text = 'False'
            #--Also define the year-stripped strings so they are never left unset when the form is absent
            from_str_no_yr = 'False'; to_str_no_yr = 'False'
    #     print('Complete: miSChedule NR Loop1') #--Used for debugging--

        #==============================================
        #---------MI Schedule NR: Loop 2---------
        #--Checks 2 main conditions, whether part-yr resident or nonresident
        #--If part-yr resident, then extracts variables for client 
        #--If part-yr resident and spouse exists, grabs spouse variables
        #==============================================

        #--Start every result at 0. Only the part-year-resident-with-dates path overwrites them,
        #--so no branch below can leave a date or month count undefined for the rules that follow.
        miSchNR_c1 = 0; miSchNR_c2 = 0; miSchNR_s1 = 0; miSchNR_s2 = 0; month_residence_client = 0; month_residence_spouse = 0

        if miSchedNR_exist == 'True' and part_yr_resident == 'True':
            if len(from_str_no_yr.split()) == 1: #--only 'FROM:' is in the list
                pass #--no dates entered: keep the 0 defaults

            elif len(from_str_no_yr.split()) > 1: #--dates are in list as well as 'FROM:'
                #--get client FROM: variables
                from_dates_lst = from_str_no_yr.replace('FROM:', "").split()  #--remove FROM:, then create date list
                miSchNR_c1 = from_dates_lst[0] + '2019'
                client_from_dt = datetime.strptime(miSchNR_c1, '%m-%d-%Y').date() #--create datetime object from date string

                #--get client variables TO: variables
                to_dates_lst = to_str_no_yr.replace('TO:', "").split()  #--remove TO:, then create date list
                miSchNR_c2 = to_dates_lst[0] + '2019'
                client_to_dt = datetime.strptime(miSchNR_c2, '%m-%d-%Y').date() #--create datetime object from date string

                #--get client # months in residence in MI--
                r = relativedelta.relativedelta(client_to_dt, client_from_dt)
                month_residence_client = r.months #--number of months client has lived in Michigan

                if spouse == True:
                    if len(from_dates_lst) > 1 and len(to_dates_lst) > 1:
                        #--second entry of each list is the spouse's date
                        miSchNR_s1 = from_dates_lst[1] + '2019'
                        spouse_from_dt = datetime.strptime(miSchNR_s1, '%m-%d-%Y').date() #--create datetime object from date string

                        miSchNR_s2 = to_dates_lst[1] + '2019'
                        spouse_to_dt = datetime.strptime(miSchNR_s2, '%m-%d-%Y').date() #--create datetime object from date string

                        #--get spouse # months in residence in MI--
                        r = relativedelta.relativedelta(spouse_to_dt, spouse_from_dt)
                        month_residence_spouse = r.months #--number of months spouse has lived in Michigan
                    else:
                        #--spouse exists but only one set of dates was found: keep spouse values at 0
                        #--and flag it, instead of failing with an IndexError
                        m = "WARNING: Spouse residency dates are missing from Schedule NR. Please check form. \n"
                        final_message = final_message + m
            else:
                print("WARNING: Did you mean to check Part-Year Resident, no date values available for Schedule NR. Please check form.")

        elif miSchedNR_exist == 'True' and part_yr_resident == 'False':
            if len(from_str_no_yr.split()) == 1:
                pass #--no dates entered: keep the 0 defaults

            elif len(from_str_no_yr.split()) > 1: #--dates are in list as well as 'FROM:'
                #--dates were entered although Part-Year Resident is not checked
                m = "WARNING: Did you mean to check Nonresident, date values are available for Schedule NR. Please check form. \n"
                final_message = final_message + m
            else:
                #--newline added so the next message appended to final_message starts on its own line
                m = 'WARNING: No dates in Schedule NR, check form \n' #--Redo this part
                final_message = final_message + m

        else:
            pass #--Placeholder for now--

    #     print('Complete: miSChedule NR Loop2')


        #===========================================================
        #--RULE 1: 

        # Check if the client is receiving Social Security and under 
        # the age of 66.  May be eligible for MI disability exemption.
        #===========================================================
        #--Text for print out messages --
        no_errors = 'No warnings or errors'

        rule1_error_message = 'WARNING: Check if client is eligible for Michigan disability exemption'

        #--Rule applies when an MI-1040 is present, Fed 1040 line 5a is positive and the client
        #--(or the spouse, when there is one) is under 66. spouse_age is only read when a spouse
        #--exists, which is what the separate spouse / no-spouse branches did before.
        if mi1040exist == 'True' and Fed1040_5a > 0 and (client_age < 66 or (spouse == True and spouse_age < 66)):
            rule1 = True
        else:
            rule1 = False

        #--Warn only when the rule applies and no exemption is claimed on MI-1040 line 9b.
        #--Every other outcome reports no_errors (the spouse branch used to report
        #--no_errors + '1', a leftover debugging suffix).
        if rule1 == True and mi1040_9b == 0:
            rule1_df = rule1_error_message
            final_message = final_message + rule1_df + '\n'
        else:
            rule1_df = no_errors

        #==========================================================
        #--RULE 2: 

        # If client is 66 or older, MI does not allow a disability 
        # exemption to be claimed for TPD (only for specific cases listed)
        #==========================================================
        rule2_error_message = 'WARNING: Client is 66 or older.  Only eligible for this credit if deaf, blind, hemiplegic, paraplegic, or quadriplegic.'

        #--The previous test, (client_age >= 66 and spouse_age >= 66) or client_age >= 66,
        #--is logically just client_age >= 66, so spouse_age is no longer read here.
        #--NOTE(review): if the intent was to require BOTH ages >= 66 when a spouse exists,
        #--this condition needs a spouse-aware form -- confirm with the rule's author.
        if mi1040exist == 'True' and client_age >= 66:
            rule2 = True
        else:
            rule2 = False

        #--Warn when the rule applies and a disability exemption is claimed on line 9b
        if rule2 == True and mi1040_9b > 0:
            rule2_df = rule2_error_message
            final_message = final_message + rule2_df + '\n'
        else:
            rule2_df = no_errors

        #=============================================================
        #--RULE 4: 

        # If the client is born before 1946 and has retirement income, 
        # this income can be subtracted from MI income.  
        # Check that the required MI tax form is present. 
        #=============================================================

        rule4_error_message = """ERROR: Client has retirement income that can be subtracted from MI income. Check that the MI subtractions from income for retirement income are entered correctly."""
        sum10404b4d = Fed1040_4b + Fed1040_4d

        #--Applies when an MI-1040 exists, Fed 1040 lines 4b + 4d are positive and the client
        #--(or the spouse, when there is one) was born before 1946. The spouse birth year is
        #--only read when a spouse exists.
        rule4_applies = (
            mi1040exist == 'True'
            and (sum10404b4d > 0)
            and (client_birthyr < 1946 or (spouse == True and spouse_birthyr < 1946))
        )
        rule4 = 'True' if rule4_applies else 'False'

        #--Error only when the rule applies and the Schedule 1 subtractions page is missing
        if rule4 == 'True' and not (miSched1_exist_subtractions == 'True'):
            rule4_df = rule4_error_message
            final_message = final_message + rule4_df + '\n'
        else:
            rule4_df = no_errors
    #     print('Complete Rules 1-4') #--Used for debugging--



        #========================================================
        #--RULE 5: 

        # Check that the correct amount of retirement income is 
        # subtracted if born before 1946 with retirement income.
        #========================================================

        rule5_error_message = 'ERROR: Check that all MI retirement income subtractions are entered.'
        #--Expected Schedule 1 line 24 value: Fed 1040 4b + 4d, capped at 52808 or 105615
        min_val_1 = min(Fed1040_4b + Fed1040_4d, 52808)
        min_val_2 = min(Fed1040_4b + Fed1040_4d, 105615)

        #--Same trigger with or without a spouse; the spouse birth year is only read when a spouse exists
        rule5_applies = (
            mi1040exist == 'True'
            and miSched1_exist_subtractions == 'True'
            and (sum10404b4d > 0)
            and (client_birthyr < 1946 or (spouse == True and spouse_birthyr < 1946))
        )
        rule5 = 'True' if rule5_applies else 'False'

        #--Pick the cap by filing status: 7a or 7c -> min_val_1, 7b -> min_val_2.
        #--When none of the boxes reads 'True' nothing is compared.
        if rule5 != 'True':
            line24_ok = True
        elif mi1040_7a == 'True' or mi1040_7c == 'True':
            line24_ok = miSched1_24 == min_val_1
        elif mi1040_7b == 'True':
            line24_ok = miSched1_24 == min_val_2
        else:
            line24_ok = True

        if line24_ok:
            rule5_df = no_errors
        else:
            rule5_df = rule5_error_message
            final_message = final_message + rule5_error_message + '\n'


        #=========================================================
        #--RULE 10: 

        # Check if the client is eligible for the homestead 
        # property tax credit based on income and residency.  
        # If eligible based on these two criteria, check that the 
        # credit is entered.  Set warning to check if not. 
        #=========================================================

        rule10_error_message = "WARNING: Check if client is eligible for Homestead Property Tax Credit"

        #--Both eligibility paths need an MI-1040 with line 12 under 60000
        income_qualifies = mi1040exist == 'True' and mi1040_12 < 60000

        if income_qualifies and mi1040_8a == 'True':
            #--8a = resident, so the months of residence are not needed
            rule10 = 'True'
        elif income_qualifies and mi1040_8c == 'True' and (month_residence_client >= 6 or month_residence_spouse >= 6):
            #--8c = part-year resident with at least 6 months for client or spouse
            rule10 = 'True'
        else:
            rule10 = 'False'

        #--Warn when the client looks eligible but no MI-1040CR is in the return
        if rule10 == 'True' and not (mi1040CR_exist == 'True'):
            rule10_df = rule10_error_message
            final_message = final_message + rule10_df + '\n'
        else:
            rule10_df = no_errors

        #=================================================================
        #--RULE 11: 

        # If the client is not eligible for the homestead property tax credit 
        # based on residency qualification (less than 6 months or NR), 
        # ensure that it is not included in the tax return.
        #=================================================================
        rule11_error_message = 'ERROR: Client was not a Michigan resident for at least 6 months and therefore is not eligible for the Homestead Property Tax Credit'

        #--IF TRUE--
        #--"Not a resident for at least 6 months" means strictly fewer than 6 months. The old
        #--test used <= 6, which flagged exactly 6 months as ineligible even though Rule 10
        #--treats >= 6 months as eligible.
        if spouse == True:
            #--with a spouse: part-year resident (8c), box 7b checked, and both under 6 months
            rule11_applies = (
                mi1040exist == 'True' and mi1040_8c == 'True' and mi1040_7b == 'True'
                and (month_residence_client < 6 and month_residence_spouse < 6)
            )
        else:
            rule11_applies = mi1040exist == 'True' and mi1040_8c == 'True' and (month_residence_client < 6)
        rule11 = 'True' if rule11_applies else 'False'

        #--THEN CHECK--
        #--Error only when the residency test fails AND an MI-1040CR was still included
        if rule11 == 'True' and mi1040CR_exist == 'True':
            rule11_df = rule11_error_message
            final_message = final_message + rule11_df + '\n'
        else:
            rule11_df = no_errors
    #     print('Complete Rules 5-11')

        #=================================================================
        #--RULE 12: 

        # If the client lives in service fee housing, check that subsidized
        # housing is not also checked for homestead credit.
        #=================================================================
        rule12_error_message = 'ERROR: If HPTC includes Service Fee Housing, do not select Subsidized Housing even if rent is subsidized.'

        #--Rule applies only when an MI-1040CR exists and box 54b reads 'True'
        rule12 = 'True' if (mi1040CR_exist == 'True' and mi1040CR_54b == 'True') else 'False'

        #--With 54b checked, box 54a must read 'False'; any other value is reported
        if rule12 == 'True' and not (mi1040CR_54a == 'False'):
            rule12_df = rule12_error_message
            final_message = final_message + rule12_df + '\n'
        else:
            rule12_df = no_errors

        #=================================================================
        #--RULE 13: 

        # Check if Total Household Resources are low compared to rent
        #=================================================================
        rule13_error_message = 'WARNING: Check if there are additional Total Household Resources (THR) that need to be included or if the return should be paper filed with a letter explaining rent vs. THR.'
        #--NOTE(review): the MI-1040CR parsing can store a warning string in these line values;
        #--the arithmetic and comparisons below would raise TypeError in that case.
        temp = mi1040CR_33 * 0.75                            #--75% of MI-1040CR line 33
        mi1040CR53and55 = mi1040CR_53 + mi1040CR_55           #--lines 53 and 55 combined

        #--Rule applies when an MI-1040CR exists and line 44 is positive
        rule13 = 'True' if (mi1040CR_exist == 'True' and mi1040CR_44 > 0) else 'False'

        #--Passes only when lines 53 + 55 stay below 75% of line 33
        if rule13 == 'True' and not (mi1040CR53and55 < temp):
            rule13_df = rule13_error_message
            final_message = final_message + rule13_df + '\n'
        else:
            rule13_df = no_errors

        #=================================================================
        #--RULE 14: 

        # If homestead property tax credit is included, check for 
        # insurance deduction from Total Household resources
        #=================================================================
        rule14_error_message_line31_is0 = 'WARNING: Check if the client insures a car or pays out-of-pocket insurance expenses (such as supplemental Medicare plan)'
        rule14_error_message_line31_gr0 = 'WARNING: Check to ensure that Medicare premiums included on the SSA-1099 are not included in line MI-1040CR line 31'

        #--Rule applies when an MI-1040CR exists and line 44 is positive
        rule14 = 'True' if (mi1040CR_exist == 'True' and mi1040CR_44 > 0) else 'False'

        #--Line 31 equal to 0 and line 31 above 500 each get their own warning
        if rule14 != 'True':
            rule14_warning = None
        elif mi1040CR_31 == 0:
            rule14_warning = rule14_error_message_line31_is0
        elif mi1040CR_31 > 500:
            rule14_warning = rule14_error_message_line31_gr0
        else:
            rule14_warning = None

        if rule14_warning is None:
            rule14_df = no_errors
        else:
            rule14_df = rule14_warning
            final_message = final_message + rule14_df + '\n'

        #=================================================================
        #--RULE 15: 

        # Check that rent is not entered into two locations in the 1040CR
        # form for homestead property tax credit
        #=================================================================
        rule15_error_message = 'WARNING: Rent payments entered in both Parts 4 and 5 for MI-1040CR. Verify this is correct.'

        #--Rule applies only when an MI-1040CR is present and line 53 is positive
        rule15 = 'True' if (mi1040CR_exist == 'True' and mi1040CR_53 > 0) else 'False'

        #--With line 53 filled in, lines 55 and 57 must both be 0; otherwise warn
        if rule15 != 'True' or (mi1040CR_55 == 0 and mi1040CR_57 == 0):
            rule15_df = no_errors
        else:
            rule15_df = rule15_error_message
            final_message = final_message + rule15_df + '\n'

        #=================================================================
        #--RULE 16: 

        # Check that Special Housing (Co-op, etc.) pro-rated taxes
        # are realistic
        #=================================================================
        rule16_error_message = 'WARNING (MI-1040CR line 57): Check that the prorated taxes for the Special Housing were entered correctly.'

        #--Rule applies only when an MI-1040CR is present and line 57 is positive
        rule16 = 'True' if (mi1040CR_exist == 'True' and mi1040CR_57 > 0) else 'False'

        #--Line 57 amounts of 2500 or more are flagged for a manual check
        if rule16 != 'True' or mi1040CR_57 < 2500:
            rule16_df = no_errors
        else:
            rule16_df = rule16_error_message
            final_message = final_message + rule16_df + '\n'

        #========================================================
        #--RULE 17a: 

        # Check address in MI1040CR line 58 (Part 5) for service fee
        #========================================================






        #========================================================
        #--RULE 17b: 

        # Check address in MI1040CR line 58 (Part 5) for tax exempt
        #========================================================





        #========================================================
        #--RULE 17c: 

        # Check address in MI1040CR line 52 (Part IV) for 
        # service fee or tax exempt
        #========================================================







    #     print('Complete Rules 1-17') #--Used for debugging--

        #=================================================================
        #---Create DataFrame of Variables and Values--
        #=================================================================
        pd.set_option('display.max_columns', None) #--Setup to visualize all columns in dataframe during printout
        pd.set_option('display.max_rows', None)

        #--col_lst is in a cell above this one; var_lst must list one value per
        #--column name in col_lst, in the same order--
        var_lst = [file, run_code_datetime, timestamp_str, client_name, client_age, client_birthyr, spouse, spouse_age, spouse_birthyr,
                   Fed1040SR_exist, Fed1040_exist, Fed1040_4b, Fed1040_4d, Fed1040_5a,
                   mi1040exist, mi1040_7a, mi1040_7b, mi1040_7c, mi1040_8a, mi1040_8b, mi1040_8c, mi1040_9b, mi1040_12,
                   miSched1_exist_subtractions, miSched1_24, miSchedNR_exist, miSchNR_c1, miSchNR_c2, miSchNR_s1, miSchNR_s2,
                   month_residence_client, month_residence_spouse, mi1040CR_exist, mi1040CR_11, mi1040CR_31, mi1040CR_33, mi1040CR_44,
                   mi1040CR_53, mi1040CR_54a, mi1040CR_54b, mi1040CR_55, mi1040CR_57,
                   rule1_df, rule2_df, rule4_df, rule5_df, rule10_df, rule11_df, rule12_df, rule13_df, rule14_df, rule15_df, rule16_df]

        #--create single-row dataframe for this client's values
        tax_df = pd.DataFrame([var_lst], columns=col_lst)


        #=================================================================
        #--Append single client values to all_client tax dataframe--
        #=================================================================
        #--DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        #--pd.concat gives the same result (row index of each client row is kept as-is)
        all_tax_df = pd.concat([all_tax_df, tax_df])

        #=================================================================
        #--Combine Rules Text for .txt file: single client--
        #=================================================================
    #     results = 'RESULTS FOR: ' + client_name + '\n' + "Today's Date: " + str(run_code_datetime) + '\n' +  '====================================================== \n\n' + final_message

        #--Header for this client's section of the .txt report, followed by every
        #--error/warning message the rules above added to final_message
        results = 'RESULTS FOR: ' + client_name + '\n' + "Today's Date: " + str(run_code_datetime) + '\n'
        results = results + 'Filename: ' + file + '\n' +  '====================================================== \n' + final_message

        #--If no rule added a message, say so explicitly. This checks final_message itself
        #--rather than counting words in `results`, because the word count changes with the
        #--number of words in the client name and filename (long names hid the "no errors" line).
        #--assumes final_message is reset to an empty string for each file before the rules run -- confirm
        if not final_message.strip():
            results = results + 'There are no Errors or Warnings for this client tax file.'

        all_tax_txt_file = all_tax_txt_file + results + '\n\n\n\n'
        formcount = formcount + 1
        print('Form ' + str(formcount) + ' complete;  ', 'Filename: ' + file) #--gives form number and file name
        
    except:
        #--if file doesn't run in tool, run the code below, which gives the name of the file that needs to be checked by hand
        name = file.replace('2019_Returns/', "")
        name = name.replace('.pdf', "")

        results = 'RESULTS FOR: ' + name + '\n'
        results = results + 'Filename: ' + file + '\n' +  '====================================================== \n'
        results = results + '1. CHECK THIS FORM BY HAND. 2. ADD THIS FILENAME TO GOOGLE DOC "Returns that broke tool".'
        all_tax_txt_file = all_tax_txt_file + results + '\n\n\n\n'
        formcount = formcount + 1
        print('\n*** ERROR in Form ' + str(formcount) + ' ***\nForm ' + str(formcount) + ' not complete;', '  Filename: ' + file  + '\n1. Check this form by hand, 2. Add Filename to Google Doc "Returns that broke tool" *** \n') #--gives form number and file name
        

mi1040CR_52a_street1 2666 SUNSHINE AVE
Form 1 complete;   Filename: 2019_Returns/2019_LiveSoftware_1040SR.pdf
mi1040CR_52a_street1 2666 SUNSHINE AVE
Form 2 complete;   Filename: 2019_Returns/2019_LiveSoftware_1040_MI_SCH1_ADDITIONS.pdf
mi1040CR_52a_street1 False
Form 3 complete;   Filename: 2019_Returns/2019_LiveSoftware_1040_NR.pdf
mi1040CR_52a_street1 702 SO TWENTY SIXTH
Form 4 complete;   Filename: 2019_Returns/2019_Rule17c_FAIL_4apartments.pdf


In [13]:
#=================================================================
#--EXPORT DATAFRAME to .csv and RULES TO .txt FILE--
#=================================================================

#--Create text file for all clients--
#--The with-block closes the file even if the write fails part-way
file = 'Client_output.txt'
with open(file, 'w') as txt_file:
    n = txt_file.write(all_tax_txt_file)  # n = number of characters written

#--Export Dataframe to .csv--
file_name = '2019_client_tax_information.csv'
all_tax_df.to_csv(file_name)