<a href="https://colab.research.google.com/github/clarkde5/jams-pub/blob/main/colabs/jams-env-doctr-lease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Environment**

## **Bootstrap public repo**

In [None]:
# Clone the public repo into src/jams-pub; the next cell runs src/jams-pub/env-setup.py from it.
!git clone https://github.com/clarkde5/jams-pub src/jams-pub

## **Pull private resources**

In [None]:
# Cell toggle: when not True, the guard at the bottom of this cell skips CallPrivateEnvSetup().
runOptional = True
def CallPrivateEnvSetup():
  from pathlib import Path
  from google.colab import userdata
  try:
    rsa_private_key = userdata.get('PrivateKey')
  except:
    rsa_private_key = ""
    print("PrivateKey is required, but not found in secrets / colab userdata")
    return

  !python src/jams-pub/env-setup.py "{rsa_private_key}"

  home = str(Path.home())
  if not Path(f"{home}/.ssh/id_rsa").is_file():
    print("env-setup failed to create rsa key")
    return

  if not Path(f"src/jams").is_dir():
    !git clone git@github.com:clarkde5/jams.git src/jams

if __name__ == "__main__":
  # Respect the cell toggle: report and do nothing unless it is switched on.
  if runOptional != True:
    print("Not running optional cell")
  else:
    CallPrivateEnvSetup()

## **(Optional) VS Code Integration**

In [None]:
runOptional = False
if runOptional == True:
  !pip install -U git+https://github.com/amitness/colab-connect.git

  from colabconnect import colabconnect

  colabconnect()
else:
  print("Not running optional cell")

# **Parse Lease Output**

**NOTE:** Requires the [Pull private resources](https://colab.research.google.com/github/clarkde5/jams-pub/blob/main/colabs/jams-env-doctr.ipynb#scrollTo=CyQkr3hoftql) cell to have been run first.

In [None]:
# Cell toggle: when not True, the guard at the bottom of this cell skips main().
runOptional = True
def getContractsForPage(page_idx,page):
  """Collect the contract numbers found on one page of the OCR output.

  Words are ignored until a word containing "CURRENT" has been seen; on
  every page after the first (``page_idx != 0``) scanning is active from
  the start. Within each line the words are visited in order of the second
  coordinate of their first geometry point.

  Args:
    page_idx: Zero-based index of the page within the document.
    page: Page dict shaped ``{"blocks": [{"lines": [{"words": [...]}]}]}``
      where every word has a ``"value"`` string and a ``"geometry"`` whose
      ``[0][1]`` entry is used as the vertical position.

  Returns:
    A list of ``{"contract_number", "pdf_y", "page"}`` dicts, where
    ``pdf_y`` is the word position offset by ``page_idx`` (so entries from
    different pages sort in document order) and ``page`` is one-based.
  """
  import re

  # Raw string: "\d" in a normal string literal is an invalid escape sequence.
  contract_pattern = re.compile(r"\d{3}-\d{7}-\d{3}")
  CurrentFound = page_idx != 0
  contracts = []

  for block in page["blocks"]:
    for line in block["lines"]:
      for word in sorted(line["words"], key = lambda x: x["geometry"][0][1]):
        value = word["value"]
        if "CURRENT" in value:
          CurrentFound = True

        if not CurrentFound:
          continue

        if contract_pattern.search(value):
          contracts.append({"contract_number": value, "pdf_y": word["geometry"][0][1]+page_idx, "page": page_idx+1})

  return contracts

def getSerialNumbersForPage(page_idx,page):
  """Collect the serial numbers found on one page of the OCR output.

  Words are ignored until a word containing "CURRENT" has been seen; on
  every page after the first (``page_idx != 0``) scanning is active from
  the start. Whenever a word containing "SERIAL" is met, the third word of
  that line (in the line's original order) is recorded as the serial number.

  Args:
    page_idx: Zero-based index of the page within the document.
    page: Page dict shaped ``{"blocks": [{"lines": [{"words": [...]}]}]}``
      where every word has a ``"value"`` string and a ``"geometry"`` whose
      ``[0][1]`` entry is used as the vertical position.

  Returns:
    A list of ``{"serial_number", "pdf_y", "page"}`` dicts, where ``pdf_y``
    is the serial word's position offset by ``page_idx`` and ``page`` is
    one-based. Lines with fewer than three words contribute nothing.
  """
  CurrentFound = page_idx != 0
  serialNumbers = []

  for block in page["blocks"]:
    for line in block["lines"]:
      words = line["words"]
      for word in sorted(words, key = lambda x: x["geometry"][0][1]):
        if "CURRENT" in word["value"]:
          CurrentFound = True

        if not CurrentFound:
          continue

        if "SERIAL" not in word["value"]:
          continue

        # The serial value is taken from the third word of the line; skip
        # short lines instead of raising IndexError.
        if len(words) < 3:
          continue

        serial_number_word = words[2]
        serialNumbers.append({"serial_number": serial_number_word["value"], "pdf_y": serial_number_word["geometry"][0][1]+page_idx, "page": page_idx+1})

  return serialNumbers

def getPaymentDue(page_idx,page):
  """Collect the payment amounts found on one page of the OCR output.

  Every word before the first one containing "PLEASE" is skipped entirely
  (including any "CURRENT" marker it may carry). After that, words are
  still ignored until one containing "CURRENT" has been seen; on every page
  after the first (``page_idx != 0``) that second gate is open from the
  start. Words that then contain a decimal amount with two fraction digits
  (optionally negative) are recorded.

  Args:
    page_idx: Zero-based index of the page within the document.
    page: Page dict shaped ``{"blocks": [{"lines": [{"words": [...]}]}]}``
      where every word has a ``"value"`` string and a ``"geometry"`` whose
      ``[0][1]`` entry is used as the vertical position.

  Returns:
    A list of ``{"price", "pdf_y", "page"}`` dicts, where ``price`` is the
    raw word text, ``pdf_y`` is the word position offset by ``page_idx``
    and ``page`` is one-based.
  """
  import re

  # Raw string: "\d" and "\." in a normal literal are invalid escape sequences.
  price_pattern = re.compile(r"-?\d+\.\d{2}")
  CurrentFound = page_idx != 0
  PleaseFound = False
  prices = []

  for block in page["blocks"]:
    for line in block["lines"]:
      for word in sorted(line["words"], key = lambda x: x["geometry"][0][1]):
        value = word["value"]
        if "PLEASE" in value:
          PleaseFound = True

        if not PleaseFound:
          continue

        if "CURRENT" in value:
          CurrentFound = True

        if not CurrentFound:
          continue

        if price_pattern.search(value):
          prices.append({"price": value, "pdf_y": word["geometry"][0][1]+page_idx, "page": page_idx+1})

  return prices

def main(lease_json_path='src/jams/output/Aug 23 Lease-docTR.json'):
  """Print contract numbers, serial numbers and amounts due from a lease OCR dump.

  Loads the JSON file, runs the three per-page extractors over every entry
  of ``data["pages"]``, merges their results, orders them by ``pdf_y`` and
  prints them grouped under a "Page: N" header.

  Args:
    lease_json_path: Path of the JSON file to read. Defaults to the lease
      export inside the private ``src/jams`` checkout.
  """
  import json

  # Use a context manager so the file handle is closed after parsing.
  with open(lease_json_path, encoding="utf-8") as f:
    data = json.load(f)

  contracts = []
  serialNumbers = []
  paymentDue = []

  for page_idx,page in enumerate(data["pages"]):
    contracts += getContractsForPage(page_idx,page)
    serialNumbers += getSerialNumbersForPage(page_idx,page)
    paymentDue += getPaymentDue(page_idx,page)

  # pdf_y already includes the page index, so one sort yields document order.
  contractSerials = sorted(contracts + serialNumbers + paymentDue, key = lambda x: x["pdf_y"])
  currentPage = 0

  for contractSerialPair in contractSerials:
    # Emit a header each time the entries move on to a different page.
    if contractSerialPair["page"] != currentPage:
      currentPage = contractSerialPair["page"]
      print("\r\nPage: " + str(currentPage) + "\r\n")

    if "contract_number" in contractSerialPair:
      print("Contract Number: " + contractSerialPair["contract_number"])
    elif "serial_number" in contractSerialPair:
      print("Serial Number: " + contractSerialPair["serial_number"])
    elif "price" in contractSerialPair:
      print("Due: " + contractSerialPair["price"])
    else:
      # str() is required here: concatenating a dict to a str raises TypeError.
      print("Error unknown pair: " + str(contractSerialPair))

if __name__ == "__main__":
  # Respect the cell toggle: report and do nothing unless it is switched on.
  if runOptional != True:
    print("Not running optional cell")
  else:
    main()