In [2]:
import re
import json
from itertools import chain

class NmiChecker:
    
    def __init__(self, json_file_path):
        with open(json_file_path) as json_file:
            self.json_data = json.load(json_file)

    @staticmethod
    def load_string_from_file(filename):
      try:
          with open(filename, 'r') as file:
              content = file.read()
              content = content.replace('\n', ' ')
              clean_string = ' '.join(content.split())
              clean_string = clean_string.strip()
              clean_string = re.sub('\W+',' ', clean_string )
              return clean_string
      except FileNotFoundError:
          print(f"The file '{filename}' was not found.")
          return None
      except Exception as e:
          print(f"An error occurred while reading the file: {e}")
          return None
    
    def generate(self, nmi):
        if len(nmi) > 10:
            nmi = nmi[0:10]
        ascii_values = chain(map(lambda x: ord(x) * 2, nmi[-1::-2]), map(lambda x: ord(x), nmi[-2::-2]))
        ascii_digits = ''.join(map(lambda x: str(x), ascii_values))
        digits = map(lambda x: ord(x) - ord('0'), ascii_digits)
        reduction = sum(digits)
        return (10 - (reduction % 10)) % 10

    def find_special_string(self, text):
        """
        -> [A-HJ-NP-Z0-9]*: Matches zero or more uppercase letters and digits (excluding 'O' and 'I')
        -> [0-9]: Matches exactly one digit
        -> [A-HJ-NP-Z0-9]*: Matches zero or more uppercase letters and digits (excluding 'O' and 'I')
        """
        pattern = r'\b[A-HJ-NP-Z0-9]*[0-9][A-HJ-NP-Z0-9]*\b'
        match = re.search(pattern, text)
        if match:
            found_string = match.group()
            if 'O' not in found_string and 'I' not in found_string:
                print(f"String found {found_string}")
                return found_string, True
        print("NMI not found in text")
        return None, False

    def compare_checksum(self, text):
        result, found = self.find_special_string(text)
        if found:
            generated_digit = self.generate(result)
            last_character = int(result[-1])
            if generated_digit == last_character:
                return result, True
            else:
                print(f"Checksum failed for string {result} and generated_digit {generated_digit}")
                return None, False
        return None, False

    def check_in_json_range(self, numeric_value):
        num_value = int(numeric_value)
        for key, value in self.json_data.items():
            for range_entry in value['ranges']:
                if num_value >= int(range_entry['from']) and num_value <= int(range_entry['to']):
                    return (value['id'], value['state']), True
        print(f"{numeric_value} not in range of any AEOMO Range")
        return None, False

    def check(self, text):
        output, result = self.compare_checksum(text)
        if not result:
            return None, False
        numeric_data = ''.join(filter(str.isdigit, output))
        return self.check_in_json_range(numeric_data)



In [3]:
# Loading class and checking text output
checker = NmiChecker('aemo.json')
test_str = checker.load_string_from_file("203211.txt")
print(test_str)


Tax Invoice 202211 203211 Issue Date If undelivered please return to PO Box 3122 Newstead QLD 4006 17 Nov 2022 FINAL INVOICE ABN 42 636 908 220 Account Enquiries Pie Town Pty Ltd 450 Nicholson Street Fitzroy North VIC 3068 Electricity Account Previous Amount Payments Received 1 572 59 1 572 59 CR Opening Balance 6 29 New Charges 679 87 1300 707 042 customerservice gee com au www gee com au Office Hours Monday Friday 8 30am 5 00pm AEST Moving Out Visit our website or call us 5 business days before you move out Faults Emergencies Call Energex 24 hours on 13 62 62 Invoice Summary Amount Due 686 16 Usage and Supply For the Period 25 October 2022 7 November 2022 Your Account will be direct debited on Previous Amount Payment Received 11 11 2022 thank you Credit Card Merchant Service Fee Incl GST 1572 59 1572 59 CR 6 29 Due Date Opening Balance Customer Number Electricity charges please see over for details Standard Feed in Tariff 680 55 0 68 CR GST included in new charges New Total Charges 6

In [4]:
checker.check(test_str)

String found 202211
Checksum failed for string 202211 and generated_digit 4


(None, False)