# CheckSum for nHealth

In [378]:
import random

In [469]:
raw_ids = ['PA0000712', 'PA0030821', 'PB4000394', 'PC0101041', 'PD0031157', 'PG0001218']

In [470]:
# Takes in the ID with characters
# Converts the characters to ASCII int
# Returns the digits which are then used to generate the checksum via Luhn algorithm
# we use the UPPERCASE ascii as it is always 2 digits
def convertIDToDigits(string):
    # strip off the first two characters and convert them to ascii 
    # append the numbers to the remaining digits
    # strip out any spaces or dashes
    s = string.replace('-', '')
    digits = s[2:]
    chars = s[:2]
    # reverse the list so when we append the numbers we preserve the order
    chars = list(reversed(chars))
    for c in chars:
        num = ord(c.upper())
        digits = str(num) + digits
    return(digits)

In [471]:
def convertIDToVarChar(string):
    # re-assemble the ID back to the original format
    # take the first four digits
    digits = string[4:]
    d_1 = digits[:4]
    d_2 = digits[4:]
    chars = string[:4]
    l_1 = chr(int(chars[:2]))
    l_2 = chr(int(chars[2:]))
    return l_1 + l_2 + '-' + d_1 + '-' + d_2

In [472]:
# from https://github.com/mmcloughlin/luhn/blob/master/luhn.py

def checksum(string):
    """
    Compute the Luhn checksum for the provided string of digits. Note this
    assumes the check digit is in place.
    """
    digits = list(map(int, string))
    odd_sum = sum(digits[-1::-2])
    even_sum = sum([sum(divmod(2 * d, 10)) for d in digits[-2::-2]])
    return (odd_sum + even_sum) % 10

def verify(string):
    """
    Check if the provided string of digits satisfies the Luhn checksum.
    >>> verify('356938035643809')
    True
    >>> verify('534618613411236')
    False
    """
    # if it contains only digits
    if not string.isdigit():
        string = convertIDToDigits(string)
    
    return (checksum(string) == 0)

def generate(string):
    """
    Generate the Luhn check digit to append to the provided string.
    >>> generate('35693803564380')
    9
    >>> generate('53461861341123')
    4
    """
    cksum = checksum(string + '0')
    return (10 - cksum) % 10

def append(string):
    """
    Append Luhn check digit to the end of the provided string.
    >>> append('53461861341123')
    '534618613411234'
    """
    return string + str(generate(string))

In [473]:
#raw_ids = ['PA-134-4561', 'PB-546-4522']
computed_ids = []
participant_ids = []

In [474]:
for id in raw_ids:
    id = convertIDToDigits(id)
    new_id = append(id)
    computed_ids.append(new_id)

In [475]:
print(computed_ids)

['806500007125', '806500308218', '806640003943', '806701010415', '806800311573', '807100012184']


In [476]:
# convert these basck to the required format
for id in computed_ids:
    p_id = convertIDToVarChar(id)
    participant_ids.append(p_id)
print(participant_ids)

['PA-0000-7125', 'PA-0030-8218', 'PB-4000-3943', 'PC-0101-0415', 'PD-0031-1573', 'PG-0001-2184']


In [477]:
# check LUHN
for id in participant_ids:
    print("id:", id, verify(id))

id: PA-0000-7125 True
id: PA-0030-8218 True
id: PB-4000-3943 True
id: PC-0101-0415 True
id: PD-0031-1573 True
id: PG-0001-2184 True


In [478]:
# augment the id and run the check again
# should be false
for id in participant_ids:
    # take the 6th digit
    d = id[6]
    #randomly replace it to generate a new ID
    new_id = id.replace(d, str(random.randint(1,9)))
    print("id:", new_id, verify(new_id))

id: PA-7777-7125 False
id: PA-3333-8218 False
id: PB-4999-3943 False
id: PC-0909-0495 False
id: PD-0031-1573 True
id: PG-0005-2584 False


In [479]:
for id in participant_ids:
    print("id:", id, verify(id))

id: PA-0000-7125 True
id: PA-0030-8218 True
id: PB-4000-3943 True
id: PC-0101-0415 True
id: PD-0031-1573 True
id: PG-0001-2184 True


In [480]:
# recheck that the computed_ids are still valid
for id in computed_ids:
    print("id:", id, verify(id))

id: 806500007125 True
id: 806500308218 True
id: 806640003943 True
id: 806701010415 True
id: 806800311573 True
id: 807100012184 True


In [481]:
# print out the final participant_id generated
print("From the raw input\n")
print(raw_ids)
print("\nWe generate these Ids with check-sums. Hyphens optional \n")
print(participant_ids)

From the raw input

['PA0000712', 'PA0030821', 'PB4000394', 'PC0101041', 'PD0031157', 'PG0001218']

We generate these Ids with check-sums. Hyphens optional 

['PA-0000-7125', 'PA-0030-8218', 'PB-4000-3943', 'PC-0101-0415', 'PD-0031-1573', 'PG-0001-2184']
