# Algorithm: Making Determinations

Now that we can identify relevant records in each supporting dataset, the next step is to use that information to make the PII redaction decision for a particular card. We will follow the procedure outlined in the same flowchart as before, which is copied below for your reference.

<img src="Pics/flowchart.png" />

The outer loop is there to indicate that we will process all of the 250 incident cards for PII. However, we can start by writing a Python function that makes that determination for just one card.

In [1]:
import pandas as pd

# Load the incident cards and the two supporting datasets from CSV.
data_Card = pd.read_csv("Datasets/Cards_Box9.csv")
data_Form26 = pd.read_csv("Datasets/WRAForm26.csv")
data_FAR = pd.read_csv("Datasets/TuleLake_FAR_ALL_FINAL4.csv")

# Index each supporting dataset by lower-cased last name so that lookups
# can start from the family group instead of scanning every row.
far_last_names = data_FAR['LastName'].str.lower()
g_FAR = data_FAR.groupby(far_last_names)

form26_last_names = data_Form26['LastName'].str.lower()
g_dataForm26 = data_Form26.groupby(form26_last_names)

In [2]:
def lookup_form26(last, first, other):  # note the additional "other" parameter
    """Looks up a name in Form 26, starting with the last name index.

    Parameters:
        last:  last name from the card (str), or None/NaN if the card has none.
        first: first name from the card (str) or None.
        other: other name from the card (str) or None.

    Returns the first row of data_Form26 (as an array of column values) whose
    last name equals `last` and whose column 1 equals the card name being
    compared, ignoring case. Returns None when nothing matches, including
    when `last` is missing.
    """
    if last is None or pd.isna(last):
        return None  # a card without a last name cannot be matched

    family_rows = g_dataForm26.groups.get(last.lower())
    if family_rows is None:
        return None  # No match on last name

    # The card's first name is compared when present; the other name is
    # only used when the card has no first name.
    # NOTE(review): a card that has both names never falls back to `other`
    # when `first` fails to match -- confirm that this is intended.
    wanted = first if first is not None else other
    if wanted is None:
        return None  # the card has no given name to compare
    wanted = wanted.lower()

    # Fetch the row array once; asking for .values inside the loop would
    # rebuild it on every iteration.
    records = data_Form26.values
    for i in family_rows:
        val = records[i]
        if pd.notna(val[1]) and wanted == val[1].lower():
            return val  # last name and given name both match this record
    return None  # if we reach the end of the family group w/o finding a matching value.

In [3]:
def lookup_far(last, first, other):
    """Looks up a name in FAR, starting with the last name index.

    Parameters:
        last:  last name from the card (str), or None/NaN if the card has none.
        first: first name from the card (str) or None.
        other: other name from the card (str) or None.

    Returns the first row of data_FAR (as an array of column values) in the
    matching last-name group whose column 2 equals `first`, or whose
    column 3 equals `other`, ignoring case. Returns None when nothing
    matches, including when `last` is missing.
    """
    if last is None or pd.isna(last):
        return None  # a card without a last name cannot be matched

    family_rows = g_FAR.groups.get(last.lower())
    if family_rows is None:
        return None  # No match on last name

    first_key = None if first is None else first.lower()
    other_key = None if other is None else other.lower()

    # Fetch the row array once; asking for .values inside the loop would
    # rebuild it on every iteration.
    records = data_FAR.values
    for i in family_rows:
        val = records[i]
        if first_key is not None and pd.notna(val[2]):
            if first_key == val[2].lower():
                return val  # if both names match, return this record
        # NOTE(review): `other` is only consulted when the card has no first
        # name or this FAR row has no value in column 2 -- confirm intended.
        elif other_key is not None and pd.notna(val[3]):
            if other_key == val[3].lower():
                return val  # card other name matches FAR column 3
    return None  # if we reach the end of the family group w/o finding a matching value.

In [33]:
from datetime import datetime, timedelta as delta

# Age (in years) used as the threshold for treating a person as a minor.
# Kept as a module-level variable so it can be changed for testing.
age_of_majority = 18
# Approximate number of weeks in a year (365 / 7, so leap days are ignored);
# used to express a span of years as a timedelta in weeks.
weeks_per_year = 52.1429


def reasonsToRedact(index):  # index is the integer position of the card requested
    """Determines if a particular card, indicated by its data_Card index,
    is releasable. It returns a list of reasons the data cannot be released
    or an empty list if it may be released.

    The card columns are read by position: 1 = not-inmate flag, 2 = last
    name, 3 = first name, 4 = other name, 5 = incident date ('%m/%d/%y'),
    6 = incident year.

    Possible reasons:
        'Not an incarceree'                     -- the not-inmate flag is set
        'FAR birthdate indicates still a minor' -- from the FAR birth date
        'Form 26 indicates still a minor'       -- from the Form 26 birth year
        'No supporting data found'              -- no match in either dataset
    """
    result = []  # this is where we will add redaction reasons

    # check the not-inmate column first, as these rows don't have names
    if pd.notna(data_Card.iloc[index, 1]):
        # a non-NaN value in this column marks the card as not an incarceree
        result.append('Not an incarceree')
        return result

    # first we read some card data into local variables
    # we have to make sure to replace NaN with None
    last_name = data_Card.iloc[index, 2]
    last_name = None if pd.isna(last_name) else last_name
    first_name = data_Card.iloc[index, 3]
    first_name = None if pd.isna(first_name) else first_name
    other_name = data_Card.iloc[index, 4]
    other_name = None if pd.isna(other_name) else other_name
    incident_date_str = data_Card.iloc[index, 5]
    incident_date = datetime.strptime(incident_date_str, '%m/%d/%y')
    # %y maps 00-68 to 20XX but 69-99 to 19XX, so only the 20XX results
    # need to be moved back a century.
    if incident_date.year >= 2000:
        incident_date = incident_date.replace(year=incident_date.year - 100)
    incident_year = int(data_Card.iloc[index, 6])

    # One extra year is added to the age of majority in both checks below.
    # NOTE(review): presumably a safety margin for the approximate date
    # arithmetic and the year-only Form 26 data -- confirm.
    years_until_release = age_of_majority + 1

    far_match = lookup_far(last_name, first_name, other_name)
    if far_match is not None:
        # NOTE(review): strptime raises if the FAR birth date is missing or
        # not in '%m/%d/%Y' form -- confirm the column is always populated.
        birth_date_str = far_match[4]
        birth_date = datetime.strptime(birth_date_str, '%m/%d/%Y')
        weeks_until_majority = years_until_release * weeks_per_year
        majority_date = birth_date + delta(weeks=weeks_until_majority)
        if majority_date > incident_date:
            result.append("FAR birthdate indicates still a minor")

    f26_match = lookup_form26(last_name, first_name, other_name)
    if f26_match is not None:
        birth_year = f26_match[2]
        # Use the same threshold as the FAR check instead of a fixed 19, so
        # that changing age_of_majority affects both checks.
        # NOTE(review): a NaN birth year makes the comparison False, so such
        # a match adds no reason -- confirm this is acceptable.
        majority_year = birth_year + years_until_release
        if majority_year > incident_year:
            result.append("Form 26 indicates still a minor")

    if far_match is None and f26_match is None:
        result.append('No supporting data found')
    return result

In [34]:
# Spot-check a handful of cards: show each raw card row followed by the
# redaction reasons computed for it, with a blank gap between cards.
sample_cards = (1, 2, 3, 6, 112)
for position, card_index in enumerate(sample_cards):
    if position > 0:
        print('\n')
    print(data_Card.values[card_index])
    print(reasonsToRedact(card_index))

['Box9-0692.jpg' nan 'Ebesu' 'Kikumatsu' nan '7/24/42' 1942]
['No supporting data found']


['Box9-0642.jpg' nan 'Doi' 'Satomi' nan '8/6/42' 1942]
[]


['Box9-0765.jpg' nan 'Endo' nan 'Herbert' '8/25/42' 1942]
['FAR birthdate indicates still a minor', 'Form 26 indicates still a minor']


['Box9-0632.jpg' nan 'Doi' 'Kanjiro' nan '10/3/42' 1942]
[]


['Box9-0196.jpg' 'Y' nan nan nan '3/11/44' 1944]
['Not an incarceree']


## Testing for a Different Age

This box 9 incident card dataset has already been redacted, which is why we can share it with you. So
how can we know that the algorithm works well and how many determinations it can make?

Since we made the age_of_majority a variable, we can change it just for testing purposes.

In [35]:
# Raise the threshold for testing only, then re-check a single card.
age_of_majority = 75

test_card = 111
print(data_Card.values[test_card])
print(reasonsToRedact(test_card))

['Box9-1053.jpg' nan 'Fujii' 'Yasuko' nan '3/7/44' 1944]
['FAR birthdate indicates still a minor']


**NOTE**: You will have to change the variable back to 18 before the code will run normally.

Let's go ahead and do that and then make sure that all of the PII has already been redacted.

In [37]:
# Restore the real threshold before scanning the cards.
age_of_majority = 18

# Count and list every card that still has a reason to be redacted,
# skipping cards that are flagged as not belonging to an incarceree.
count = 0
for card_index in range(113):
    card_reasons = reasonsToRedact(card_index)
    if not card_reasons or 'Not an incarceree' in card_reasons:
        # nothing to redact, or the card belongs to a staffer
        continue
    count += 1
    print(str(data_Card.values[card_index]) + " for these reasons " + str(card_reasons))
print(count)

['Box9-0692.jpg' nan 'Ebesu' 'Kikumatsu' nan '7/24/42' 1942] for these reasons ['No supporting data found']
['Box9-0765.jpg' nan 'Endo' nan 'Herbert' '8/25/42' 1942] for these reasons ['FAR birthdate indicates still a minor', 'Form 26 indicates still a minor']
['Box9-0780.jpg' nan 'Enjoki' nan 'George' '1/10/43' 1943] for these reasons ['No supporting data found']
['Box9-0015.jpg' nan 'Tujii' 'Yoshio' nan '11/4/43' 1943] for these reasons ['No supporting data found']
['Box9-0168.jpg' nan 'Antoku' 'Teru' 'Charles' '11/4/43' 1943] for these reasons ['No supporting data found']
['Box9-0169.jpg' nan 'Antoku' nan 'Charles' '11/4/43' 1943] for these reasons ['No supporting data found']
['Box9-0201.jpg' nan 'CHI-no-maki' nan nan '11/4/43' 1943] for these reasons ['No supporting data found']
['Box9-0509.jpg' nan 'Ayoama' 'Dan' nan '11/4/43' 1943] for these reasons ['No supporting data found']
['Box9-0641.jpg' nan 'Doi' 'Noburo' nan '11/4/43' 1943] for these reasons ['No supporting data found']