In [1]:
import re
from collections import Counter

In [2]:
# Matches one "key:value" field: a three-word-character key, a colon, then a
# run of non-whitespace characters. Written as a raw string so that \w and \S
# reach the regex engine as-is instead of being treated as (invalid) string
# escape sequences by the Python parser.
rgx = re.compile(r"(\w{3}):(\S+)")

In [3]:
def parse_passports(fn):
    """Read the file at ``fn`` and parse it into passports.

    The file content is split on blank lines (two consecutive newlines); each
    resulting record has its remaining newlines replaced by spaces and is then
    scanned with the module-level ``rgx`` pattern.

    Returns a list with one entry per record, each entry being the list of
    ``(key, value)`` tuples that ``rgx.findall`` produced for that record.
    """
    with open(fn, mode="r") as handle:
        contents = handle.read()

    records = contents.split('\n\n')
    return [rgx.findall(record.replace('\n', ' ')) for record in records]

In [4]:
# Parse the main input file into a list of passports, each a list of
# (prefix, value) tuples.
passports = parse_passports("./inputs/4")

In [5]:
# Show the first two parsed passports as a quick check of the parsed structure.
passports[:2]

[[('hgt', '176cm'),
  ('iyr', '2013'),
  ('hcl', '#fffffd'),
  ('ecl', 'amb'),
  ('byr', '2000'),
  ('eyr', '2034'),
  ('cid', '89'),
  ('pid', '934693255')],
 [('hcl', '#b5c3db'),
  ('ecl', 'grn'),
  ('hgt', '155cm'),
  ('pid', '#baec97'),
  ('iyr', '2017'),
  ('byr', '1939'),
  ('eyr', '2020')]]

In [6]:
# Field prefixes that must appear exactly once in a passport, followed by the
# ones that may be omitted; `all_prefixes` is the two lists concatenated.
required_prefixes = [
    'byr', 'iyr', 'eyr',
    'hgt', 'hcl', 'ecl', 'pid',
]
optional_prefixes = ['cid']
all_prefixes = [*required_prefixes, *optional_prefixes]

In [7]:
# Sanity check on the parsed input: no passport carries more than eight
# fields, and no field uses a prefix outside `all_prefixes`.
for passport in passports:
    assert len(passport) <= 8
    assert not any(name not in all_prefixes for name, _ in passport)

In [8]:
def passport_valid(passport):
    """Check that a passport has the right set of fields (values are ignored).

    ``passport`` is an iterable of ``(prefix, value)`` pairs. Returns True when
    every prefix in ``required_prefixes`` occurs exactly once and every prefix
    in ``optional_prefixes`` occurs at most once; False otherwise.
    """
    seen = Counter(prefix for prefix, _ in passport)

    # A missing or duplicated required prefix disqualifies the passport.
    if any(seen[name] != 1 for name in required_prefixes):
        return False

    # Optional prefixes may be absent or present once, but not repeated.
    return all(seen[name] in (0, 1) for name in optional_prefixes)

In [9]:
# Parse the small test input used to check passport_valid before running it
# on the main input.
test_passports = parse_passports("./inputs/4_test")

In [10]:
# Count the test passports that pass the field-presence check; the test input
# is expected to contain exactly two such passports.
num_valid_test = sum(map(passport_valid, test_passports))

assert num_valid_test == 2

In [11]:
# Number of passports in the main input that pass the field-presence check.
sum(map(passport_valid, passports))

230

In [14]:
def year_valid(low, high):
    """Build a validator for four-digit years in the inclusive range [low, high].

    Parameters
    ----------
    low, high : int
        Inclusive lower and upper bounds for an accepted year.

    Returns
    -------
    callable
        ``valid(s)``: takes a string and returns True only when ``s`` consists
        of exactly four digits and ``low <= int(s) <= high``; False otherwise.
    """
    # Compiled once per validator. fullmatch() is used below so the whole
    # string must be four digits: input with trailing characters ("2002abc")
    # is rejected instead of reaching int() and raising ValueError, and
    # longer digit runs never count as a four-digit year.
    rgx = re.compile(r"\d{4}")

    def valid(s):
        if not rgx.fullmatch(s):
            return False

        return low <= int(s) <= high

    return valid

# Year validators for the three year-valued fields.
byr_valid = year_valid(1920, 2002)
iyr_valid = year_valid(2010, 2020)
eyr_valid = year_valid(2020, 2030)

def hgt_valid(s):
    """Return True when ``s`` is a valid height string.

    A valid height is one or more digits immediately followed by the unit
    ``cm`` or ``in``, with nothing before or after. The number must lie in
    150-193 (inclusive) for ``cm`` and in 59-76 (inclusive) for ``in``.
    """
    # fullmatch() requires the unit to end the string, so values such as
    # "190cmx" or "60inches" are rejected rather than matched on their prefix.
    m = re.fullmatch(r"(\d+)(in|cm)", s)

    if not m:
        return False

    num = int(m.group(1))
    unit = m.group(2)

    if unit == "cm":
        return 150 <= num <= 193

    # The pattern only admits "cm" or "in", so this branch is inches.
    return 59 <= num <= 76

def hcl_valid(s):
    """Return True when ``s`` is ``#`` followed by exactly six characters 0-9 or a-f.

    Returns a bool (not a match object) so the result is consistent with the
    other field validators; truthiness is the same as before for valid and
    invalid inputs without a trailing newline.
    """
    # fullmatch() instead of match() with "$": "$" also matches just before a
    # trailing newline, which would let "#123abc\n" through.
    return re.fullmatch(r"#[0-9a-f]{6}", s) is not None

# The eye-colour codes accepted by ecl_valid.
ecl_set = {'amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'}


def ecl_valid(s):
    """Return True when ``s`` is one of the codes in ``ecl_set``."""
    return s in ecl_set

def pid_valid(s):
    """Return True when ``s`` consists of exactly nine digits (leading zeros allowed).

    Returns a bool (not a match object) so the result is consistent with the
    other field validators.
    """
    # fullmatch() instead of match() with "$": "$" also matches just before a
    # trailing newline, which would let "000000001\n" through.
    return re.fullmatch(r"\d{9}", s) is not None

In [15]:
# Dispatch table mapping each field prefix to the callable that validates the
# field's value. The 'cid' entry accepts every value.
prefix_validation = {
    'byr': byr_valid,
    'iyr': iyr_valid,
    'eyr': eyr_valid,
    'hgt': hgt_valid,
    'hcl': hcl_valid,
    'ecl': ecl_valid,
    'pid': pid_valid,
    'cid': lambda _: True,
}

In [16]:
def passport_valid_2(passport):
    """Validate a passport's field set and then each field's value.

    First runs ``passport_valid``; if that fails, prints a message and returns
    False. Otherwise each ``(prefix, value)`` pair is checked in order with
    the validator registered for its prefix in ``prefix_validation``. The
    first failing field is printed and False is returned. Returns True when
    every field passes.
    """
    if not passport_valid(passport):
        print("Not valid from phase 1")
        return False

    for (prefix, value) in passport:
        check = prefix_validation[prefix]
        if check(value):
            continue

        # Stop at the first bad field; later fields are not examined.
        print(f"Invalid {prefix} {value}")
        return False

    return True

In [17]:
# Fixtures for the field-value validation: one file of passports that should
# all be rejected and one of passports that should all be accepted. Each is
# bound only to its own name; the earlier chained `= test_passports =`
# assignment also overwrote the `test_passports` fixture loaded from
# "./inputs/4_test", which broke re-running the cell that asserts on it.
invalid_passports = parse_passports("./inputs/4_test_invalid")
valid_passports = parse_passports("./inputs/4_test_valid")

In [18]:
# None of the passports in the invalid fixture may pass full validation.
# passport_valid_2 prints the reason for each rejection as a side effect.
assert sum([passport_valid_2(p) for p in invalid_passports]) == 0

Invalid eyr 1972
Invalid eyr 1967
Invalid hcl dab227
Invalid hgt 59cm


In [19]:
# Exactly four passports in the valid fixture must pass full validation.
assert sum([passport_valid_2(p) for p in valid_passports]) == 4

In [20]:
# Test cases from the problem, one group per field validator.

# byr: upper bound 2002 is inclusive.
assert byr_valid('2002')
assert not byr_valid('2003')

# hgt: needs a unit, and the range depends on the unit.
assert hgt_valid('60in')
assert hgt_valid('190cm')
assert not hgt_valid('190in')
assert not hgt_valid('190')

# hcl: leading '#' is required and only 0-9 / a-f characters are allowed.
assert hcl_valid('#123abc')
assert not hcl_valid('#123abz')
assert not hcl_valid('123abc')

# ecl: must be one of the known codes.
assert ecl_valid('brn')
assert not ecl_valid('wat')

# pid: nine digits, leading zeros allowed; ten digits is too long.
assert pid_valid('000000001')
assert not pid_valid('0123456789')

In [21]:
# Number of passports in the main input that pass full validation.
# Note: passport_valid_2 prints one line per rejected passport, so this cell
# produces a long log in addition to the count.
sum([passport_valid_2(p) for p in passports])

Invalid eyr 2034
Invalid pid #baec97
Invalid eyr 1972
Invalid ecl #473aaf
Invalid hcl z
Invalid byr 2022
Invalid hcl z
Invalid byr 2005
Invalid byr 2023
Not valid from phase 1
Invalid pid 6645616064
Invalid pid 32592758
Invalid eyr 2040
Invalid hgt 161in
Invalid ecl #ba14f0
Not valid from phase 1
Invalid hcl 936807
Invalid hcl b62ef0
Invalid pid 7039815301
Invalid iyr 1978
Invalid hgt 70cm
Invalid iyr 2003
Not valid from phase 1
Invalid ecl #ed7ddc
Not valid from phase 1
Invalid hgt 77
Invalid hcl f9d9dd
Invalid eyr 2039
Invalid byr 2016
Invalid hgt 67cm
Not valid from phase 1
Invalid hgt 162in
Not valid from phase 1
Invalid byr 2029
Invalid pid 2236240873
Not valid from phase 1
Invalid eyr 2034
Invalid eyr 1978
Invalid pid 64469711
Invalid byr 2028
Invalid hgt 64cm
Invalid byr 2007
Invalid eyr 2002
Invalid hgt 162in
Invalid hcl z
Invalid hcl 931e2c
Invalid eyr 1933
Invalid iyr 2026
Invalid pid 168cm
Not valid from phase 1
Invalid eyr 1927
Not valid from phase 1
Invalid byr 2011
Not va

156