Count the number of valid passports - those that have all required fields. Treat cid as optional. In your batch file, how many passports are valid?

In [1]:
import itertools
import re
import pandas as pd
import numpy as np

In [2]:
# Read the puzzle input into a list with one entry per line, trailing newline removed.
# The context manager closes the file handle as soon as the lines have been read,
# instead of leaving an open handle behind for the lifetime of the kernel.
with open("input.txt") as input_file:
    data = [line.rstrip('\n') for line in input_file]

In [3]:
# data

In [4]:
# Group the input lines into passports. Each passport is a list with one entry per
# input line, and each entry is that line's whitespace-separated "key:value" tokens.
# Blank (or whitespace-only) lines separate passports.
passport_lists = []
person = []
for passport in data:
    if passport.strip():
        person.append(passport.split())
    elif person:
        # Only close a passport that actually collected lines, so consecutive or
        # trailing blank lines do not create empty phantom passports.
        passport_lists.append(person)
        person = []
if person:
    # The last passport has no blank line after it when the file ends without one.
    passport_lists.append(person)

In [5]:
# passport_lists

In [6]:
# Turn every passport into one {field: value} record and build the DataFrame once.
# Growing a frame with pd.concat inside the loop copies all previous rows on every
# iteration; a single constructor call avoids that and gives each row a unique index.
passport_records = []
for passport_list in passport_lists:
    passport_dict = {}
    for info_list in passport_list:
        for info in info_list:
            # Split on the first colon only, so key and value are cut at the same place.
            key, separator, value = info.partition(':')
            if not separator:
                # Token without a colon is not a field; skip it instead of crashing.
                continue
            passport_dict[key] = value
    if passport_dict:
        # Passports without any field contribute no row.
        passport_records.append(passport_dict)

passport_df = pd.DataFrame(passport_records)
# Keep the columns in alphabetical order; fields a passport lacks are left as NaN.
passport_df = passport_df.reindex(sorted(passport_df.columns), axis=1)

In [7]:
# Part 1: a passport is valid when every required field is present.
# cid is optional, so it is deliberately left out of the list.
required_fields = ['byr', 'ecl', 'eyr', 'hcl', 'hgt', 'iyr', 'pid']
passport_df['valid'] = passport_df[required_fields].notnull().all(axis=1)
passport_df.head()

Unnamed: 0,byr,cid,ecl,eyr,hcl,hgt,iyr,pid,valid
0,2000,89.0,amb,2034,#fffffd,176cm,2013,934693255,True
0,1939,,grn,2020,#b5c3db,155cm,2017,#baec97,True
0,1960,,dne,1972,z,152cm,2023,526669252,True
0,1926,,#473aaf,2028,#c0946f,73in,2016,565318180,True
0,1940,277.0,oth,2030,#62e117,170cm,2019,472686027,True


In [8]:
# Booleans add up as 0/1, so the column total is the number of valid passports.
valid_count = passport_df['valid'].sum()
print(f"The number of valid passports is {valid_count} ")

The number of valid passports is 230 


## Part 2

You can continue to ignore the cid field, but each other field has strict rules about what values are valid for automatic validation:

- byr (Birth Year) - four digits; at least 1920 and at most 2002.
- iyr (Issue Year) - four digits; at least 2010 and at most 2020.
- eyr (Expiration Year) - four digits; at least 2020 and at most 2030.
- hgt (Height) - a number followed by either cm or in:
  - If cm, the number must be at least 150 and at most 193.
  - If in, the number must be at least 59 and at most 76.
- hcl (Hair Color) - a # followed by exactly six characters 0-9 or a-f.
- ecl (Eye Color) - exactly one of: amb blu brn gry grn hzl oth.
- pid (Passport ID) - a nine-digit number, including leading zeroes.
- cid (Country ID) - ignored, missing or not.

In [13]:
def _is_year_between(value, lowest, highest):
    """Return True when value is exactly four digits and lowest <= value <= highest."""
    if re.fullmatch(r'[0-9]{4}', value) is None:
        return False
    return lowest <= int(value) <= highest


def _is_valid_height(value):
    """Return True for '<number>cm' within 150-193 or '<number>in' within 59-76."""
    # (cm|in) is a real alternation; a character class like [cm|in] would accept
    # any single one of those characters and let values such as '60ncm' through.
    parsed = re.fullmatch(r'([0-9]+)(cm|in)', value)
    if parsed is None:
        return False
    amount = int(parsed.group(1))
    if parsed.group(2) == 'cm':
        return 150 <= amount <= 193
    return 59 <= amount <= 76


def field_validation(row):
    """Check one passport against the part 2 field rules.

    Parameters
    ----------
    row : pandas.Series (or any mapping with ``.get``)
        One passport. The required fields are looked up by the labels
        'byr', 'ecl', 'eyr', 'hcl', 'hgt', 'iyr' and 'pid'; their values are
        matched as strings. Any other label (such as 'cid') is ignored.

    Returns
    -------
    bool
        True when every required field is present and satisfies its rule,
        False otherwise (including when a field is missing or null).
    """
    required = ['byr', 'ecl', 'eyr', 'hcl', 'hgt', 'iyr', 'pid']
    fields = {name: row.get(name) for name in required}
    if any(pd.isnull(value) for value in fields.values()):
        return False
    # byr (Birth Year) - four digits; at least 1920 and at most 2002.
    if not _is_year_between(fields['byr'], 1920, 2002):
        return False
    # iyr (Issue Year) - four digits; at least 2010 and at most 2020.
    if not _is_year_between(fields['iyr'], 2010, 2020):
        return False
    # eyr (Expiration Year) - four digits; at least 2020 and at most 2030.
    if not _is_year_between(fields['eyr'], 2020, 2030):
        return False
    # hgt (Height) - a number followed by either cm (150-193) or in (59-76).
    if not _is_valid_height(fields['hgt']):
        return False
    # hcl (Hair Color) - a # followed by exactly six characters 0-9 or a-f.
    if re.fullmatch(r'#[0-9a-f]{6}', fields['hcl']) is None:
        return False
    # ecl (Eye Color) - exactly one of: amb blu brn gry grn hzl oth.
    if fields['ecl'] not in ('amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth'):
        return False
    # pid (Passport ID) - a nine-digit number, including leading zeroes.
    if re.fullmatch(r'[0-9]{9}', fields['pid']) is None:
        return False
    return True
    
# Evaluate the part 2 rules row by row and store the verdict next to the part 1 flag.
part_2_flags = passport_df.apply(field_validation, axis=1)
passport_df['valid_part_2'] = part_2_flags
passport_df.head(10)

Unnamed: 0,byr,cid,ecl,eyr,hcl,hgt,iyr,pid,valid,valid_part_2
0,2000,89.0,amb,2034,#fffffd,176cm,2013,934693255,True,False
0,1939,,grn,2020,#b5c3db,155cm,2017,#baec97,True,False
0,1960,,dne,1972,z,152cm,2023,526669252,True,False
0,1926,,#473aaf,2028,#c0946f,73in,2016,565318180,True,False
0,1940,277.0,oth,2030,#62e117,170cm,2019,472686027,True,True
0,1959,,oth,2022,#733820,159cm,2017,938461813,True,True
0,2002,140.0,hzl,2021,z,186cm,2011,17324328,True,False
0,2022,,#fa362b,2037,6b3837,76cm,1984,3164234967,True,False
0,2005,326.0,zzz,1945,z,75cm,1934,9247286687,True,False
0,2005,254.0,lzr,2021,z,157cm,2020,152cm,True,False


In [14]:
# Booleans add up as 0/1, so the column total is the number of passports passing part 2.
valid_part_2_count = passport_df['valid_part_2'].sum()
print(f"The number of valid passports for part 2 is {valid_part_2_count} ")

The number of valid passports for part 2 is 156 
