In [91]:
import pandas as pd
import numpy as np
import string
import logging

In [56]:
# Read the puzzle input one line per row. Blank lines are kept (they become
# NaN) because they are the separators between passport records.
raw_data = pd.read_table('passport.txt', names=['response'], skip_blank_lines=False).squeeze('columns')
# Add a trailing NaN sentinel so the last record is flushed by the parsing
# loops. `Series.append` was removed in pandas 2.0; `pd.concat` is the
# supported equivalent.
raw_data = pd.concat([raw_data, pd.Series([np.nan])], ignore_index=True)

In [15]:
def create_passport(records):
    """Build a passport dict from the text lines that make up one record.

    Each line is split on whitespace into ``key:value`` tokens. Only the
    first colon separates key from value, so a value may itself contain
    colons. If a key occurs more than once, the last occurrence wins.

    Args:
        records: Iterable of strings, each holding zero or more
            whitespace-separated ``key:value`` tokens.

    Returns:
        dict mapping each key to its (string) value.

    Raises:
        ValueError: If a token contains no colon at all.
    """
    passport = {}
    for row in records:
        for element in row.split():
            # maxsplit=1 keeps any further colons inside the value instead of
            # failing to unpack into exactly two names.
            key, value = element.split(":", 1)
            passport[key] = value
    return passport

In [21]:
def test_passport(passport):
    required_fields = ['byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid']
    for field in required_fields:
        if field not in passport:
            return False
    return True

False

In [59]:
# Part 1: count the passports that contain every required field.
# Lines are buffered until a NaN row (a blank line in the input, or the
# trailing sentinel) marks the end of the current record.
raw_passport = []
valid_passport_count = 0
passport = {}
for row in raw_data:
    if pd.isnull(row):
        # Record boundary: parse the buffered lines, test, then reset.
        passport = create_passport(raw_passport)
        valid = test_passport(passport)
        valid_passport_count += 1 if valid else 0
        raw_passport = []
    else:
        raw_passport.append(row)
valid_passport_count

182

## Part 2

In [183]:
# Notebook-wide logging: timestamp, logger name, level, then the message.
log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logger = logging.getLogger(__name__)
# Configure the root logger so INFO records from `logger` are emitted.
logging.basicConfig(format=log_fmt, level=logging.INFO)

In [194]:
def _int_in_range(text, low, high):
    """Return True when ``text`` parses as an integer within ``[low, high]``."""
    try:
        number = int(text)
    except ValueError:
        # Non-numeric text (including the empty string) is invalid, not fatal.
        return False
    return low <= number <= high


def strict_test_passport(passport):
    """Validate both the presence and the content of the required fields.

    Rules applied, in order:
      * byr, iyr, eyr, hgt, hcl, ecl and pid must all be present;
      * byr is an integer in 1920..2002, iyr in 2010..2020, eyr in 2020..2030;
      * hgt ends in ``cm`` (150..193) or ``in`` (59..76);
      * hcl is ``#`` followed by exactly six hexadecimal digits;
      * ecl is one of amb, blu, brn, gry, grn, hzl, oth;
      * pid is exactly nine decimal digits.

    A year or height that is not numeric makes the passport invalid rather
    than raising ``ValueError``.

    Args:
        passport: Mapping of field name to string value.

    Returns:
        True if every rule holds, otherwise False.
    """
    required_fields = ['byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid']
    if any(field not in passport for field in required_fields):
        return False

    if not _int_in_range(passport['byr'], 1920, 2002):
        return False
    if not _int_in_range(passport['iyr'], 2010, 2020):
        return False
    if not _int_in_range(passport['eyr'], 2020, 2030):
        return False

    # The last two characters of hgt are the unit, the rest is the magnitude.
    height = passport['hgt']
    unit = height[-2:]
    if unit == 'cm':
        low, high = 150, 193
    elif unit == 'in':
        low, high = 59, 76
    else:
        return False
    if not _int_in_range(height[:-2], low, high):
        return False

    hair = passport['hcl']
    # Slicing [0:1] rather than indexing [0] tolerates an empty string.
    if hair[0:1] != '#' or len(hair) != 7:
        return False
    if not all(c in string.hexdigits for c in hair[1:]):
        return False

    if passport['ecl'] not in ['amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth']:
        return False

    pid = passport['pid']
    if len(pid) != 9:
        return False
    if not all(c in string.digits for c in pid):
        return False
    return True
# Ad-hoc check of the validator on whatever `passport` currently holds.
# NOTE(review): `passport` is not defined in this cell; presumably it is the
# last record parsed by a previously executed cell. Confirm on a fresh kernel.
strict_test_passport(passport)

True

In [172]:
# Load the two fixture files the same way as the main input: blank lines are
# kept as NaN record separators and a trailing NaN sentinel is added so the
# final record is flushed. `Series.append` was removed in pandas 2.0, so
# `pd.concat` is used to add the sentinel.
bad_passports = pd.read_table('bad_passport.txt', names=['response'], skip_blank_lines=False).squeeze('columns')
bad_passports = pd.concat([bad_passports, pd.Series([np.nan])], ignore_index=True)

good_passports = pd.read_table('good_passport.txt', names=['response'], skip_blank_lines=False).squeeze('columns')
good_passports = pd.concat([good_passports, pd.Series([np.nan])], ignore_index=True)

In [185]:
# Run the strict validator over the `bad_passports` fixture.
# A NaN row closes the record that has been buffered so far.
raw_passport = []
valid_passport_count = 0
passport = {}
for row in bad_passports:
    if pd.isnull(row):
        # Record boundary: parse, validate, then start a fresh buffer.
        passport = create_passport(raw_passport)
        valid = strict_test_passport(passport)
        valid_passport_count += 1 if valid else 0
        raw_passport = []
    else:
        raw_passport.append(row)
valid_passport_count

2022-03-11 16:10:25,643 - __main__ - INFO - Hair Color doesn't start #: dab227
2022-03-11 16:10:25,644 - __main__ - INFO - Birth Year: 2007
2022-03-11 16:10:25,644 - __main__ - INFO - Birth Year: 1892
2022-03-11 16:10:25,648 - __main__ - INFO - Hair Color doesn't start #: 1f7352
2022-03-11 16:10:25,649 - __main__ - INFO - Hair Color isn't hex: #b6652z
2022-03-11 16:10:25,650 - __main__ - INFO - Passport ID wrong length: 09315471912
2022-03-11 16:10:25,650 - __main__ - INFO - Passport ID isn't digits: 09315471a


0

In [170]:
# Run the strict validator over the `good_passports` fixture.
# A NaN row closes the record that has been buffered so far.
raw_passport = []
valid_passport_count = 0
passport = {}
for row in good_passports:
    if pd.isnull(row):
        # Record boundary: parse, validate, then start a fresh buffer.
        passport = create_passport(raw_passport)
        valid = strict_test_passport(passport)
        valid_passport_count += 1 if valid else 0
        raw_passport = []
    else:
        raw_passport.append(row)
valid_passport_count

4

In [187]:
# Part 2: count the passports in the real input that pass strict validation,
# logging each accepted record at INFO level.
raw_passport = []
valid_passport_count = 0
passport = {}
for row in raw_data:
    if pd.isnull(row):
        # Record boundary: parse and validate the buffered lines.
        passport = create_passport(raw_passport)
        valid = strict_test_passport(passport)
        if valid:
            logger.info(f"Passport: {passport}")
            valid_passport_count += 1
        raw_passport = []
    else:
        raw_passport.append(row)
valid_passport_count

2022-03-11 16:10:49,529 - __main__ - INFO - Passport: {'hcl': '#602927', 'iyr': '2018', 'byr': '1938', 'ecl': 'blu', 'eyr': '2024', 'hgt': '172cm', 'pid': '839621424'}
2022-03-11 16:10:49,530 - __main__ - INFO - Passport: {'ecl': 'gry', 'byr': '1931', 'iyr': '2017', 'pid': '459927933', 'eyr': '2028', 'hgt': '67in', 'hcl': '#fffffd'}
2022-03-11 16:10:49,530 - __main__ - INFO - Passport: {'cid': '322', 'hgt': '163cm', 'byr': '1969', 'hcl': '#a97842', 'pid': '472877556', 'iyr': '2019', 'ecl': 'amb', 'eyr': '2030'}
2022-03-11 16:10:49,531 - __main__ - INFO - Passport: {'hcl': '#733820', 'ecl': 'brn', 'byr': '2000', 'eyr': '2022', 'iyr': '2014', 'cid': '320', 'pid': '751634349', 'hgt': '180cm'}
2022-03-11 16:10:49,532 - __main__ - INFO - Passport: {'pid': '258660154', 'byr': '1921', 'hgt': '161cm', 'eyr': '2030', 'cid': '217', 'iyr': '2012', 'hcl': '#4dd6d4', 'ecl': 'grn'}
2022-03-11 16:10:49,532 - __main__ - INFO - Passport: {'hgt': '170cm', 'byr': '1978', 'eyr': '2022', 'pid': '399347273'

2022-03-11 16:10:49,558 - __main__ - INFO - Passport: {'ecl': 'brn', 'byr': '1962', 'hcl': '#866857', 'iyr': '2020', 'hgt': '152cm', 'pid': '701556397', 'cid': '121', 'eyr': '2029'}
2022-03-11 16:10:49,559 - __main__ - INFO - Passport: {'iyr': '2016', 'hgt': '168cm', 'byr': '1999', 'cid': '286', 'hcl': '#18171d', 'pid': '223995430', 'eyr': '2022', 'ecl': 'blu'}
2022-03-11 16:10:49,559 - __main__ - INFO - Passport: {'pid': '227780276', 'ecl': 'blu', 'iyr': '2017', 'byr': '1985', 'hcl': '#6b5442', 'hgt': '183cm', 'eyr': '2028'}
2022-03-11 16:10:49,560 - __main__ - INFO - Passport: {'hgt': '190cm', 'ecl': 'oth', 'eyr': '2030', 'cid': '223', 'hcl': '#888785', 'iyr': '2010', 'pid': '115829664', 'byr': '1967'}
2022-03-11 16:10:49,561 - __main__ - INFO - Passport: {'eyr': '2025', 'iyr': '2013', 'byr': '1958', 'ecl': 'grn', 'hcl': '#ceb3a1', 'hgt': '153cm', 'pid': '815357118'}
2022-03-11 16:10:49,561 - __main__ - INFO - Passport: {'pid': '038013822', 'hgt': '180cm', 'iyr': '2013', 'hcl': '#623

2022-03-11 16:10:49,587 - __main__ - INFO - Passport: {'eyr': '2024', 'hcl': '#6b5442', 'iyr': '2014', 'hgt': '68in', 'pid': '577055593', 'ecl': 'grn', 'byr': '1996'}
2022-03-11 16:10:49,587 - __main__ - INFO - Passport: {'hcl': '#efcc98', 'eyr': '2022', 'ecl': 'grn', 'hgt': '167cm', 'byr': '1978', 'iyr': '2010', 'pid': '180446111'}
2022-03-11 16:10:49,588 - __main__ - INFO - Passport: {'hcl': '#341e13', 'ecl': 'brn', 'iyr': '2019', 'pid': '589837530', 'cid': '157', 'byr': '1925', 'hgt': '183cm', 'eyr': '2020'}
2022-03-11 16:10:49,588 - __main__ - INFO - Passport: {'ecl': 'oth', 'iyr': '2012', 'eyr': '2028', 'hcl': '#866857', 'pid': '716964854', 'byr': '1940', 'cid': '113', 'hgt': '193cm'}
2022-03-11 16:10:49,589 - __main__ - INFO - Passport: {'byr': '1985', 'iyr': '2011', 'hcl': '#866857', 'pid': '454558712', 'eyr': '2025', 'cid': '301', 'hgt': '62in', 'ecl': 'blu'}
2022-03-11 16:10:49,589 - __main__ - INFO - Passport: {'hcl': '#733820', 'eyr': '2025', 'ecl': 'amb', 'pid': '855788635'

110

In [197]:
# Part 2 again, additionally collecting the `hgt` value of every accepted
# passport in `temp` so the accepted heights can be inspected afterwards.
raw_passport = []
valid_passport_count = 0
temp = []
passport = {}
for row in raw_data:
    if pd.isnull(row):
        # Record boundary: parse and validate the buffered lines.
        passport = create_passport(raw_passport)
        valid = strict_test_passport(passport)
        if valid:
            logger.info(f"Passport: {passport}")
            valid_passport_count += 1
            temp.append(passport['hgt'])
        raw_passport = []
    else:
        raw_passport.append(row)
valid_passport_count
# To list the distinct accepted heights instead, evaluate: set(temp)

2022-03-11 16:17:32,413 - __main__ - INFO - Passport: {'hcl': '#602927', 'iyr': '2018', 'byr': '1938', 'ecl': 'blu', 'eyr': '2024', 'hgt': '172cm', 'pid': '839621424'}
2022-03-11 16:17:32,413 - __main__ - INFO - Passport: {'ecl': 'gry', 'byr': '1931', 'iyr': '2017', 'pid': '459927933', 'eyr': '2028', 'hgt': '67in', 'hcl': '#fffffd'}
2022-03-11 16:17:32,414 - __main__ - INFO - Passport: {'cid': '322', 'hgt': '163cm', 'byr': '1969', 'hcl': '#a97842', 'pid': '472877556', 'iyr': '2019', 'ecl': 'amb', 'eyr': '2030'}
2022-03-11 16:17:32,415 - __main__ - INFO - Passport: {'hcl': '#733820', 'ecl': 'brn', 'byr': '2000', 'eyr': '2022', 'iyr': '2014', 'cid': '320', 'pid': '751634349', 'hgt': '180cm'}
2022-03-11 16:17:32,415 - __main__ - INFO - Passport: {'pid': '258660154', 'byr': '1921', 'hgt': '161cm', 'eyr': '2030', 'cid': '217', 'iyr': '2012', 'hcl': '#4dd6d4', 'ecl': 'grn'}
2022-03-11 16:17:32,416 - __main__ - INFO - Passport: {'hgt': '170cm', 'byr': '1978', 'eyr': '2022', 'pid': '399347273'

2022-03-11 16:17:32,442 - __main__ - INFO - Passport: {'ecl': 'brn', 'byr': '1962', 'hcl': '#866857', 'iyr': '2020', 'hgt': '152cm', 'pid': '701556397', 'cid': '121', 'eyr': '2029'}
2022-03-11 16:17:32,442 - __main__ - INFO - Passport: {'iyr': '2016', 'hgt': '168cm', 'byr': '1999', 'cid': '286', 'hcl': '#18171d', 'pid': '223995430', 'eyr': '2022', 'ecl': 'blu'}
2022-03-11 16:17:32,443 - __main__ - INFO - Passport: {'pid': '227780276', 'ecl': 'blu', 'iyr': '2017', 'byr': '1985', 'hcl': '#6b5442', 'hgt': '183cm', 'eyr': '2028'}
2022-03-11 16:17:32,443 - __main__ - INFO - Passport: {'hgt': '190cm', 'ecl': 'oth', 'eyr': '2030', 'cid': '223', 'hcl': '#888785', 'iyr': '2010', 'pid': '115829664', 'byr': '1967'}
2022-03-11 16:17:32,444 - __main__ - INFO - Passport: {'eyr': '2025', 'iyr': '2013', 'byr': '1958', 'ecl': 'grn', 'hcl': '#ceb3a1', 'hgt': '153cm', 'pid': '815357118'}
2022-03-11 16:17:32,445 - __main__ - INFO - Passport: {'pid': '038013822', 'hgt': '180cm', 'iyr': '2013', 'hcl': '#623

2022-03-11 16:17:32,471 - __main__ - INFO - Passport: {'eyr': '2024', 'hcl': '#6b5442', 'iyr': '2014', 'hgt': '68in', 'pid': '577055593', 'ecl': 'grn', 'byr': '1996'}
2022-03-11 16:17:32,472 - __main__ - INFO - Passport: {'hcl': '#efcc98', 'eyr': '2022', 'ecl': 'grn', 'hgt': '167cm', 'byr': '1978', 'iyr': '2010', 'pid': '180446111'}
2022-03-11 16:17:32,472 - __main__ - INFO - Passport: {'hcl': '#341e13', 'ecl': 'brn', 'iyr': '2019', 'pid': '589837530', 'cid': '157', 'byr': '1925', 'hgt': '183cm', 'eyr': '2020'}
2022-03-11 16:17:32,473 - __main__ - INFO - Passport: {'ecl': 'oth', 'iyr': '2012', 'eyr': '2028', 'hcl': '#866857', 'pid': '716964854', 'byr': '1940', 'cid': '113', 'hgt': '193cm'}
2022-03-11 16:17:32,473 - __main__ - INFO - Passport: {'byr': '1985', 'iyr': '2011', 'hcl': '#866857', 'pid': '454558712', 'eyr': '2025', 'cid': '301', 'hgt': '62in', 'ecl': 'blu'}
2022-03-11 16:17:32,474 - __main__ - INFO - Passport: {'hcl': '#733820', 'eyr': '2025', 'ecl': 'amb', 'pid': '855788635'

109