# --- Day 4: Passport Processing ---
byr (Birth Year) - four digits; at least 1920 and at most 2002.
iyr (Issue Year) - four digits; at least 2010 and at most 2020.
eyr (Expiration Year) - four digits; at least 2020 and at most 2030.
hgt (Height) - a number followed by either cm or in:
If cm, the number must be at least 150 and at most 193.
If in, the number must be at least 59 and at most 76.
hcl (Hair Color) - a # followed by exactly six characters 0-9 or a-f.
ecl (Eye Color) - exactly one of: amb blu brn gry grn hzl oth.
pid (Passport ID) - a nine-digit number, including leading zeroes.

In your batch file, how many passports are valid?

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Group the batch file into passport records. Records are separated by blank
# lines and a single record's key:value fields may span several lines.
with open('day4.txt', 'r') as f:
    lst = []
    fields = []  # key:value tokens of the record currently being read
    for line in f:
        if line.strip():
            fields.extend(line.split())
        elif fields:
            # A blank (or whitespace-only) line closes the current record.
            # Testing `fields` skips repeated blank lines, so no empty
            # pseudo-passport is ever stored.
            lst.append(' '.join(fields))
            fields = []
    # The last record is usually not followed by a blank line.
    if fields:
        lst.append(' '.join(fields))
print(len(lst))
print()
print(lst[:5])

260

['byr:1985 eyr:2021 iyr:2011 hgt:175cm pid:163069444 hcl:#18171d', 'eyr:2023 hcl:#cfa07d ecl:blu hgt:169cm pid:494407412 byr:1936', 'ecl:zzz eyr:2036 hgt:109 hcl:#623a2f iyr:1997 byr:2029 cid:169 pid:170290956', 'hcl:#18171d ecl:oth pid:266824158 hgt:168cm byr:1992 eyr:2021', 'byr:1932 ecl:hzl pid:284313291 iyr:2017 hcl:#efcc98 eyr:2024 hgt:184cm']


In [3]:
# Wrap the passport strings in a one-column DataFrame ('txt') and peek at it
df = pd.DataFrame(lst, columns=['txt'])
df.sample(3)

Unnamed: 0,txt
216,eyr:2027 hcl:#db3405 byr:1938 pid:194516631 ci...
138,byr:2029 ecl:amb hgt:175in iyr:2015 hcl:#ceb3a...
71,pid:859849571 ecl:amb hcl:#6b5442 hgt:193cm by...


In [4]:
# define function to extract value for key:value pair
def xtract(sss, col):
    """Return the value stored under key ``col`` in a passport string.

    Parameters
    ----------
    sss : str
        Whitespace-separated ``key:value`` tokens of one passport.
    col : str
        Field key to look up, e.g. ``'byr'``.

    Returns
    -------
    str or float
        Everything after the first ``:`` of the first token whose key equals
        ``col`` exactly; ``np.nan`` when no such token exists or when ``sss``
        is not a string.
    """
    if not isinstance(sss, str):
        return np.nan
    for token in sss.split():
        key, sep, value = token.partition(':')
        # Compare the key itself: a substring test on the whole token would
        # also hit tokens whose *value* happens to contain the key name.
        if sep and key == col:
            return value
    return np.nan

In [5]:
# Split each passport string into one column per required field.
# The three year fields are converted to numbers; the rest stay as text.
for year_key in ('byr', 'iyr', 'eyr'):
    df[year_key] = df['txt'].apply(xtract, col=year_key).apply(pd.to_numeric)
# 'cid' is deliberately not extracted (not relevant)
for text_key in ('hgt', 'hcl', 'ecl', 'pid'):
    df[text_key] = df['txt'].apply(xtract, col=text_key)
df.sample(3)

Unnamed: 0,txt,byr,iyr,eyr,hgt,hcl,ecl,pid
65,iyr:2012 pid:900043442 hcl:#ceb3a1 cid:124 byr...,1941.0,2012.0,2025.0,156cm,#ceb3a1,blu,900043442
101,hgt:161cm eyr:2030 cid:221 pid:994494879 hcl:#...,1957.0,2012.0,2030.0,161cm,#733820,blu,994494879
118,byr:2002 hgt:169cm pid:629420566 eyr:2026 ecl:...,2002.0,2019.0,2026.0,169cm,#341e13,grn,629420566


# Part 1 

In [6]:
# Part 1 answer: number of rows in which no extracted field is missing
df.dropna().shape[0]

222

In [27]:
# function to validate height
def verhgt(sss):
    """Check a height value: digits followed by ``cm`` or ``in``.

    Parameters
    ----------
    sss : object
        Raw ``hgt`` value; it is converted with ``str()`` first, so a missing
        value (NaN) is handled and simply fails validation.

    Returns
    -------
    bool
        True when the number is 150-193 for ``cm`` or 59-76 for ``in``;
        False for any other unit, a missing unit, or a non-numeric prefix.
    """
    text = str(sss)
    number, unit = text[:-2], text[-2:]
    # Accept plain ASCII digits only. This keeps int() from raising on junk
    # such as 'cm' or 'abcin', and rejects forms int() would otherwise
    # tolerate ('+170', '1_70').
    if not (number.isascii() and number.isdigit()):
        return False
    if unit == 'cm':
        return 150 <= int(number) <= 193
    if unit == 'in':
        return 59 <= int(number) <= 76
    return False

In [8]:
# function to validate hcl
def verhcl(sss):
    """Check a hair colour: ``#`` followed by exactly six of 0-9 / a-f.

    Parameters
    ----------
    sss : object
        Raw ``hcl`` value; it is converted with ``str()`` first, so a missing
        value (NaN) is handled and simply fails validation.

    Returns
    -------
    bool
        True only for a 7-character string starting with ``#`` whose other
        six characters are lowercase hexadecimal digits.
    """
    text = str(sss)
    if len(text) != 7 or text[0] != '#':
        return False
    # Length and prefix alone are not enough: '#zzzzzz' must be rejected.
    return all(ch in '0123456789abcdef' for ch in text[1:])
        

In [28]:
# verification conditions: one boolean Series per rule, aligned with df's rows
con1 = df.byr.isin(range(1920,2003))   # birth year 1920..2002
con2 = df.iyr.isin(range(2010,2021))   # issue year 2010..2020
con3 = df.eyr.isin(range(2020,2031))   # expiration year 2020..2030
con4 = df.hgt.apply(verhgt)
con5 = df.hcl.apply(verhcl)
eye=['amb','blu','brn','gry','grn','hzl', 'oth']
con6 = df.ecl.isin(eye)
# pid must be exactly nine digits. A length check alone would accept any
# nine characters. str.match anchors at the start and '$' anchors the end;
# na=False makes a missing pid count as invalid.
con7 = df.pid.str.match(r'[0-9]{9}$', na=False)

In [42]:
# Keep only the passports that satisfy all seven conditions
all_rules = con1 & con2 & con3 & con4 & con5 & con6 & con7
dfOK = df[all_rules]
dfOK

Unnamed: 0,txt,byr,iyr,eyr,hgt,hcl,ecl,pid
4,byr:1932 ecl:hzl pid:284313291 iyr:2017 hcl:#e...,1932.0,2017.0,2024.0,184cm,#efcc98,hzl,284313291
6,eyr:2027 hgt:185cm hcl:#373b34 pid:807766874 i...,1955.0,2015.0,2027.0,185cm,#373b34,hzl,807766874
7,iyr:2017 hcl:#7d3b0c hgt:174cm byr:1942 eyr:20...,1942.0,2017.0,2025.0,174cm,#7d3b0c,blu,424955675
8,eyr:2026 byr:1950 hcl:#ceb3a1 hgt:182cm iyr:20...,1950.0,2016.0,2026.0,182cm,#ceb3a1,amb,440353084
10,hcl:#ceb3a1 eyr:2020 pid:348696077 hgt:163cm e...,1921.0,2016.0,2020.0,163cm,#ceb3a1,hzl,348696077
...,...,...,...,...,...,...,...,...
250,byr:1938 iyr:2017 hcl:#623a2f cid:191 eyr:2027...,1938.0,2017.0,2027.0,174cm,#623a2f,amb,287108745
252,ecl:amb hgt:177cm hcl:#b6a3ce eyr:2025 byr:196...,1967.0,2018.0,2025.0,177cm,#b6a3ce,amb,506927066
253,byr:1964 hgt:173cm eyr:2030 cid:106 pid:587635...,1964.0,2012.0,2030.0,173cm,#fb5993,hzl,587635596
257,ecl:grn hcl:#efcc98 byr:1935 eyr:2025 iyr:2018...,1935.0,2018.0,2025.0,65in,#efcc98,grn,396444938
