In [155]:
import re
from functools import reduce
import pandas as pd

# Notebook-wide configuration.
# Path of the text file that the loading cell opens and reads line by line.
INPUT_LOCATION = 'day4.txt'
# Line value that Passporter treats as the boundary between two passports.
SEPARATOR = '\n'

In [156]:
# Load the input as a list of lines with trailing whitespace removed.
# Lines that are empty once stripped (plain '\n', '\r\n', or whitespace only)
# are stored as SEPARATOR so that they always act as a passport boundary.
# The `with` block guarantees the file handle is closed after reading.
with open(INPUT_LOCATION, 'r') as input_file:
    clean_data = [line.rstrip() or SEPARATOR for line in input_file]

In [157]:
# iterator over every passport found in the input lines
class Passporter:
    """Iterator that turns a list of input lines into passport dictionaries.

    Consecutive non-separator lines form one passport. Every ``key:value``
    token found in those lines (three word characters, a colon, then any run
    of non-whitespace) becomes one dictionary entry; when a key is repeated
    the last occurrence wins. A block of lines containing no such token
    yields an empty dictionary.

    Attributes:
        p: the sequence of input lines being scanned.
        index: position in ``p`` of the next line to read.
        current: raw lines of the passport most recently collected.
    """

    # Field names a passport needs in order to be considered complete.
    keys = [ 'byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid', 
    # 'cid' is ignored
    ]
    data_format = re.compile(r'(\w{3}):(\S*)')

    def __init__(self, p):
        """Store the sequence of lines ``p`` and start scanning at its beginning."""
        self.p = p
        self.index = 0
        self.current = []

    def __iter__(self):
        """Rewind to the first line and return the iterator itself."""
        self.index = 0
        return self

    @staticmethod
    def _is_separator(line):
        """Return True when ``line`` marks the boundary between two passports."""
        # Whitespace-only lines count as boundaries too, so blank lines that
        # were stripped to '' (e.g. from CRLF input) do not glue passports together.
        return line == SEPARATOR or not line.strip()

    def __next__(self):
        """Return the next passport as a dict of field name -> raw string value.

        Raises:
            StopIteration: when no lines other than separators remain.
        """
        lines = self.p
        total = len(lines)

        # Skip any run of separator lines with a loop (no recursion), so long
        # stretches of blank lines cannot exhaust the interpreter stack.
        while self.index < total and self._is_separator(lines[self.index]):
            self.index += 1
        if self.index >= total:
            raise StopIteration

        # Collect lines up to the next separator OR the end of the input, so
        # the final passport is returned even without a trailing blank line.
        self.current = []
        while self.index < total and not self._is_separator(lines[self.index]):
            self.current.append(lines[self.index])
            self.index += 1

        pairs = self.data_format.findall(" ".join(self.current))
        return dict(pairs)


In [158]:
# Build a DataFrame with one row per passport yielded by Passporter
# (each passport dict supplies the column values), then display it.
p = pd.DataFrame(iter(Passporter(clean_data)))
p

Unnamed: 0,byr,pid,eyr,hgt,iyr,ecl,hcl,cid
0,2010,#1bb4d8,2021,186cm,2020,grt,,
1,1933,937877382,2029,187cm,2019,amb,#888785,
2,1935,,2020,183cm,2019,hzl,#18171d,
3,1992,138000309,2024,183cm,2013,oth,#7d3b0c,135
4,1929,346059944,1924,176cm,2016,hzl,#fffffd,150
...,...,...,...,...,...,...,...,...
274,1951,866047540,2030,64in,2014,amb,#733820,59
275,1962,671492881,2023,69in,2015,brn,#623a2f,
276,1950,924256973,2028,154cm,2020,oth,#b6652a,
277,2021,76948864,2036,116,1930,,z,348


In [159]:
# iterator which returns only the passports that contain every required field
class ValidFieldIterator(Passporter):
    """Passporter variant that yields only passports holding every required field.

    A passport is kept when each name listed in ``self.keys`` is present as a
    key; the values themselves are not inspected here.
    """

    def __next__(self):
        """Return the next passport containing all names in ``self.keys``.

        Raises:
            StopIteration: propagated from the parent once the input is exhausted.
        """
        # Loop instead of recursing so that a long run of incomplete
        # passports cannot hit the interpreter's recursion limit.
        while True:
            pas = super().__next__()
            if all(k in pas for k in self.keys):
                return pas

In [164]:
# iterator which returns only passports that have every required field and whose values pass all checks
class ValidValuesIterator(ValidFieldIterator):
    """ValidFieldIterator variant that also validates the value of each field.

    A passport is yielded only when all of the following hold:
      * hcl is '#' followed by exactly six characters from 0-9 / a-f
      * pid is exactly nine digits
      * ecl is one of EYECOLOR_PATTERN
      * byr, iyr, eyr are four-digit years within 1920-2002, 2010-2020
        and 2020-2030 respectively
      * hgt is a number followed by 'cm' (150-193) or 'in' (59-76)
    """

    # Hex colour: only lowercase hexadecimal digits are accepted
    # (\w would also let through g-z, uppercase letters and '_').
    HCL_PATTERN = r'^#[0-9a-f]{6}$'
    PID_PATTERN = r'^[0-9]{9}$'
    EYECOLOR_PATTERN = ['amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth']

    def is_beetween(self,data,min,max,um=''):
        """Return True when ``int(data)`` lies in the inclusive range [min, max].

        Args:
            data: value to test; anything ``int()`` accepts.
            min: lower bound (inclusive).
            max: upper bound (inclusive).
            um: unused; kept so existing callers keep working.

        Returns:
            False when ``data`` cannot be converted to an integer, so that a
            malformed field invalidates one passport instead of aborting the
            whole iteration.
        """
        try:
            number = int(data)
        except (TypeError, ValueError):
            return False
        return min <= number <= max

    def chk_pattern(self, pattern, data, dataname = ''):
        """Return True when regular expression ``pattern`` is found in ``data``.

        ``dataname`` is unused; it is kept so existing callers keep working.
        """
        return re.search(pattern, data) is not None

    def _chk_year(self, value, first, last):
        """Return True when ``value`` is exactly four digits and within [first, last]."""
        if not isinstance(value, str) or re.fullmatch(r'[0-9]{4}', value) is None:
            return False
        return self.is_beetween(value, first, last)

    def chk_hgt(self,hgt):
        """Return True when ``hgt`` is a valid height such as '170cm' or '65in'.

        The whole string must be digits followed by the unit; centimetres
        must be within 150-193 and inches within 59-76. Any other input,
        including non-string values, gives False.
        """
        if not isinstance(hgt, str):
            return False
        # fullmatch anchors both ends, so values with stray text around the
        # number and unit are rejected.
        height = re.fullmatch(r'([0-9]+)(cm|in)', hgt)
        if height is None:
            return False
        amount, unit = height.groups()
        if unit == 'cm':
            return self.is_beetween(amount,150,193,'cm')
        return self.is_beetween(amount,59,76,'in')

    def __next__(self):
        """Return the next passport whose fields all pass validation.

        Raises:
            StopIteration: propagated from the parent once the input is exhausted.
        """
        # Loop instead of recursing so that a long run of rejected passports
        # cannot hit the interpreter's recursion limit.
        while True:
            p = super().__next__()
            rules = [
                self.chk_pattern(self.HCL_PATTERN, p['hcl'],'hcl'),
                self.chk_pattern(self.PID_PATTERN, p['pid'],'pid'),
                p['ecl'] in self.EYECOLOR_PATTERN,
                self._chk_year(p['byr'], 1920, 2002),
                self._chk_year(p['iyr'], 2010, 2020),
                self._chk_year(p['eyr'], 2020, 2030),
                self.chk_hgt(p['hgt']),
            ]
            if all(rules):
                return p


In [160]:
# Passports that contain every required field (see ValidFieldIterator),
# shown as a DataFrame with one row per passport.
pd.DataFrame(ValidFieldIterator(clean_data))

Unnamed: 0,pid,eyr,ecl,hgt,iyr,byr,hcl,cid
0,937877382,2029,amb,187cm,2019,1933,#888785,
1,138000309,2024,oth,183cm,2013,1992,#7d3b0c,135
2,346059944,1924,hzl,176cm,2016,1929,#fffffd,150
3,897123249,2030,amb,165cm,2011,1948,#18171d,99
4,827609097,2029,gry,72in,2017,1963,#cfa07d,
...,...,...,...,...,...,...,...,...
208,15714997,1993,blu,64cm,2020,1995,#b6652a,
209,866047540,2030,amb,64in,2014,1951,#733820,59
210,671492881,2023,brn,69in,2015,1962,#623a2f,
211,924256973,2028,oth,154cm,2020,1950,#b6652a,


In [165]:
# Passports accepted by ValidValuesIterator, collected into a DataFrame
# with one row per passport.
valid = pd.DataFrame(ValidValuesIterator(clean_data))

Unnamed: 0,pid,eyr,ecl,hgt,iyr,byr,hcl,cid
0,937877382,2029,amb,187cm,2019,1933,#888785,
1,138000309,2024,oth,183cm,2013,1992,#7d3b0c,135
2,897123249,2030,amb,165cm,2011,1948,#18171d,99
3,827609097,2029,gry,72in,2017,1963,#cfa07d,
4,432183209,2028,hzl,152cm,2016,1984,#6b5442,
...,...,...,...,...,...,...,...,...
142,802041641,2028,brn,184cm,2013,1969,#c0946f,
143,866047540,2030,amb,64in,2014,1951,#733820,59
144,671492881,2023,brn,69in,2015,1962,#623a2f,
145,924256973,2028,oth,154cm,2020,1950,#b6652a,
