# Day 4

## Part 1: Problem statement

Problem is specified [here](https://adventofcode.com/2020/day/4).

### Import packages

In [1]:
import pandas as pd

### Solution

Most of the time I would get the data before solving the problem. In this case, processing the data in multiple steps is the problem, so I've chosen not to structure the solution as a function.

In [7]:
# Read the puzzle input. header=None tells pandas the first line is data,
# so the columns are labelled 0, 1, 2, ... instead of being named.
raw_data = pd.read_csv('04_data.csv', header=None)
# Preview the first five rows (five is the default for head()).
print(raw_data.head())

               0         1          2         3            4         5  \
0       iyr:2015   cid:189    ecl:oth  byr:1947  hcl:#6c4ab1  eyr:2026   
1      hgt:174cm       NaN        NaN       NaN          NaN       NaN   
2  pid:526744288       NaN        NaN       NaN          NaN       NaN   
3            NaN       NaN        NaN       NaN          NaN       NaN   
4  pid:688706448  iyr:2017  hgt:162cm   cid:174      ecl:grn  byr:1943   

             6         7  
0          NaN       NaN  
1          NaN       NaN  
2          NaN       NaN  
3          NaN       NaN  
4  hcl:#808e9e  eyr:2025  


Here I combine the clusters of passport codes into Python lists and remove the NaNs. I also separate the keys and values, as this will be easier to work with down the line.

In [3]:
# Group the rows of raw_data into passports.
#
# keys[p] and values[p] are parallel lists holding the field names and field
# values of passport p, in the order the fields appear in the file. A row whose
# first cell is NaN is treated as the separator between two passports.
keys = [[]]
values = [[]]
# Use the real width of the frame rather than a hard-coded 8, so a file with
# fewer columns does not raise IndexError and a wider one loses no fields.
n_columns = raw_data.shape[1]
for i in range(len(raw_data)):
    if pd.isna(raw_data.iloc[i, 0]):
        # Separator row: start collecting a new passport
        keys.append([])
        values.append([])
        continue
    for j in range(n_columns):
        cell = raw_data.iloc[i, j]
        if pd.isna(cell):
            # Padding cell on a row with fewer fields than the frame has columns
            continue
        # Split once, on the first colon only: 'key:value'
        key, value = cell.split(':', 1)
        keys[-1].append(key)
        values[-1].append(value)
print(keys[0:10])

[['iyr', 'cid', 'ecl', 'byr', 'hcl', 'eyr', 'hgt', 'pid'], ['pid', 'iyr', 'hgt', 'cid', 'ecl', 'byr', 'hcl', 'eyr'], ['ecl', 'hcl', 'cid', 'pid', 'iyr', 'eyr', 'byr', 'hgt'], ['pid', 'hgt', 'hcl', 'byr', 'iyr', 'cid', 'ecl', 'eyr'], ['cid', 'pid', 'iyr', 'ecl', 'byr', 'hgt', 'hcl', 'eyr'], ['ecl', 'hcl', 'pid', 'hgt', 'cid', 'byr', 'eyr'], ['pid', 'ecl', 'hgt', 'byr', 'eyr', 'iyr', 'hcl'], ['byr', 'eyr', 'ecl', 'iyr', 'pid', 'hcl', 'hgt'], ['cid', 'byr', 'hcl', 'pid', 'iyr', 'eyr', 'hgt'], ['ecl', 'pid', 'eyr', 'byr', 'iyr', 'hcl', 'hgt']]


Here I go through every passport and check whether all 7 of the necessary keys are present.

In [4]:
# Field names every valid passport must carry
codes = ['byr', 'iyr', 'eyr', 'hgt', 'hcl', 'ecl', 'pid'] # 'cid' is optional
# Sizes of the two collections being compared
n_passports = len(keys)
n_codes = len(codes)
# Passports that carry every required field; the key and value lists stay parallel
new_keys = []
new_values = []
for passport_keys, passport_values in zip(keys, values):
    # all() stops at the first missing field, like the break in an explicit loop
    if all(code in passport_keys for code in codes):
        new_keys.append(passport_keys)
        new_values.append(passport_values)

# Count of valid passports
count = len(new_keys)
print(count)

264


## Part 2

In [5]:
def _all_chars_in(text, allowed):
    '''Return True when text is non-empty and every character of it is in allowed.'''
    return len(text) > 0 and all(ch in allowed for ch in text)


def _number_in_range(text, low, high, n_digits=None):
    '''Return True when text is a plain run of ASCII digits within [low, high].

    When n_digits is given, text must also be exactly that many characters long.
    '''
    # Check the characters explicitly instead of relying on int(): int() raises
    # on junk such as 'abcd' and silently accepts ' 2000', '+2000' and '2_000'.
    if not _all_chars_in(text, '0123456789'):
        return False
    if n_digits is not None and len(text) != n_digits:
        return False
    return low <= int(text) <= high


def validate_code(key, value):
    '''Validate the code with key and value.

    Parameters
    ----------
    key : str
        Passport field name, e.g. 'byr', 'hgt' or 'pid'.
    value : str
        Raw field value. It is converted with str() first, so other types
        are accepted as well.

    Returns
    -------
    bool
        False when the value breaks the rule for its key, True otherwise.
        Keys with no rule (such as 'cid') always return True. Malformed
        values return False; they never raise.
    '''
    value = str(value)

    # Years must be exactly four digits and fall inside the inclusive range
    if key == 'byr':
        return _number_in_range(value, 1920, 2002, n_digits=4)

    if key == 'iyr':
        return _number_in_range(value, 2010, 2020, n_digits=4)

    if key == 'eyr':
        return _number_in_range(value, 2020, 2030, n_digits=4)

    if key == 'hgt':
        # A number followed by a two-character unit
        number, unit = value[:-2], value[-2:]
        if unit == 'cm':
            return _number_in_range(number, 150, 193)
        if unit == 'in':
            return _number_in_range(number, 59, 76)
        return False

    if key == 'hcl':
        # '#' followed by exactly six lowercase hexadecimal characters
        return (
            len(value) == 7
            and value[0] == '#'
            and _all_chars_in(value[1:], '0123456789abcdef')
        )

    if key == 'ecl':
        return value in ('amb', 'blu', 'brn', 'gry', 'grn', 'hzl', 'oth')

    if key == 'pid':
        # Exactly nine digits; leading zeros count
        return len(value) == 9 and _all_chars_in(value, '0123456789')

    # No rule for this key
    return True
        

In [6]:
# Count the passports whose every field passes validate_code
count = 0
for passport_keys, passport_values in zip(new_keys, new_values):
    # A passport with no fields is never counted; all() alone would accept it
    if not passport_keys:
        continue
    # all() stops at the first failing field
    if all(validate_code(field, content)
           for field, content in zip(passport_keys, passport_values)):
        count += 1
print(count)

224
