In [1]:
import pandas as pd
import numpy as np
# Read the input inside a context manager so the file handle is closed as soon
# as the text has been consumed, instead of staying open for the life of the kernel.
with open("day_4_input.txt") as f:
    # Entries are separated by a blank line (two newlines). Inside an entry the
    # fields are separated by spaces or single newlines; str.split() with no
    # argument splits on any run of whitespace and never yields empty tokens,
    # so a trailing newline or a doubled space cannot produce a bogus '' field.
    input_list = [entry.split() for entry in f.read().split("\n\n")]

In [2]:
# Wrap the parsed token lists in a DataFrame: one row per entry of input_list.
# NOTE(review): the following cell starts with this exact same assignment before
# reshaping, so this cell looks redundant — confirm before removing it.
df = pd.DataFrame(input_list)

In [3]:
def _fields_by_passport(token_rows):
    """Pivot rows of "key:value" tokens into a frame with one column per key.

    Parameters
    ----------
    token_rows : list of list of str
        One inner list per entry; each element is a "key:value" token.

    Returns
    -------
    pandas.DataFrame
        Indexed by the entry's row number (index name "level_0"), with one
        column per key found before the colon and the text after the colon
        as the cell value.
    """
    # Rectangular frame first, then stack to long form: one line per
    # (row number, token position) pair.
    long_form = pd.DataFrame(token_rows).stack().reset_index()

    # Only the row number and the token text matter from here on; the token
    # position ("level_1") is discarded.
    tokens = long_form[["level_0", 0]].set_index("level_0")

    # Split every token at the colon: column 0 holds the key, column 1 the value.
    pairs = tokens[0].str.split(":", expand=True)

    # Index by (row number, key) and move the key level into the columns, so
    # each key becomes its own column filled with the matching values.
    wide = pairs.reset_index().set_index(["level_0", 0]).unstack(-1)

    # unstack leaves a two-level column index; keep only the key level so
    # columns can be addressed directly by key.
    wide.columns = wide.columns.unique(level=1)
    return wide


df = _fields_by_passport(input_list)

In [4]:
# Bare expression: renders `df` as the cell output for a visual check of the reshape.
df

Unnamed: 0_level_0,byr,cid,ecl,eyr,hcl,hgt,iyr,pid
level_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,1939,,hzl,2027,#602927,186cm,2019,552194973
1,1996,,brn,2020,#866857,164cm,2015,657988073
2,1951,321,brn,2022,#fffffd,62in,2017,#6ef4e1
3,1980,129,brn,2025,#fffffd,150cm,2011,420023864
4,1925,,amb,2029,#ceb3a1,187cm,2016,223151011
...,...,...,...,...,...,...,...,...
286,1967,,gmt,2033,,59cm,2021,2498700612
287,1953,,oth,,#b6652a,,2013,442586860
288,1967,,oth,2022,#866857,151cm,2017,095687847
289,1991,,hzl,2024,#866857,61cm,1930,983640144


In [5]:
def day_4_part_1(passports=None):
    """Count passports that carry every required field.

    A passport counts when none of the seven fields ``byr``, ``iyr``, ``eyr``,
    ``hgt``, ``hcl``, ``ecl`` and ``pid`` is missing. ``cid`` is optional and
    field values are not inspected.

    Parameters
    ----------
    passports : pandas.DataFrame, optional
        One row per passport, one column per field, missing fields as NaN.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    int
        Number of passports with all seven required fields present.
    """
    if passports is None:
        passports = df

    required = ["byr", "iyr", "eyr", "hgt", "hcl", "ecl", "pid"]

    # Check the required columns by name rather than counting non-null cells:
    # reindex() supplies an all-NaN column for any required field that never
    # occurs in the input, and columns outside `required` (including an absent
    # or present `cid`) cannot influence the result.
    has_all_required = passports.reindex(columns=required).notna().all(axis=1)

    return int(has_all_required.sum())

In [6]:
# Run part 1 on the notebook-level `df`; the cell output is the resulting count.
day_4_part_1()

235

In [7]:
def day_4_part_2(passports=None):
    """Count passports whose required fields are all present and well-formed.

    Rules applied per field (a missing or malformed value makes the passport
    invalid; it never raises):

    * ``byr``: exactly four digits, 1920 to 2002 inclusive
    * ``iyr``: exactly four digits, 2010 to 2020 inclusive
    * ``eyr``: exactly four digits, 2020 to 2030 inclusive
    * ``hgt``: digits followed by ``cm`` (150 to 193) or ``in`` (59 to 76)
    * ``hcl``: ``#`` followed by exactly six characters from 0-9 / a-f
    * ``ecl``: one of amb, blu, brn, gry, grn, hzl, oth
    * ``pid``: exactly nine digits

    Parameters
    ----------
    passports : pandas.DataFrame, optional
        One row per passport, one column per field, values as strings and
        missing fields as NaN. Defaults to the notebook-level ``df``.

    Returns
    -------
    int
        Number of passports that satisfy every rule above.
    """
    if passports is None:
        passports = df

    constraints = {
        "byr": {"min": 1920, "max": 2002},
        "iyr": {"min": 2010, "max": 2020},
        "eyr": {"min": 2020, "max": 2030},
        "hgt": {
            "cm": {"min": 150, "max": 193},
            "in": {"min": 59, "max": 76},
        },
        "hcl": r"^#[0-9a-f]{6}$",
        "ecl": ["amb", "blu", "brn", "gry", "grn", "hzl", "oth"],
        "pid": r"^[0-9]{9}$",
    }
    required = ["byr", "iyr", "eyr", "hgt", "hcl", "ecl", "pid"]

    # Work on a fresh frame restricted to the required columns. reindex()
    # adds an all-NaN column for a field that never occurs in the input, and
    # the object cast keeps the .str accessor usable on such a column.
    fields = passports.reindex(columns=required).astype(object)

    def year_in_range(name):
        # Gate on the textual shape first, then convert: text that is not
        # four digits becomes NaN instead of blowing up a float cast, and
        # NaN fails the range test.
        text = fields[name]
        four_digits = text.str.match(r"^[0-9]{4}$", na=False)
        value = pd.to_numeric(text.where(four_digits), errors="coerce")
        return value.between(constraints[name]["min"], constraints[name]["max"])

    # Validate years
    byr_mask = year_in_range("byr")
    iyr_mask = year_in_range("iyr")
    eyr_mask = year_in_range("eyr")

    # Height: split into magnitude and unit with one anchored pattern, so a
    # value without a recognised unit or with a non-numeric magnitude simply
    # yields NaN in both parts.
    hgt_parts = fields["hgt"].str.extract(r"^([0-9]+)(cm|in)$")
    magnitude = pd.to_numeric(hgt_parts[0], errors="coerce")
    hgt_mask = pd.Series(False, index=fields.index)
    for unit, bounds in constraints["hgt"].items():
        hgt_mask = hgt_mask | (
            (hgt_parts[1] == unit) & magnitude.between(bounds["min"], bounds["max"])
        )

    # Hair color
    hcl_mask = fields["hcl"].str.match(constraints["hcl"], na=False)

    # Eye color
    ecl_mask = fields["ecl"].isin(constraints["ecl"])

    # PID
    pid_mask = fields["pid"].str.match(constraints["pid"], na=False)

    # A passport is valid only when every individual check passes.
    mask = byr_mask & iyr_mask & eyr_mask & hgt_mask & hcl_mask & ecl_mask & pid_mask
    return int(mask.sum())


In [8]:
# Run part 2 on the notebook-level `df`; the cell output is the resulting count.
day_4_part_2()

194