In [63]:
import polars as pl
import polars.selectors as cs

from pathlib import Path

In [6]:
# Directory with the input files, one level above the current working directory.
DATA_PATH = Path('..') / 'data'
# Display every entry found directly inside the data directory.
[*DATA_PATH.glob('*')]

[WindowsPath('../data/input_1.txt'), WindowsPath('../data/input_2.txt')]

In [199]:
# Inclusive upper bound per color: a set counts as valid only when every
# color's value is <= the bound listed here.
MAX_MAP = dict(red=12, green=13, blue=14)

In [214]:
# Load the raw input with one text line per row. Using the newline as the
# "separator" keeps each whole line in a single column, which polars names
# `column_1` because the file has no header.
df = pl.read_csv(
    DATA_PATH / 'input_2.txt',
    has_header=False,
    separator='\n',
)

In [215]:
# Parse the raw lines into one row per (game, set) with one integer column per
# color. As implied by the parsing steps below, each input line has the shape
# "Game <id>: <n> <color>, <n> <color>; <n> <color>, ...".
df_game_set = (
    df
    # "Game <id>: <sets>" -> ["Game <id>", "<sets>"]
    .with_columns(
        pl.col('column_1').str.split(': '),
    )
    .select(
        pl.col('column_1').list[0].str.replace('Game ', '').cast(pl.Int32).alias('id_game'),
        pl.col('column_1').list[1].str.split('; ').alias('sets'),
    )
    # One row per set of a game.
    .explode('sets')
    # Number the sets 1..n within each game: start from a column of ones and
    # take its cumulative sum per game.
    .with_columns(
        pl.lit(1).alias('id_set')
    )
    .with_columns(
        pl.col('id_set').cum_sum().over('id_game')
    )
    # One row per "<n> <color>" entry of a set.
    .with_columns(
        pl.col('sets').str.split(', ')
    )
    .explode('sets')
    # Raw string literal: '\d' inside a plain string is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    .with_columns(
        pl.col('sets').str.extract_groups(r'(\d+) ([a-zA-Z]+)'),
    )
    # The capture groups come back as struct fields named '1' and '2'.
    .unnest('sets')
    .rename({'1': 'value', '2': 'color'})
    .with_columns(
        pl.col('value').cast(pl.Int64),
    )
    # Long -> wide: one column per color for every (game, set) pair.
    # NOTE(review): newer polars releases appear to rename `columns=` to `on=`;
    # confirm against the installed version before upgrading.
    .pivot(
        index=['id_game', 'id_set'],
        columns='color',
        values='value',
    )
    # A color that does not appear in a set is treated as a count of 0.
    .fill_null(0)
)
df_game_set.head()

id_game,id_set,blue,red,green
i32,i32,i64,i64,i64
1,1,12,15,2
1,2,5,17,8
1,3,17,8,0
1,4,1,4,9
2,1,6,6,2


In [216]:
# A set is valid when every color column is <= its bound in MAX_MAP; a game is
# valid when all of its sets are. The final frame holds the sum of the valid
# game ids (`id_game`) and the number of valid games (`is_valid`).
(
    df_game_set
    .pipe(
        # `pipe` gives access to the frame so the color columns (every column
        # whose name does not contain 'id') can be listed by name.
        lambda sets: sets.with_columns(
            # Row-wise AND of the per-color checks; this replaces a manual
            # boolean fold over the columns.
            pl.all_horizontal([
                pl.col(color).le(MAX_MAP[color])
                for color in sets.select(~cs.contains('id')).columns
            ])
            .alias('is_valid')
        )
    )
    .group_by('id_game', maintain_order=True)
    .agg(pl.col('is_valid').all())
    .filter(pl.col('is_valid'))
    .sum()
)

id_game,is_valid
i32,u32
2716,49


In [240]:
# For each game take the largest value seen per color across its sets,
# multiply those maxima together, and add the products up over all games.
(
    df_game_set
    .group_by('id_game', maintain_order=True)
    # Aggregate only the color columns (names without 'id').
    .agg((~cs.contains('id')).max())
    # Drop the grouping key so that only the color maxima remain.
    .drop('id_game')
    # Column-wise product: multiplies the color columns element by element.
    .fold(lambda product, column: product * column)
    .sum()
)

72227