In [1]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
from pathlib import Path

# Problem 1

In [2]:
# When is the next bus arriving after "timestamp"?

# Line 1 of the input holds the timestamp; line 2 holds the comma-separated
# schedule, from which 'x' entries are skipped.
with open('bus.txt') as fp:
    data = fp.read().splitlines()
timestamp = int(data[0])
bus_schedule = list(map(int, filter(lambda t: t != 'x', data[1].split(','))))

def next_bus(timestamp, cadence):
    """Return the first departure time of a bus at or after ``timestamp``.

    A bus departs at every multiple of ``cadence``.  If ``timestamp`` is
    itself a multiple of ``cadence`` the bus is leaving right now, so
    ``timestamp`` is returned (a wait of zero) rather than the departure a
    full cadence later.

    Parameters
    ----------
    timestamp : int
        Earliest time at which we are able to board.
    cadence : int
        Interval between departures of the bus (must be non-zero).

    Returns
    -------
    int
        The smallest multiple of ``cadence`` that is >= ``timestamp``.
    """
    # Python's % always yields a result in [0, cadence) for a positive
    # cadence, so -timestamp % cadence is exactly the remaining wait.
    return timestamp + (-timestamp % cadence)

# One row per bus: its id, its next departure, and the wait until then.
bus = pd.Series(bus_schedule, name='bus')
depart_times = pd.Series(
    list(map(lambda cadence: next_bus(timestamp, cadence), bus_schedule)),
    name='depart_times',
)
wait_times = pd.Series(
    [departure - timestamp for departure in depart_times],
    name='wait_times',
)

df = pd.concat([bus, depart_times, wait_times], axis=1)
df

Unnamed: 0,bus,depart_times,wait_times
0,17,1014526,15
1,41,1014545,34
2,643,1014654,143
3,23,1014530,19
4,13,1014520,9
5,29,1014536,25
6,433,1014519,8
7,37,1014540,29
8,19,1014524,13


In [3]:
# Answer: id of the earliest-departing bus multiplied by the wait for it.
best_idx = df['depart_times'].idxmin()
df.loc[best_idx, ['bus', 'wait_times']].prod()

3464

# Problem 2 

In [4]:
def load_data(data):
    """Parse a comma-separated bus schedule into a cadence/offset table.

    Each entry's position in the list becomes its ``time_offset``; entries
    equal to ``"x"`` are discarded.  The returned frame has integer
    ``cadence`` and ``time_offset`` columns and is indexed by a copy of the
    ``cadence`` column.
    """
    entries = data.split(',')
    table = pd.DataFrame({'cadence': pd.Series(entries, dtype='object')})
    # Capture positions before filtering so offsets refer to the raw list.
    table['time_offset'] = pd.Series(table.index.copy())
    table = table[table['cadence'] != 'x'].reset_index(drop=True)
    table['cadence'] = table['cadence'].astype(int)
    return table.set_index(table['cadence'].copy())

# Problem 2 only needs the schedule, which is the second line of the input.
input_lines = Path('bus.txt').read_text().splitlines()
data = input_lines[1]

Maybe this is cheating, but I peeked at my data and...
something is weird here. Some time offsets
are larger than the bus cadence. Only the offset modulo the cadence matters,
so each of them can be moved back by whole heartbeats (i.e. reduced modulo
the cadence) without changing the solution to the problem.

In [5]:
# Rebind `df` to the Problem 2 cadence/offset table (this replaces the
# Problem 1 table that was bound to the same name).
df = load_data(data)
df

Unnamed: 0_level_0,cadence,time_offset
cadence,Unnamed: 1_level_1,Unnamed: 2_level_1
17,17,0
41,41,7
643,643,17
23,23,25
13,13,30
29,29,46
433,433,48
37,37,54
19,19,67


In [6]:
# Only the offset modulo the cadence matters, so reduce every offset that is
# at least one full cadence.  The comparison is >= (not >) so that an offset
# exactly equal to the cadence is normalised to 0 as well.
index = (df.time_offset >= df.cadence)
df.loc[index, 'time_offset'] = df.loc[index, 'time_offset'] % df.loc[index, 'cadence']
df

Unnamed: 0_level_0,cadence,time_offset
cadence,Unnamed: 1_level_1,Unnamed: 2_level_1
17,17,0
41,41,7
643,643,17
23,23,2
13,13,4
29,29,17
433,433,48
37,37,17
19,19,10


Now we see why they obfuscated the input... there are four buses (17, 643, 29 and 37)
that meet at time t = -17 (bus 17 has offset 0 and cadence 17, so it departs at -17 too),
and we can start our clock there and simplify things.

Once we set our clock to -17, we should only consider times where 
all those buses we've already solved are synchronized (so, 
the clock ticks at the least-common-multiple of all the buses 
that have been solved).

In [7]:
@dataclass
class ProblemTwoClock:
    """Clock that advances in steps of the combined cadence of the solved buses.

    ``tick`` moves ``now`` forward by ``cadence``; ``add_bus_cadence`` widens
    the step to the least common multiple of the current step and a newly
    solved bus, so later ticks only visit times at which every solved bus
    is still lined up.
    """
    # Current time shown by the clock.
    now: int = 0
    # Step applied by tick(); 0 means no bus has been merged in yet.
    cadence: int = 0

    def tick(self):
        """Advance the clock by one step of ``cadence`` and return the new time."""
        self.now += self.cadence
        return self.now

    def add_bus_cadence(self, bus):
        """Merge the cadence of ``bus`` into the clock's step size.

        The step becomes lcm(current step, bus).  When no bus has been merged
        yet (step is 0) the step is simply set to ``bus``: lcm(0, bus) is 0,
        which would otherwise leave the clock unable to ever advance.
        """
        if self.cadence == 0:
            self.cadence = bus
        else:
            self.cadence = np.lcm(self.cadence, bus)
        

In [8]:
# Pick the rows whose cadence is 17 or whose (reduced) time offset is 17,
# and keep their index labels.
buses_to_merge = df[(df['cadence'] == 17) | (df['time_offset'] == 17)].index
buses_to_merge

Int64Index([17, 643, 29, 37], dtype='int64', name='cadence')

In [9]:
# Least common multiple of the merged buses' cadences: the step at which they
# all repeat together.
new_cadence = np.lcm.reduce(buses_to_merge)
# Start the clock at -17 with that combined step.
clock = ProblemTwoClock(now=-17, cadence=new_cadence)

In [10]:
# The merged buses are already accounted for by the clock; remove their rows
# so only the unsolved buses remain.
df = df.drop(labels=buses_to_merge, axis='index')
df

Unnamed: 0_level_0,cadence,time_offset
cadence,Unnamed: 1_level_1,Unnamed: 2_level_1
41,41,7
23,23,2
13,13,4
433,433,48
19,19,10


The rest of the buses need to be solved iteratively.
For each "heartbeat" of the clock, see if any of the remaining
buses are in the right place. If so, add them to the 
clock cadence and remove them from the search list.

Repeat until search list is empty, then print the time on the
clock.

In [11]:
# Keep ticking until every remaining bus has been matched and removed.
while len(df) > 0:
    time = clock.tick()

    matched = []
    for bus in df.cadence.to_list():
        cadence, time_offset = df.loc[bus, :]
        if (time + time_offset) % cadence != 0:
            continue
        # A match: widen the clock step right away so it reflects this bus,
        # and remember the row for removal.
        clock.add_bus_cadence(cadence)
        matched.append(bus)

    if matched:
        df = df.drop(index=matched)

In [12]:
# Time shown on the clock after the search loop has emptied the bus table.
clock.now

760171380521445