In [2]:
# Star import stays first so the explicit imports below take precedence over
# any same-named objects it brings in (presumably supplies `cookies`/`headers`).
from cookies import *

import datetime
from pathlib import Path

import pandas as pd
import pytz
import requests

In [3]:
def generate_params(day, detector):
    gmt = pytz.timezone('GMT')
    
    start_datetime = gmt.localize(datetime.datetime.combine(day, datetime.time(0, 0)))

    # end_datetime = gmt.localize(datetime.datetime.combine(day, datetime.time(23, 59))) # Comment out below two if you don't want weekly datasets

    end_day = day + datetime.timedelta(days=89) 
    end_datetime = gmt.localize(datetime.datetime.combine(end_day, datetime.time(23, 59)))
    
    s_time_id = int(start_datetime.timestamp())
    e_time_id = int(end_datetime.timestamp())

    params = {
        'report_form': '1',
        'dnode': 'VDS',
        'content': 'loops',
        'export': 'xls',
        'station_id': detector,
        's_time_id': str(s_time_id),
        's_time_id_f': start_datetime.strftime('%m/%d/%Y %H:%M'),
        'e_time_id': str(e_time_id),
        'e_time_id_f': end_datetime.strftime('%m/%d/%Y %H:%M'),
        'tod': 'all',
        'tod_from': '0',
        'tod_to': '0',
        'dow_0': 'on',
        'dow_1': 'on',
        'dow_2': 'on',
        'dow_3': 'on',
        'dow_4': 'on',
        'dow_5': 'on',
        'dow_6': 'on',
        'holidays': 'on',
        'q': 'flow',
        'q2': 'del_60',
        'gn': 'hour',
        'agg': 'on',
        'lane1': 'on',
        'lane2': 'on',
        'lane3': 'on',
    }

    return params

In [4]:
# NOTE: disabled earlier run -- downloads detector 426453 in 90-day chunks without combining them.
# start_date = datetime.date(2023, 1, 1)
# end_date = datetime.date(2025, 5, 6)
# delta = datetime.timedelta(days=90)

# current = start_date
# while current <= end_date:
#     detector = '426453'
#     params = generate_params(current, detector = detector)
#     response = requests.get('https://pems.dot.ca.gov/', params=params, cookies=cookies, headers=headers)

#     filename = f"./dumbarton_det_files_update/oppo 3300' W of toll Plaza {detector}/{str(current)}_{detector}.xlsx"
#     with open(filename, 'wb') as file:
#         file.write(response.content)

#     print(f'{filename} created successfully.')
#     current += delta

In [12]:
# Download one detector's hourly export in consecutive 90-day chunks, keep
# each raw file on disk, and concatenate all chunks into a single workbook.
start_date = datetime.date(2022, 1, 1)
end_date   = datetime.date(2025, 5, 6)
# Step between chunk start days; generate_params requests a 90-day window by
# default, so consecutive chunks neither overlap nor leave gaps.
delta      = datetime.timedelta(days=90)

# Loop-invariant settings, set once instead of on every iteration.
detector = '426469'
folder   = f"./dumbarton_det_files_update/3300' W of toll Plaza {detector}"
# Create the target folder up front; otherwise open(..., 'wb') below raises
# FileNotFoundError on a fresh checkout.
Path(folder).mkdir(parents=True, exist_ok=True)

combined_chunks = []

current = start_date
while current <= end_date:
    params   = generate_params(current, detector=detector)
    # A timeout keeps a stalled connection from hanging the cell forever.
    r        = requests.get('https://pems.dot.ca.gov/', params=params, cookies=cookies, headers=headers, timeout=120)
    # Fail here on an HTTP error instead of saving the error page as .xlsx
    # and hitting a confusing parse error in read_excel.
    r.raise_for_status()

    # write out the raw file
    filename = f"{folder}/{current}_{detector}.xlsx"
    with open(filename, 'wb') as f:
        f.write(r.content)

    # immediately read it into a DataFrame and store
    df = pd.read_excel(filename)
    combined_chunks.append(df)

    print(f"{filename} created and loaded ({len(df)} rows).")
    current += delta

# once the loop is done, concat all chunks in a single call
combined_df = pd.concat(combined_chunks, ignore_index=True)

# save the single combined file
out_path = "./3300TollPlaza.xlsx"
combined_df.to_excel(out_path, index=False)
print(f"All files concatenated into {combined_df.shape[0]} rows × {combined_df.shape[1]} cols, saved to {out_path}.")


./dumbarton_det_files_update/3300' W of toll Plaza 426469/2022-01-01_426469.xlsx created and loaded (2159 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2022-04-01_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2022-06-30_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2022-09-28_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2022-12-27_426469.xlsx created and loaded (2159 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2023-03-27_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2023-06-25_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2023-09-23_426469.xlsx created and loaded (2160 rows).
./dumbarton_det_files_update/3300' W of toll Plaza 426469/2023-12-22_426469.xlsx created and loa