# Metro Bike Trip-Data Downloader
Downloads every quarterly trip-data ZIP linked from
https://bikeshare.metro.net/about/data/  ➜  `../src/scraper/trip_data` (the `TARGET_DIR` set in the config cell)

In [11]:
### 1 Install / import packages  (run once)
# pip install -r requirements.txt

import os, re, time, requests, zipfile, textwrap
from urllib.parse import urljoin, urlsplit

from bs4 import BeautifulSoup
from tqdm import tqdm


In [12]:
# 2 Config – source page, download directory, and ZIP-link pattern
# Page that lists the quarterly trip-data archives.
PAGE_URL = "https://bikeshare.metro.net/about/data/"
# Relative path: resolved against the kernel's current working directory.
TARGET_DIR = "../src/scraper/trip_data"
# Case-insensitive: "trip" or "trips", anything in between, ending in ".zip".
ZIP_REGEX = re.compile(r"trips?.*\.zip$", flags=re.IGNORECASE)

# Create the download directory up front; a no-op when it already exists.
os.makedirs(TARGET_DIR, exist_ok=True)


In [13]:
# DEBUG – show the HTTP status and a short preview (at most 700 characters)
# of the page exactly as Requests receives it
debug_headers = {
    "User-Agent": "Mozilla/5.0 (compatible; MetroBikeCrawler/1.0)",
    "Accept": "text/html",
}
r = requests.get(PAGE_URL, headers=debug_headers, timeout=30)

print("HTTP status:", r.status_code)

# Flatten newlines first; textwrap.shorten then collapses whitespace and
# truncates the text, appending the placeholder when something was cut.
flattened_html = r.text.replace("\n", " ")
print(textwrap.shorten(flattened_html, width=700, placeholder=" […] "))


HTTP status: 200
<!doctype html> <!--[if lt IE 7]><html lang="en-US" class="no-js lt-ie9 lt-ie8 lt-ie7"><![endif]--> <!--[if (IE 7)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9 lt-ie8"><![endif]--> <!--[if (IE 8)&!(IEMobile)]><html lang="en-US" class="no-js lt-ie9"><![endif]--> <!--[if gt IE 8]><!--> <html lang="en-US" class="no-js"> <!--<![endif]--> <head> <meta charset="utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=edge"> <script type="text/javascript"> /* <![CDATA[ */ var […] 


In [21]:
# 3 Helper: collect_zip_links – scrape trip-ZIP links using browser-like headers

# Case-insensitive link filter (same pattern as ZIP_REGEX in the config cell).
ZIP_RE = re.compile(r"trips?.*\.zip$", re.I)
# Substring that must also occur in the lower-cased href. Note: ZIP_RE alone
# would accept the singular "trip", so this check is the stricter of the two.
TRIP_KEY = "trips"

def collect_zip_links(page_url=PAGE_URL):
    """Return the trip-data ZIP URLs linked from ``page_url``.

    The page is fetched with browser-like request headers and parsed with
    BeautifulSoup. An ``<a href>`` is kept when its href matches ``ZIP_RE``
    and contains ``TRIP_KEY`` (compared in lower case). Relative hrefs are
    resolved against ``page_url``.

    Parameters
    ----------
    page_url : str
        Address of the HTML page to scan. Defaults to ``PAGE_URL``.

    Returns
    -------
    list[str]
        De-duplicated absolute URLs, sorted in descending string order.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check an
        error page would be parsed and silently produce an empty list.
    requests.RequestException
        For connection problems or when the 30-second timeout expires.
    """
    # Use the session as a context manager so its connections are released
    # as soon as the page has been read.
    with requests.Session() as sess:
        sess.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/124.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html",
        })
        resp = sess.get(page_url, timeout=30)
        resp.raise_for_status()
        html = resp.text

    soup = BeautifulSoup(html, "html.parser")

    # A set removes archives that are linked more than once on the page.
    links = {
        urljoin(page_url, a["href"])
        for a in soup.select("a[href]")
        if ZIP_RE.search(a["href"]) and TRIP_KEY in a["href"].lower()
    }
    return sorted(links, reverse=True)

zip_urls = collect_zip_links()
print(f"Found {len(zip_urls)} ZIP files")
# Fail fast: with an empty list the download cell would do nothing and the
# integrity check would still report success, hiding a broken scrape.
assert zip_urls, "No trip-data ZIP links were found on the data page"
zip_urls[:5]


Found 35 ZIP files


['https://bikeshare.metro.net/wp-content/uploads/2025/04/metro-trips-2025-q1.zip',
 'https://bikeshare.metro.net/wp-content/uploads/2025/01/metro-trips-2024-q4.zip',
 'https://bikeshare.metro.net/wp-content/uploads/2024/10/metro-trips-2024-q3.zip',
 'https://bikeshare.metro.net/wp-content/uploads/2024/07/metro-trips-2024-q2.zip',
 'https://bikeshare.metro.net/wp-content/uploads/2024/04/metro-trips-2024-q1.zip']

In [22]:
# 4 Helper: download one ZIP with streaming + progress bar
def download_zip(url, dst_dir=TARGET_DIR, session=None):
    """Stream one archive from ``url`` into ``dst_dir`` and return its path.

    The body is written to ``<name>.part`` first and moved to its final name
    only after the transfer finished, so a file that exists under the final
    name is treated as complete and the download is skipped.

    Parameters
    ----------
    url : str
        Address of the archive. The file name is the last segment of the URL
        path; query string and fragment are ignored.
    dst_dir : str
        Directory to store the file in; created if it does not exist.
    session : requests.Session, optional
        Session to reuse. When omitted, a temporary session is created and
        closed again before returning.

    Returns
    -------
    str
        Path of the downloaded (or already present) file.

    Raises
    ------
    ValueError
        If no file name can be derived from ``url`` (e.g. it ends in "/").
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.RequestException, OSError
        For network or file-system failures. The ``.part`` file is removed
        before the exception propagates.
    """
    # Use only the URL path so "?query" / "#fragment" never end up in the
    # file name on disk.
    name = os.path.basename(urlsplit(url).path)
    if not name:
        # An empty name would make fname equal dst_dir, which always exists
        # and would be reported as "already present".
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    fname = os.path.join(dst_dir, name)
    if os.path.exists(fname):
        print("✓", name, "already present – skipped")
        return fname

    os.makedirs(dst_dir, exist_ok=True)
    tmp = fname + ".part"

    # Only close the session if it was created here; a caller-supplied
    # session stays open for further downloads.
    owns_session = session is None
    sess = requests.Session() if owns_session else session
    try:
        with sess.get(url, stream=True, timeout=120) as r:
            r.raise_for_status()
            # 0 (header missing) gives a progress bar without a known total.
            total = int(r.headers.get("content-length", 0))
            with open(tmp, "wb") as f, tqdm(total=total, unit="B", unit_scale=True,
                                            desc=name) as bar:
                for chunk in r.iter_content(1 << 15):
                    f.write(chunk)
                    bar.update(len(chunk))
        # os.replace also overwrites an existing target on every platform.
        os.replace(tmp, fname)
    except BaseException:
        # Also covers KeyboardInterrupt (kernel interrupt): never leave a
        # partial file behind, then let the original error propagate.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
    finally:
        if owns_session:
            sess.close()

    print("✓", name, "downloaded")
    return fname


In [23]:
# 5 Download: all ZIPs
# URLs whose download raised, kept so they can be inspected or retried
# instead of being lost between the progress-bar output.
failed_downloads = []

with requests.Session() as sess:
    sess.headers["User-Agent"] = "metro-bikeshare-crawler/1.0"
    for url in zip_urls:
        try:
            download_zip(url, session=sess)
        except Exception as exc:
            # Best effort: report the failure, remember the URL, pause
            # briefly, then continue with the next archive.
            print("Exception: ", url, exc)
            failed_downloads.append(url)
            time.sleep(5)

if failed_downloads:
    print(f"{len(failed_downloads)} of {len(zip_urls)} downloads failed:")
    for failed_url in failed_downloads:
        print("  ", failed_url)


metro-trips-2025-q1.zip: 100%|██████████| 1.96M/1.96M [00:00<00:00, 13.5MB/s]


✓ metro-trips-2025-q1.zip downloaded


metro-trips-2024-q4.zip: 100%|██████████| 2.48M/2.48M [00:00<00:00, 15.6MB/s]


✓ metro-trips-2024-q4.zip downloaded


metro-trips-2024-q3.zip: 100%|██████████| 2.75M/2.75M [00:00<00:00, 6.05MB/s]


✓ metro-trips-2024-q3.zip downloaded


metro-trips-2024-q2.zip: 100%|██████████| 2.94M/2.94M [00:00<00:00, 7.50MB/s]


✓ metro-trips-2024-q2.zip downloaded


metro-trips-2024-q1.zip: 100%|██████████| 2.46M/2.46M [00:00<00:00, 10.9MB/s]


✓ metro-trips-2024-q1.zip downloaded


metro-trips-2023-q4.zip: 100%|██████████| 2.66M/2.66M [00:00<00:00, 5.40MB/s]


✓ metro-trips-2023-q4.zip downloaded


metro-trips-2023-q3.zip: 100%|██████████| 2.68M/2.68M [00:00<00:00, 6.50MB/s]


✓ metro-trips-2023-q3.zip downloaded


metro-trips-2023-q2.zip: 100%|██████████| 2.23M/2.23M [00:00<00:00, 9.06MB/s]


✓ metro-trips-2023-q2.zip downloaded


metro-trips-2023-q1.zip: 100%|██████████| 1.47M/1.47M [00:00<00:00, 14.4MB/s]


✓ metro-trips-2023-q1.zip downloaded


metro-trips-2022-q4.zip: 100%|██████████| 1.62M/1.62M [00:00<00:00, 5.40MB/s]


✓ metro-trips-2022-q4.zip downloaded


metro-trips-2022-q3.zip: 100%|██████████| 1.71M/1.71M [00:00<00:00, 5.02MB/s]


✓ metro-trips-2022-q3.zip downloaded


metro-trips-2022-q2.zip: 100%|██████████| 1.38M/1.38M [00:00<00:00, 16.4MB/s]


✓ metro-trips-2022-q2.zip downloaded


metro-trips-2022-q1.zip: 100%|██████████| 1.12M/1.12M [00:00<00:00, 5.39MB/s]


✓ metro-trips-2022-q1.zip downloaded


metro-trips-2021-q4.zip: 100%|██████████| 1.27M/1.27M [00:00<00:00, 7.04MB/s]


✓ metro-trips-2021-q4.zip downloaded


la_metro_gbfs_trips_Q1_2017-2.zip: 100%|██████████| 577k/577k [00:00<00:00, 5.11MB/s]


✓ la_metro_gbfs_trips_Q1_2017-2.zip downloaded


MetroBikeShare_2016_Q3_trips-2.zip: 100%|██████████| 900k/900k [00:00<00:00, 16.3MB/s]


✓ MetroBikeShare_2016_Q3_trips-2.zip downloaded


metro-trips-2021-q3.zip: 100%|██████████| 1.16M/1.16M [00:00<00:00, 3.97MB/s]


✓ metro-trips-2021-q3.zip downloaded


metro-trips-2021-q2.zip: 100%|██████████| 1.63M/1.63M [00:00<00:00, 6.01MB/s]


✓ metro-trips-2021-q2.zip downloaded


metro-trips-2021-q1-1.zip: 100%|██████████| 819k/819k [00:00<00:00, 4.77MB/s]


✓ metro-trips-2021-q1-1.zip downloaded


metro-trips-2020-q2-v2.zip: 100%|██████████| 1.05M/1.05M [00:00<00:00, 6.85MB/s]


✓ metro-trips-2020-q2-v2.zip downloaded


metro-trips-2020-q4.zip: 100%|██████████| 751k/751k [00:00<00:00, 7.34MB/s]


✓ metro-trips-2020-q4.zip downloaded


metro-trips-2020-q3.zip: 100%|██████████| 828k/828k [00:00<00:00, 4.35MB/s]


✓ metro-trips-2020-q3.zip downloaded


metro-bike-share-trips-2020-q1.zip: 100%|██████████| 1.45M/1.45M [00:00<00:00, 19.7MB/s]


✓ metro-bike-share-trips-2020-q1.zip downloaded


metro-bike-share-trips-2019-q4.csv.zip: 100%|██████████| 1.76M/1.76M [00:01<00:00, 1.27MB/s]


✓ metro-bike-share-trips-2019-q4.csv.zip downloaded


metro-bike-share-trips-2019-q3-1.zip: 100%|██████████| 1.75M/1.75M [00:00<00:00, 4.43MB/s]


✓ metro-bike-share-trips-2019-q3-1.zip downloaded


metro-bike-share-trips-2019-q2.csv.zip: 100%|██████████| 1.29M/1.29M [00:00<00:00, 4.82MB/s]


✓ metro-bike-share-trips-2019-q2.csv.zip downloaded


metro-bike-share-trips-2019-q1.csv.zip: 100%|██████████| 1.11M/1.11M [00:00<00:00, 5.87MB/s]


✓ metro-bike-share-trips-2019-q1.csv.zip downloaded


metro-bike-share-trips-2018-q4.csv.zip: 100%|██████████| 1.41M/1.41M [00:00<00:00, 4.48MB/s]


✓ metro-bike-share-trips-2018-q4.csv.zip downloaded


metro-bike-share-trips-2018-q3.csv.zip: 100%|██████████| 1.78M/1.78M [00:00<00:00, 5.38MB/s]


✓ metro-bike-share-trips-2018-q3.csv.zip downloaded


metro-bike-share-trips-2016-q4.csv.zip: 100%|██████████| 772k/772k [00:00<00:00, 21.8MB/s]


✓ metro-bike-share-trips-2016-q4.csv.zip downloaded


metro-bike-share-trips-2018-q2.csv.zip: 100%|██████████| 1.45M/1.45M [00:00<00:00, 5.41MB/s]


✓ metro-bike-share-trips-2018-q2.csv.zip downloaded


metro-bike-share-trips-2018-q1.csv.zip: 100%|██████████| 1.23M/1.23M [00:00<00:00, 4.14MB/s]


✓ metro-bike-share-trips-2018-q1.csv.zip downloaded


metro-bike-share-trips-2017-q4-v2.csv.zip: 100%|██████████| 1.39M/1.39M [00:00<00:00, 7.29MB/s]


✓ metro-bike-share-trips-2017-q4-v2.csv.zip downloaded


la_metro_gbfs_trips_Q2_2017.csv.zip: 100%|██████████| 1.12M/1.12M [00:00<00:00, 4.62MB/s]


✓ la_metro_gbfs_trips_Q2_2017.csv.zip downloaded


metro-bike-share-trips-2017-q3.csv.zip: 100%|██████████| 1.21M/1.21M [00:00<00:00, 13.4MB/s]

✓ metro-bike-share-trips-2017-q3.csv.zip downloaded





In [24]:
# Optional: quick integrity check of the ZIPs
def test_zip(path):
    with zipfile.ZipFile(path) as zf:
        bad = zf.testzip()
        return bad is None

# Collect the path of every *.zip in TARGET_DIR, then keep the file names of
# those that fail test_zip.
zip_paths = [
    os.path.join(TARGET_DIR, entry)
    for entry in os.listdir(TARGET_DIR)
    if entry.endswith(".zip")
]
bad_files = [os.path.basename(zip_path) for zip_path in zip_paths
             if not test_zip(zip_path)]
print("Corrupt ZIPs:", bad_files or "None =)")


Corrupt ZIPs: None 🎉
