<a href="https://colab.research.google.com/github/calebarr/AIS/blob/main/AIS_Downloader_TeamShared.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Imports (the Google Drive mount lines are commented out; uncomment them when running in Colab)
# from google.colab import drive
import os
from datetime import datetime, timedelta
import requests
import zipfile
import pandas as pd

# print("Mounting Google Drive...")
# drive.mount('/content/drive')

In [None]:
# AIS Data Downloader - Google Colab Version (Team-Ready)


# Define the AIS downloader function
def download_ais_data(start_date_str, end_date_str, save_folder, timeout=60):
    """Download one AIS zip archive per day for an inclusive date range.

    Each archive is fetched from
    ``https://coast.noaa.gov/htdata/CMSP/AISDataHandler/<year>/AIS_<YYYY_MM_DD>.zip``
    and written to ``save_folder`` under the same file name.

    Args:
        start_date_str: First day to download, formatted ``YYYY-MM-DD``.
        end_date_str: Last day to download (inclusive), formatted ``YYYY-MM-DD``.
        save_folder: Directory that receives the archives; created if missing.
        timeout: Seconds to wait for the server before giving up on a request.

    Returns:
        List of file paths that were saved successfully, in date order.
        Days that fail (non-200 response or network error) are reported with
        a printed message and skipped.

    Raises:
        ValueError: If a date string does not match ``YYYY-MM-DD`` or the two
            dates fall in different years.
    """
    # Validate the inputs first so a bad call has no filesystem side effects.
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d")

    if start_date.year != end_date.year:
        raise ValueError("Start and end dates must be in the same year for this script.")

    os.makedirs(save_folder, exist_ok=True)
    print(f"Files will be saved to: {save_folder}")

    saved_paths = []
    if end_date < start_date:
        print(f"Nothing to download: {end_date_str} is before {start_date_str}.")
        return saved_paths

    chunk_size = 1024 * 1024  # stream in 1 MiB pieces instead of buffering the whole archive

    for offset in range((end_date - start_date).days + 1):
        day = start_date + timedelta(days=offset)
        filename = f"AIS_{day.strftime('%Y_%m_%d')}.zip"
        # GS: the year in the URL comes from the requested date, not a hardcoded 2020
        url = f"https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{day.year}/{filename}"
        file_path = os.path.join(save_folder, filename)
        # Write to a temporary name so an interrupted download never leaves a
        # truncated archive under the final file name.
        partial_path = file_path + ".part"

        print(f"Downloading {filename}...")
        try:
            with requests.get(url, stream=True, timeout=timeout) as response:
                if response.status_code != 200:
                    print(f"Failed to download {filename} (HTTP {response.status_code})")
                    continue
                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=chunk_size):
                        f.write(chunk)
            os.replace(partial_path, file_path)
            print(f"Saved: {file_path}")
            saved_paths.append(file_path)
        except requests.RequestException as exc:
            # Treat network errors like HTTP failures: report and move on to the next day.
            print(f"Failed to download {filename} ({exc})")
        finally:
            # Only present if the download did not complete.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    return saved_paths

# Fetch the archives for 2020-01-05 through 2020-01-06 into the assets folder
download_ais_data("2020-01-05", "2020-01-06", "../assets/ais_data")




Files will be saved to: ../assets/ais_data
Downloading AIS_2020_01_05.zip...
Saved: ../assets/ais_data\AIS_2020_01_05.zip
Downloading AIS_2020_01_06.zip...


In [None]:
# Step 2 - Unzip the file and put it in a dataframe

def unzip_and_load_data(zip_file_path, dataframe=None):
    """Extract a zip archive next to itself and load its CSV members.

    Every member of the archive is extracted into the directory that contains
    ``zip_file_path``. Each extracted file whose name ends in ``.csv``
    (case-insensitive) is read with pandas and the rows are appended, in
    archive order, after any rows already in ``dataframe``.

    Args:
        zip_file_path: Path to the ``.zip`` archive to extract and read.
        dataframe: Optional existing DataFrame to append the loaded rows to.

    Returns:
        A DataFrame with a fresh 0..n-1 index containing the rows of
        ``dataframe`` (if given) followed by the rows of every CSV member.
        If the archive has no CSV members, ``dataframe`` is returned
        unchanged, or an empty DataFrame when none was given.
    """
    extract_dir = os.path.dirname(zip_file_path)

    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        # extract() returns the path actually written, which can differ from
        # the raw member name once zipfile has sanitised it.
        extracted = [
            (member, zip_ref.extract(member, extract_dir))
            for member in zip_ref.infolist()
        ]

    frames = []
    for member, csv_file_path in extracted:
        if member.is_dir() or not member.filename.lower().endswith('.csv'):
            continue
        print(f"Loading data from {csv_file_path}...")
        frames.append(pd.read_csv(csv_file_path, low_memory=False))

    if not frames:
        return pd.DataFrame() if dataframe is None else dataframe

    if dataframe is not None:
        frames.insert(0, dataframe)
    # Concatenate once; growing a frame inside the loop re-copies all prior rows each time.
    return pd.concat(frames, ignore_index=True)


In [None]:
# Load the 2020-01-05 archive into a DataFrame and preview its first rows
data = unzip_and_load_data("../assets/ais_data/AIS_2020_01_05.zip")

data.head()