# AIS Vessel Data Download and Aggregating Tool

In [1]:
# These libraries are built-in to Python
import zipfile
import os

# These are packages you need to install
import requests
from bs4 import BeautifulSoup
import pandas as pd

 **Downloads all the daily files for the selected year and month the AIS website**

In [2]:
def download_zip_files(year: str, month: str):
    url = f"https://coast.noaa.gov/htdata/CMSP/AISDataHandler/{year}/index.html"
    page = os.path.dirname(url)
    # Directory to save the downloaded files
    download_dir = month + '-' + year + '-zip'
    os.makedirs(download_dir, exist_ok=True)
    # Fetch the page content
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    # Find all links on the page
    links = soup.find_all('a', href=True)
    # Filter and download files that match the regex pattern
    for link in links:
        file_url = link['href']
        filename = os.path.basename(file_url)
        # Check if the filename matches the regex pattern
        try:
            link_month = filename.split('_')[2]
        except IndexError:
            continue
        if link_month == month:
            print(f"Downloading {filename}...")
            download_link = os.path.join(page, filename)
            file_response = requests.get(download_link)
            # Save the file to the download directory
            with open(os.path.join(download_dir, filename), 'wb') as f:
                f.write(file_response.content)
    print("Download complete.")


**Unzips all your downloaded files**

In [3]:
def unzip_files(year: str, month: str):
    # Directory containing the zip files
    zip_dir = month + '-' + year + '-zip'
    # Directory to extract the contents to
    extract_dir = month + '-' + year
    os.makedirs(extract_dir, exist_ok=True)

    # Iterate over all files in the directory
    for filename in os.listdir(zip_dir):
        if filename.endswith(".zip"):
            zip_path = os.path.join(zip_dir, filename)
            print(f"Unzipping {filename}...")

            # Open the zip file
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                # Extract all the contents to the specified directory
                zip_ref.extractall(extract_dir)

    print("Unzipping complete.")

In [4]:
def shrink_dataset(df):
    df['BaseDateTime'] = pd.to_datetime(df['BaseDateTime'], format='%Y-%m-%dT%H:%M:%S')
    filtered = df[df['BaseDateTime'].dt.hour.isin([0, 6, 12, 18])] # Select 0, 6, 12, 18 hours 
    filtered = filtered[filtered['BaseDateTime'].dt.minute.between(0, 1)] # Select 0 - 1 minutes
    filtered = filtered[filtered['BaseDateTime'].dt.second.between(0, 59)] # Select 0 - 59 seconds
    return filtered

In [28]:
year = '2022' # Put year here
month = '09' # Put month here

# download_zip_files(year, month)
# unzip_files(year, month)

extract_dir = month + '-' + year

month_day_dfs = []

# Iterate over all files in the directory
for filename in os.listdir(extract_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(extract_dir, filename)
        print(f"Reading and processing {filename}...")

        df = pd.read_csv(file_path)
        # Filter for vessels with length greater than 200
        df = df[df['Length'] > 200]

        # Filter for specific status codes (70-89, 1003, 1004, 1016, 1017, 1024)
        valid_status_codes = list(range(70, 90)) + [1003, 1004, 1016, 1017, 1024]
        df = df[df['VesselType'].isin(valid_status_codes)]

        df = shrink_dataset(df)

        # Filter out vessels that are not off the U.S. East Coast
        # U.S. East Coast approximate coordinates: Latitude 25-45N, Longitude -80 to -65W
        east_coast_vessels = df[(df['LAT'] >= 25) & (df['LAT'] <= 45) &
                                    (df['LON'] >= -80) & (df['LON'] <= -45)]

        # Append the filtered DataFrame to the list
        month_day_dfs.append(east_coast_vessels)

sept_df = pd.concat(month_day_dfs)

Reading and processing AIS_2022_09_22.csv...
Reading and processing AIS_2022_09_23.csv...
Reading and processing AIS_2022_09_21.csv...
Reading and processing AIS_2022_09_09.csv...
Reading and processing AIS_2022_09_08.csv...
Reading and processing AIS_2022_09_20.csv...
Reading and processing AIS_2022_09_18.csv...
Reading and processing AIS_2022_09_30.csv...
Reading and processing AIS_2022_09_24.csv...
Reading and processing AIS_2022_09_25.csv...
Reading and processing AIS_2022_09_19.csv...
Reading and processing AIS_2022_09_27.csv...
Reading and processing AIS_2022_09_26.csv...
Reading and processing AIS_2022_09_03.csv...
Reading and processing AIS_2022_09_17.csv...
Reading and processing AIS_2022_09_16.csv...
Reading and processing AIS_2022_09_02.csv...
Reading and processing AIS_2022_09_14.csv...
Reading and processing AIS_2022_09_28.csv...
Reading and processing AIS_2022_09_29.csv...
Reading and processing AIS_2022_09_01.csv...
Reading and processing AIS_2022_09_15.csv...
Reading an

In [29]:
sept_df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
63,372400000,2022-09-22 00:00:03,39.35739,-73.9234,0.8,47.0,123.0,MSC MARIA SAVERIA,IMO9467421,3FLE8,70.0,0.0,365.0,48.0,15.5,71.0,A
77,248554000,2022-09-22 00:00:00,25.75294,-79.61893,11.1,182.8,184.0,SEAVIOLET,IMO9790983,9HA4701,80.0,0.0,274.0,48.0,9.3,80.0,A
403,215797000,2022-09-22 00:00:00,36.87735,-75.61022,0.9,17.2,90.0,KONSTANTINA,IMO8906755,9HBG8,70.0,0.0,236.0,32.0,12.0,71.0,A
449,316023339,2022-09-22 00:00:02,43.14636,-79.19246,3.3,175.1,177.0,THUNDER BAY,IMO9601039,CFN6288,70.0,0.0,225.0,23.0,14.8,70.0,A
478,636015975,2022-09-22 00:00:03,36.91007,-75.38251,0.0,78.3,169.0,AGIOS DIMITRIOS,IMO9349605,D5DU8,71.0,1.0,299.0,40.0,11.9,71.0,A
542,220594000,2022-09-22 00:00:04,36.98413,-76.14792,10.5,112.1,113.0,GUNDE MAERSK,IMO9359014,OUIY2,70.0,0.0,366.0,42.0,15.5,71.0,A
552,636020952,2022-09-22 00:00:07,35.49153,-74.96687,6.1,6.5,9.0,MSC SAMIRA III,IMO9434462,5LBN8,71.0,0.0,212.0,30.0,10.1,71.0,A
556,248264000,2022-09-22 00:00:03,37.06923,-73.22968,7.0,262.6,238.0,TUGELA,IMO9505065,9HA2292,70.0,0.0,229.0,32.0,11.3,70.0,A
602,636013689,2022-09-22 00:00:01,36.59468,-75.19926,0.5,77.6,82.0,YM UBERTY,IMO9337444,A8OR4,70.0,0.0,333.0,42.0,14.5,71.0,A
684,538006047,2022-09-22 00:00:01,39.60901,-75.57445,14.7,334.8,335.0,PAMPERO,IMO9689548,V7JG5,80.0,0.0,226.0,,,80.0,A


In [30]:
sept_df.shape

(17741, 17)

In [31]:
unique_vessels_sept = sept_df.nunique()['MMSI']
unique_vessels_sept

800

In [32]:
sept_df_copy = sept_df.copy()

In [33]:
# Step 1: Round the timestamp to the nearest 6-hour mark
# This uses rounding in pandas: "H" means to round to the nearest hour, and 6H rounds to the nearest 6 hours
sept_df_copy['RoundedTime'] = sept_df_copy['BaseDateTime'].dt.round('6H')

# Step 2: Drop duplicates based on MMSI and the rounded time
# This keeps the first occurrence of each MMSI at the rounded 6-hour interval
deduplicated_sept_df = sept_df_copy.drop_duplicates(subset=['MMSI', 'RoundedTime'])

deduplicated_sept_df['BaseDateTime'] = deduplicated_sept_df['RoundedTime']
deduplicated_sept_df.drop(columns=['RoundedTime'], inplace=True)

  sept_df_copy['RoundedTime'] = sept_df_copy['BaseDateTime'].dt.round('6H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_sept_df['BaseDateTime'] = deduplicated_sept_df['RoundedTime']
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_sept_df.drop(columns=['RoundedTime'], inplace=True)


In [34]:
deduplicated_sept_df.shape

(12061, 17)

In [35]:
deduplicated_sept_df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
63,372400000,2022-09-22,39.35739,-73.9234,0.8,47.0,123.0,MSC MARIA SAVERIA,IMO9467421,3FLE8,70.0,0.0,365.0,48.0,15.5,71.0,A
77,248554000,2022-09-22,25.75294,-79.61893,11.1,182.8,184.0,SEAVIOLET,IMO9790983,9HA4701,80.0,0.0,274.0,48.0,9.3,80.0,A
403,215797000,2022-09-22,36.87735,-75.61022,0.9,17.2,90.0,KONSTANTINA,IMO8906755,9HBG8,70.0,0.0,236.0,32.0,12.0,71.0,A
449,316023339,2022-09-22,43.14636,-79.19246,3.3,175.1,177.0,THUNDER BAY,IMO9601039,CFN6288,70.0,0.0,225.0,23.0,14.8,70.0,A
478,636015975,2022-09-22,36.91007,-75.38251,0.0,78.3,169.0,AGIOS DIMITRIOS,IMO9349605,D5DU8,71.0,1.0,299.0,40.0,11.9,71.0,A
542,220594000,2022-09-22,36.98413,-76.14792,10.5,112.1,113.0,GUNDE MAERSK,IMO9359014,OUIY2,70.0,0.0,366.0,42.0,15.5,71.0,A
552,636020952,2022-09-22,35.49153,-74.96687,6.1,6.5,9.0,MSC SAMIRA III,IMO9434462,5LBN8,71.0,0.0,212.0,30.0,10.1,71.0,A
556,248264000,2022-09-22,37.06923,-73.22968,7.0,262.6,238.0,TUGELA,IMO9505065,9HA2292,70.0,0.0,229.0,32.0,11.3,70.0,A
602,636013689,2022-09-22,36.59468,-75.19926,0.5,77.6,82.0,YM UBERTY,IMO9337444,A8OR4,70.0,0.0,333.0,42.0,14.5,71.0,A
684,538006047,2022-09-22,39.60901,-75.57445,14.7,334.8,335.0,PAMPERO,IMO9689548,V7JG5,80.0,0.0,226.0,,,80.0,A


In [36]:
year = '2022' # Put year here
month = '08' # Put month here

download_zip_files(year, month)
unzip_files(year, month)

extract_dir = month + '-' + year

month_day_dfs = []

# Iterate over all files in the directory
for filename in os.listdir(extract_dir):
    if filename.endswith(".csv"):
        file_path = os.path.join(extract_dir, filename)
        print(f"Reading and processing {filename}...")

        df = pd.read_csv(file_path)
        # Filter for vessels with length greater than 200
        df = df[df['Length'] > 200]

        # Filter for specific status codes (70-89, 1003, 1004, 1016, 1017, 1024)
        valid_status_codes = list(range(70, 90)) + [1003, 1004, 1016, 1017, 1024]
        df = df[df['VesselType'].isin(valid_status_codes)]

        df = shrink_dataset(df)

        # Filter out vessels that are not off the U.S. East Coast
        # U.S. East Coast approximate coordinates: Latitude 25-45N, Longitude -80 to -65W
        east_coast_vessels = df[(df['LAT'] >= 25) & (df['LAT'] <= 45) &
                                    (df['LON'] >= -80) & (df['LON'] <= -45)]

        # Append the filtered DataFrame to the list
        month_day_dfs.append(east_coast_vessels)

aug_df = pd.concat(month_day_dfs)

Downloading AIS_2022_08_01.zip...
Downloading AIS_2022_08_02.zip...
Downloading AIS_2022_08_03.zip...
Downloading AIS_2022_08_04.zip...
Downloading AIS_2022_08_05.zip...
Downloading AIS_2022_08_06.zip...
Downloading AIS_2022_08_07.zip...
Downloading AIS_2022_08_08.zip...
Downloading AIS_2022_08_09.zip...
Downloading AIS_2022_08_10.zip...
Downloading AIS_2022_08_11.zip...
Downloading AIS_2022_08_12.zip...
Downloading AIS_2022_08_13.zip...
Downloading AIS_2022_08_14.zip...
Downloading AIS_2022_08_15.zip...
Downloading AIS_2022_08_16.zip...
Downloading AIS_2022_08_17.zip...
Downloading AIS_2022_08_18.zip...
Downloading AIS_2022_08_19.zip...
Downloading AIS_2022_08_20.zip...
Downloading AIS_2022_08_21.zip...
Downloading AIS_2022_08_22.zip...
Downloading AIS_2022_08_23.zip...
Downloading AIS_2022_08_24.zip...
Downloading AIS_2022_08_25.zip...
Downloading AIS_2022_08_26.zip...
Downloading AIS_2022_08_27.zip...
Downloading AIS_2022_08_28.zip...
Downloading AIS_2022_08_29.zip...
Downloading AI

In [37]:
aug_df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
172,215219000,2022-08-29 00:00:02,41.74747,-66.37743,20.7,231.5,230.0,CMA CGM VON HUMBOLDT,IMO9454448,9HA5017,71.0,0.0,396.0,54.0,13.3,71.0,A
191,211578000,2022-08-29 00:00:01,33.21588,-77.89171,15.1,59.7,62.0,ULSAN EXPRESS,IMO9613020,DDOQ2,70.0,0.0,366.0,48.0,15.5,70.0,A
225,636016305,2022-08-29 00:00:02,40.38418,-72.09436,11.8,89.8,89.0,SKYROS,IMO9695016,D5FL8,70.0,0.0,299.0,48.0,14.5,71.0,A
263,256930000,2022-08-29 00:00:01,36.23033,-74.68967,16.9,13.0,11.0,COSCO YANTIAN,IMO9305594,9HA4039,70.0,0.0,350.0,42.0,14.5,74.0,A
292,477690600,2022-08-29 00:00:06,36.81076,-75.57026,11.6,89.0,91.0,SEASPAN RIO DE JANEI,IMO9301847,VRCR9,70.0,0.0,260.0,32.0,12.8,70.0,A
373,563445000,2022-08-29 00:00:00,40.13976,-73.82889,12.0,181.4,178.0,STARLING,IMO9737527,9V3666,80.0,0.0,228.0,,,89.0,A
411,215131000,2022-08-29 00:00:04,35.93338,-75.03182,15.7,3.5,356.0,CMA CGM LYRA,IMO9410806,9HA4972,71.0,0.0,363.0,46.0,14.4,71.0,A
418,371243000,2022-08-29 00:00:03,38.95271,-76.38635,15.4,187.5,186.0,MSC KOREA,IMO9123154,3EBW3,70.0,0.0,242.0,32.0,11.7,71.0,A
449,563000800,2022-08-29 00:00:04,37.9918,-74.15974,17.9,191.9,191.0,MAERSK SARNIA,IMO9289946,9V5244,70.0,0.0,335.0,42.0,14.0,70.0,A
489,477232200,2022-08-29 00:00:01,36.87544,-76.34925,0.0,184.5,347.0,COSCO SAKURA,IMO9785794,VRSG3,71.0,5.0,366.0,48.0,13.1,71.0,A


In [38]:
aug_df.shape

(18421, 17)

In [39]:
unique_vessels_aug = aug_df.nunique()['MMSI']
unique_vessels_aug

856

In [40]:
aug_df_copy = aug_df.copy()

In [41]:
# Step 1: Round the timestamp to the nearest 6-hour mark
# This uses rounding in pandas: "H" means to round to the nearest hour, and 6H rounds to the nearest 6 hours
aug_df_copy['RoundedTime'] = aug_df_copy['BaseDateTime'].dt.round('6H')

# Step 2: Drop duplicates based on MMSI and the rounded time
# This keeps the first occurrence of each MMSI at the rounded 6-hour interval
deduplicated_aug_df = aug_df_copy.drop_duplicates(subset=['MMSI', 'RoundedTime'])

deduplicated_aug_df["BaseDateTime"] = deduplicated_aug_df["RoundedTime"]
deduplicated_aug_df.drop(columns=['RoundedTime'], inplace=True)

  aug_df_copy['RoundedTime'] = aug_df_copy['BaseDateTime'].dt.round('6H')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_aug_df["BaseDateTime"] = deduplicated_aug_df["RoundedTime"]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_aug_df.drop(columns=['RoundedTime'], inplace=True)


In [42]:
deduplicated_aug_df.shape


(12611, 17)

In [20]:
deduplicated_aug_df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
172,215219000,2022-08-29,41.74747,-66.37743,20.7,231.5,230.0,CMA CGM VON HUMBOLDT,IMO9454448,9HA5017,71.0,0.0,396.0,54.0,13.3,71.0,A
191,211578000,2022-08-29,33.21588,-77.89171,15.1,59.7,62.0,ULSAN EXPRESS,IMO9613020,DDOQ2,70.0,0.0,366.0,48.0,15.5,70.0,A
225,636016305,2022-08-29,40.38418,-72.09436,11.8,89.8,89.0,SKYROS,IMO9695016,D5FL8,70.0,0.0,299.0,48.0,14.5,71.0,A
263,256930000,2022-08-29,36.23033,-74.68967,16.9,13.0,11.0,COSCO YANTIAN,IMO9305594,9HA4039,70.0,0.0,350.0,42.0,14.5,74.0,A
292,477690600,2022-08-29,36.81076,-75.57026,11.6,89.0,91.0,SEASPAN RIO DE JANEI,IMO9301847,VRCR9,70.0,0.0,260.0,32.0,12.8,70.0,A
373,563445000,2022-08-29,40.13976,-73.82889,12.0,181.4,178.0,STARLING,IMO9737527,9V3666,80.0,0.0,228.0,,,89.0,A
411,215131000,2022-08-29,35.93338,-75.03182,15.7,3.5,356.0,CMA CGM LYRA,IMO9410806,9HA4972,71.0,0.0,363.0,46.0,14.4,71.0,A
418,371243000,2022-08-29,38.95271,-76.38635,15.4,187.5,186.0,MSC KOREA,IMO9123154,3EBW3,70.0,0.0,242.0,32.0,11.7,71.0,A
449,563000800,2022-08-29,37.9918,-74.15974,17.9,191.9,191.0,MAERSK SARNIA,IMO9289946,9V5244,70.0,0.0,335.0,42.0,14.0,70.0,A
489,477232200,2022-08-29,36.87544,-76.34925,0.0,184.5,347.0,COSCO SAKURA,IMO9785794,VRSG3,71.0,5.0,366.0,48.0,13.1,71.0,A


In [43]:
# Combine August and September datasets
df = pd.concat([deduplicated_aug_df, deduplicated_sept_df], ignore_index=True)

In [44]:
df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
0,215219000,2022-08-29,41.74747,-66.37743,20.7,231.5,230.0,CMA CGM VON HUMBOLDT,IMO9454448,9HA5017,71.0,0.0,396.0,54.0,13.3,71.0,A
1,211578000,2022-08-29,33.21588,-77.89171,15.1,59.7,62.0,ULSAN EXPRESS,IMO9613020,DDOQ2,70.0,0.0,366.0,48.0,15.5,70.0,A
2,636016305,2022-08-29,40.38418,-72.09436,11.8,89.8,89.0,SKYROS,IMO9695016,D5FL8,70.0,0.0,299.0,48.0,14.5,71.0,A
3,256930000,2022-08-29,36.23033,-74.68967,16.9,13.0,11.0,COSCO YANTIAN,IMO9305594,9HA4039,70.0,0.0,350.0,42.0,14.5,74.0,A
4,477690600,2022-08-29,36.81076,-75.57026,11.6,89.0,91.0,SEASPAN RIO DE JANEI,IMO9301847,VRCR9,70.0,0.0,260.0,32.0,12.8,70.0,A
5,563445000,2022-08-29,40.13976,-73.82889,12.0,181.4,178.0,STARLING,IMO9737527,9V3666,80.0,0.0,228.0,,,89.0,A
6,215131000,2022-08-29,35.93338,-75.03182,15.7,3.5,356.0,CMA CGM LYRA,IMO9410806,9HA4972,71.0,0.0,363.0,46.0,14.4,71.0,A
7,371243000,2022-08-29,38.95271,-76.38635,15.4,187.5,186.0,MSC KOREA,IMO9123154,3EBW3,70.0,0.0,242.0,32.0,11.7,71.0,A
8,563000800,2022-08-29,37.9918,-74.15974,17.9,191.9,191.0,MAERSK SARNIA,IMO9289946,9V5244,70.0,0.0,335.0,42.0,14.0,70.0,A
9,477232200,2022-08-29,36.87544,-76.34925,0.0,184.5,347.0,COSCO SAKURA,IMO9785794,VRSG3,71.0,5.0,366.0,48.0,13.1,71.0,A


In [45]:
# Sort by 'BaseDateTime' and Reset index
df.sort_values('BaseDateTime', inplace=True)
df.reset_index(drop=True, inplace=True)

In [46]:
df.head(10)

Unnamed: 0,MMSI,BaseDateTime,LAT,LON,SOG,COG,Heading,VesselName,IMO,CallSign,VesselType,Status,Length,Width,Draft,Cargo,TransceiverClass
0,241473000,2022-08-01,40.55946,-69.92838,13.7,267.5,270.0,CHEROKEE,IMO9749491,SVCN2,80.0,0.0,274.0,,,80.0,A
1,368734000,2022-08-01,36.85812,-76.31758,0.0,132.9,274.0,SISLER,IMO9117038,NZIX,70.0,5.0,289.0,32.0,11.3,70.0,A
2,636015411,2022-08-01,38.91655,-75.15314,0.0,122.5,141.0,ALMI GALAXY,IMO9579509,D5AO2,80.0,1.0,274.0,48.0,17.0,80.0,A
3,636021040,2022-08-01,32.5797,-78.83719,9.5,91.1,94.0,SFL HAWAII,IMO9679555,5LBY6,70.0,0.0,270.0,42.0,11.4,70.0,A
4,232012083,2022-08-01,27.05177,-79.38628,12.4,181.0,180.0,HELLESPONT PROTECTOR,IMO9351452,MCCF4,80.0,0.0,228.0,32.0,14.5,80.0,A
5,218776000,2022-08-01,40.50934,-73.66933,0.0,289.6,147.0,NEW YORK EXPRESS,IMO9501332,DIXJ2,70.0,1.0,366.0,48.0,15.5,70.0,A
6,636014629,2022-08-01,36.9618,-76.42385,0.9,44.6,36.0,ZHONG MAY,IMO9420124,A8VM5,70.0,0.0,292.0,45.0,18.3,,A
7,636015014,2022-08-01,26.38338,-78.84152,1.2,221.3,182.0,POLA,IMO9493767,A8YB5,80.0,0.0,274.0,48.0,16.5,,A
8,354918000,2022-08-01,36.87994,-76.33106,0.0,127.3,96.0,NYK MARIA,IMO9406764,3FER3,70.0,5.0,210.0,30.0,11.5,70.0,A
9,366698000,2022-08-01,32.85027,-79.94024,0.0,99.3,55.0,CAPE DECISION,IMO7310911,WMGD,70.0,5.0,207.0,32.0,6.8,70.0,A


In [47]:
df.shape

(24672, 17)

In [48]:
unique_vessels = df.nunique()['MMSI']
unique_vessels

1229

In [49]:
# Export csv and upload to github
df.to_csv('vessel_data1.csv', index=False)