# East Atlantic Coast Aquatic Invasive Species (AIS) Monitoring Program

CKAN record: https://catalogue.cioos.ca/dataset/ca-cioos_b54e1292-7483-4730-9873-4df055bd7edb

In [2]:
from erddapy import ERDDAP
import os
import pandas as pd
from tqdm.notebook import tqdm

In [25]:
# ERDDAP client for the server at erddap.ogsl.ca, queried through the tabledap protocol.
e = ERDDAP(server="https://erddap.ogsl.ca/erddap", protocol="tabledap")

In [26]:
# Pick the dataset to query and restrict the request to the three columns
# used by the rest of the notebook.
e.dataset_id = "mpoEaeTemperature"
e.variables = ["time", "location", "sea_water_temperature"]

In [50]:
# Download the dataset once and cache it locally as a gzipped CSV; later runs
# load the cached copy instead of querying the server again.
os.makedirs('data', exist_ok=True)

csvfile = "data/ea_ais_tempdata.csv.gz"

if not os.path.exists(csvfile):
    print("Downloading...", end='')
    df = e.to_pandas()
    # Write to a scratch file and rename it into place, so an interrupted
    # write never leaves a truncated archive that the next run would mistake
    # for a complete cache.
    partial_csvfile = csvfile + ".part"
    df.to_csv(partial_csvfile, compression='gzip', index=False)
    os.replace(partial_csvfile, csvfile)
    print("Done.")
else:
    df = pd.read_csv(csvfile)

# Drop the unit suffixes (e.g. " (UTC)") from the column names.
df = df.rename(columns={'time (UTC)': 'time',
           'location (unitless)': 'location',
           'sea_water_temperature (degree_C)':'sea_water_temperature'})

# Ensure the date column is in datetime format
df['time'] = pd.to_datetime(df['time'])

In [51]:
# Show five randomly chosen rows as a quick sanity check of the loaded table.
df.sample(n=5)

Unnamed: 0,time,location,sea_water_temperature
204784,2019-06-09 11:45:00+00:00,Baie des Belles Amours,2.624
2125952,2018-07-27 09:00:00+00:00,Port de Gros Cacouna,11.819
2801364,2016-09-29 15:30:00+00:00,Quai des pecheurs de Cap-aux-Meules,13.558
1565647,2010-06-28 08:38:43+00:00,Marina de Sept-iles,9.275
919367,2015-08-31 06:42:50+00:00,Marina de Cap-aux-Meules,19.758


In [63]:
# Daily calendar spanning the period of interest, both end dates included.
# NOTE(review): daily_time_index is not referenced by the cells visible in
# this notebook excerpt - confirm whether a reindex step was intended.
start_date, end_date = "2008-06-01", "2022-10-31"
daily_time_index = pd.date_range(start_date, end_date, freq="D")


In [85]:
# Build one daily-mean temperature series per location and join them side by
# side into a single wide table (rows: days, columns: numeric location ids).
df_data = df.sort_values(by=['location', 'time']).set_index(['location', 'time'])

# For every location group: drop the location index level so only the time
# index remains, label the single remaining column with a 1-based id assigned
# in groupby iteration order, then average the readings within each day.
all_timeseries = [
    station_df.droplevel(0).set_axis([station_number], axis=1).resample('D').mean()
    for station_number, (_, station_df) in enumerate(df_data.groupby(level=0), start=1)
]

# Align all per-location series on their shared time index; days on which a
# location has no data show up as NaN in that location's column.
dataset = pd.concat(all_timeseries, axis=1)

In [86]:
# Persist the wide daily table (time index included) to the working directory.
dataset.to_csv(path_or_buf='ais_dataset.csv')

In [87]:
# Display the combined table: one row per day, one numbered column per location.
dataset

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,40,41,42,43,44,45,46,47,48,49
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2008-06-21 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2008-06-22 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2008-06-23 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2008-06-24 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
2008-06-25 00:00:00+00:00,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-10-13 00:00:00+00:00,,,,,,,,,10.827698,,...,,,10.647312,,10.942656,,,,,
2022-10-14 00:00:00+00:00,,,,,,,,,11.138854,,...,,,11.001427,,11.100167,,,,,
2022-10-15 00:00:00+00:00,,,,,,,,,11.455927,,...,,,11.335500,,11.276969,,,,,
2022-10-16 00:00:00+00:00,,,,,,,,,11.760396,,...,,,,,11.595698,,,,,
