In [1]:
import pandas as pd
import numpy as np
from scipy.interpolate import interp1d

In [2]:
# Load one CSV per lake from the "final" directory. Only the Superior table is
# forward-filled on load; the other tables are used as read.
# `.ffill()` replaces the deprecated `fillna(method="ffill")` spelling.
erie_df = pd.read_csv("final/erie.csv")
huron_df = pd.read_csv("final/huron.csv")
michigan_df = pd.read_csv("final/michigan.csv")
ontario_df = pd.read_csv("final/ontario.csv")
superior_df = pd.read_csv("final/superior.csv").ffill()

# Parse the "date" column of every table into datetime64 values.
for lake_df in (erie_df, huron_df, michigan_df, ontario_df, superior_df):
    lake_df["date"] = pd.to_datetime(lake_df["date"])

In [3]:
# Preview the first rows of the Superior table after loading.
superior_df.head()

Unnamed: 0,date,chlor,sst
0,2019-01-09,3.932144,2.630252
1,2019-01-17,1.171366,2.48947
2,2019-01-25,1.118915,2.029257
3,2019-02-02,0.51043,2.029257
4,2019-02-10,1.242594,-0.781515


In [4]:
# Check the column dtypes of the Erie table (the date column should be datetime64).
erie_df.dtypes

date     datetime64[ns]
chlor           float64
sst             float64
dtype: object

In [5]:
def convert_dates_to_days(dates, start_date=None, name='Day'):
    """Converts a series of dates to a series of float values that
    represent days since start_date.
    """

    if start_date:
        ts0 = pd.Timestamp(start_date).timestamp()
    else:
        ts0 = 0

    return ((dates.apply(pd.Timestamp.timestamp) - 
            ts0)/(24*3600)).rename(name)

In [6]:
# Shared origin for all day offsets: the first date in the Erie table.
# Later cells pass this as `start_date` to convert_dates_to_days.
date = erie_df["date"].iloc[0]

# Erie

In [7]:
# Day offsets (relative to `date`) of the raw Erie observations.
org_time = convert_dates_to_days(erie_df["date"], start_date=date)

# One cubic interpolant per measured column, keyed on those offsets.
org_chlor_interp, org_sst_interp = (
    interp1d(org_time, erie_df[column], kind="cubic", fill_value="extrapolate")
    for column in ("chlor", "sst")
)

# Daily grid from the first to the last Erie observation, as day offsets.
interp_dates = convert_dates_to_days(
    pd.Series(
        pd.date_range(erie_df["date"].iloc[0], erie_df["date"].iloc[-1], freq="D")
    ),
    start_date=date,
)

# Evaluate both interpolants on the daily grid.
interp_chlor = org_chlor_interp(interp_dates)
interp_sst = org_sst_interp(interp_dates)

In [8]:
# Inspect the daily grid of day offsets used for the Erie interpolation.
interp_dates


0        0.0
1        1.0
2        2.0
3        3.0
4        4.0
       ...  
581    581.0
582    582.0
583    583.0
584    584.0
585    585.0
Name: Day, Length: 586, dtype: float64

In [9]:
# Calendar days from the first to the last Erie observation (one row per day).
dates = pd.Series(
    pd.date_range(erie_df["date"].iloc[0], erie_df["date"].iloc[-1], freq="D")
)

# Daily Erie table: each date paired with its interpolated chlor and sst value.
interp_erie_df = pd.DataFrame(
    {"date": dates, "chlor": interp_chlor, "sst": interp_sst}
)

In [10]:
# Write the daily Erie table to CSV, omitting the index column.
interp_erie_df.to_csv("erie_interp.csv", index = False)

# Huron

In [11]:
# Day offsets (relative to `date`) of the raw Huron observations.
org_time = convert_dates_to_days(huron_df["date"], start_date=date)

# One cubic interpolant per measured column, keyed on those offsets.
org_chlor_interp, org_sst_interp = (
    interp1d(org_time, huron_df[column], kind="cubic", fill_value="extrapolate")
    for column in ("chlor", "sst")
)

# Daily grid from the first to the last Huron observation, as day offsets.
interp_dates = convert_dates_to_days(
    pd.Series(
        pd.date_range(huron_df["date"].iloc[0], huron_df["date"].iloc[-1], freq="D")
    ),
    start_date=date,
)

# Evaluate both interpolants on the daily grid.
interp_chlor = org_chlor_interp(interp_dates)
interp_sst = org_sst_interp(interp_dates)

In [12]:
# Calendar days from the first to the last Huron observation (one row per day).
dates = pd.Series(
    pd.date_range(huron_df["date"].iloc[0], huron_df["date"].iloc[-1], freq="D")
)

# Daily Huron table: each date paired with its interpolated chlor and sst value.
interp_huron_df = pd.DataFrame(
    {"date": dates, "chlor": interp_chlor, "sst": interp_sst}
)

In [13]:
# Write the daily Huron table to CSV, omitting the index column.
interp_huron_df.to_csv("huron_interp.csv", index = False)

# Michigan

In [14]:
# Day offsets (relative to `date`) of the raw Michigan observations.
org_time = convert_dates_to_days(michigan_df["date"], start_date=date)

# One cubic interpolant per measured column, keyed on those offsets.
org_chlor_interp, org_sst_interp = (
    interp1d(org_time, michigan_df[column], kind="cubic", fill_value="extrapolate")
    for column in ("chlor", "sst")
)

# Daily grid from the first to the last Michigan observation, as day offsets.
interp_dates = convert_dates_to_days(
    pd.Series(
        pd.date_range(michigan_df["date"].iloc[0], michigan_df["date"].iloc[-1], freq="D")
    ),
    start_date=date,
)

# Evaluate both interpolants on the daily grid.
interp_chlor = org_chlor_interp(interp_dates)
interp_sst = org_sst_interp(interp_dates)

In [15]:
# Calendar days from the first to the last Michigan observation (one row per day).
dates = pd.Series(
    pd.date_range(michigan_df["date"].iloc[0], michigan_df["date"].iloc[-1], freq="D")
)

# Daily Michigan table: each date paired with its interpolated chlor and sst value.
interp_michigan_df = pd.DataFrame(
    {"date": dates, "chlor": interp_chlor, "sst": interp_sst}
)

In [16]:
# Write the daily Michigan table to CSV, omitting the index column.
interp_michigan_df.to_csv("michigan_interp.csv", index = False)

# Ontario

In [17]:
# Day offsets of the raw Ontario observations.
# The origin must be the same one used for `interp_dates` below: both are
# measured from `date`. A separate hard-coded literal here would shift the
# daily grid relative to the samples whenever it differed from `date`.
org_time = convert_dates_to_days(ontario_df["date"], start_date=date)

# One cubic interpolant per measured column, keyed on those offsets.
org_chlor_interp = interp1d(org_time, ontario_df["chlor"], kind="cubic", fill_value="extrapolate")
org_sst_interp = interp1d(org_time, ontario_df["sst"], kind="cubic", fill_value="extrapolate")

# Daily grid from the first to the last Ontario observation, as day offsets.
interp_dates = convert_dates_to_days(
    pd.Series(
        pd.date_range(ontario_df["date"].iloc[0], ontario_df["date"].iloc[-1], freq="D")
    ),
    start_date=date,
)

# Evaluate both interpolants on the daily grid.
interp_chlor = org_chlor_interp(interp_dates)
interp_sst = org_sst_interp(interp_dates)

In [18]:
# Calendar days from the first to the last Ontario observation (one row per day).
dates = pd.Series(
    pd.date_range(ontario_df["date"].iloc[0], ontario_df["date"].iloc[-1], freq="D")
)

# Daily Ontario table: each date paired with its interpolated chlor and sst value.
interp_ontario_df = pd.DataFrame(
    {"date": dates, "chlor": interp_chlor, "sst": interp_sst}
)

In [19]:
# Write the daily Ontario table to CSV, omitting the index column.
interp_ontario_df.to_csv("ontario_interp.csv", index = False)

# Superior

In [20]:
# Day offsets (relative to `date`) of the raw Superior observations.
org_time = convert_dates_to_days(superior_df["date"], start_date=date)

# One cubic interpolant per measured column, keyed on those offsets.
org_chlor_interp, org_sst_interp = (
    interp1d(org_time, superior_df[column], kind="cubic", fill_value="extrapolate")
    for column in ("chlor", "sst")
)

# Daily grid from the first to the last Superior observation, as day offsets.
interp_dates = convert_dates_to_days(
    pd.Series(
        pd.date_range(superior_df["date"].iloc[0], superior_df["date"].iloc[-1], freq="D")
    ),
    start_date=date,
)

# Evaluate both interpolants on the daily grid.
interp_chlor = org_chlor_interp(interp_dates)
interp_sst = org_sst_interp(interp_dates)

In [21]:
# Calendar days from the first to the last Superior observation (one row per day).
dates = pd.Series(
    pd.date_range(superior_df["date"].iloc[0], superior_df["date"].iloc[-1], freq="D")
)

# Daily Superior table: each date paired with its interpolated chlor and sst value.
interp_superior_df = pd.DataFrame(
    {"date": dates, "chlor": interp_chlor, "sst": interp_sst}
)

In [22]:
# Write the daily Superior table to CSV, omitting the index column.
interp_superior_df.to_csv("superior_interp.csv", index = False)

In [23]:
# Display the interpolated daily Superior table as a final visual check.
interp_superior_df

Unnamed: 0,date,chlor,sst
0,2019-01-09,3.932144,2.630252
1,2019-01-10,3.267805,2.726523
2,2019-01-11,2.714236,2.779243
3,2019-01-12,2.261658,2.793921
4,2019-01-13,1.900291,2.776064
...,...,...,...
581,2020-08-12,1.054696,16.204808
582,2020-08-13,1.034493,16.325007
583,2020-08-14,1.002084,16.531883
584,2020-08-15,0.955819,16.842726
