In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

## Reading in data for each lake

### Lake Superior

In [3]:
####### EVAPORATION #######

# Load the Lake Superior evaporation table: skip the 3 leading lines of the
# file and drop the first 50 data rows (intended to make 2000 the first year;
# presumably the record starts in 1950 -- verify against the file).
filePath = '../Data/evaporation_sup.csv'
s_evap = pd.read_csv(filePath, skiprows=3).iloc[50:, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Evap Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
s_evap_melted = (
    s_evap
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Evap Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index
s_evap = s_evap_melted.loc[:, ['Time', 'Evap Value']].reset_index(drop=True)

# Conversion
# s_evap['Evap Value'] = s_evap['Evap Value'] * 0.001

# s_evap

In [5]:
####### PRECIPITATION #######

# Load the Lake Superior precipitation table: skip the 5 leading lines of the
# file and keep data rows 60..80 (21 rows; intended to start at year 2000 --
# verify the row offsets against the file).
filePath = '../Data/prc_sup_lake_mon.csv'
s_prc = pd.read_csv(filePath, skiprows=5).iloc[60:81, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Prc Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
s_prc_melted = (
    s_prc
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Prc Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index; values are cast to float
s_prc = (
    s_prc_melted.loc[:, ['Time', 'Prc Value']]
    .reset_index(drop=True)
    .astype({'Prc Value': float})
)

# Conversion
# s_prc['Prc Value'] = s_prc['Prc Value'] * 0.001

# s_prc

In [7]:
####### RUNOFF #######

# Load the Lake Superior runoff table.
# FIX: this cell previously read '../Data/runoff_eri_arm.csv' (the Lake Erie
# file), so the Superior runoff series was silently Erie data. The name below
# follows the runoff_<lake>_arm.csv pattern used for every other lake --
# verify that the file exists under ../Data.
filePath = '../Data/runoff_sup_arm.csv'

# Month numbers (1-12) -> three-letter abbreviations used in the Time label
month_conversion = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

# We want two columns: Time and Runoff Value (Flow).
s_runoff = (
    pd.read_csv(filePath, skiprows=2)
    # skip the first 1224 data rows (intended to make 2000 the first year)
    # and keep only the first three columns (Year, Mon, Flow)
    .iloc[1224:, :3]
    .rename(columns={'Mon': 'Month', 'Flow': 'Runoff Value (Flow)'})
    .assign(Month=lambda frame: frame['Month'].map(month_conversion))
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['Year'].astype(str))
    .loc[:, ['Time', 'Runoff Value (Flow)']]
    .reset_index(drop=True)
    # drop the last 4 rows (presumably trailing records outside the period
    # covered by the other series -- TODO confirm)
    .iloc[:-4, :]
)

# Conversion
# s_runoff['Runoff Value (Flow)'] = s_runoff['Runoff Value (Flow)'] * 0.001

# s_runoff

### Lakes Michigan and Huron

Note: since we treat Lakes Michigan and Huron as a single entity, we sum their evaporation, precipitation, and runoff data month by month.

In [18]:
####### EVAPORATION #######

def evaporation_data(file_path, start_row=3, start_index=50, end_index=None):
    """Load a wide evaporation CSV and return it as a long monthly series.

    The table read from ``file_path`` must contain a ``YYYY`` column; every
    other column is treated as a month and is expected to be named
    ``Jan`` ... ``Dec``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    start_row : int, default 3
        Number of leading lines skipped before the header row
        (passed to ``pd.read_csv`` as ``skiprows``).
    start_index : int, default 50
        Position of the first data row to keep. The default is intended to
        make 2000 the first year (presumably the record starts in 1950 --
        verify against the data files).
    end_index : int or None, default None
        Position one past the last data row to keep; ``None`` keeps
        everything up to the end of the table.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time`` (``'<Mon>-<YYYY>'`` strings) and ``Evap Value``,
        sorted by year and calendar month, with a fresh 0..n-1 index.
    """
    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Read the table and keep only the requested block of rows
    evap_wide = pd.read_csv(file_path, skiprows=start_row).iloc[start_index:end_index, :]

    # Wide -> long: one row per (year, month) pair
    evap_melted = pd.melt(evap_wide, id_vars=['YYYY'], var_name='Month', value_name='Evap Value')

    # An ordered categorical makes months sort by calendar order, not alphabetically
    evap_melted['Month'] = pd.Categorical(evap_melted['Month'], categories=month_order, ordered=True)
    evap_melted = evap_melted.sort_values(by=['YYYY', 'Month'])

    # Combine month and year into a single label
    evap_melted['Time'] = evap_melted['Month'].astype(str) + '-' + evap_melted['YYYY'].astype(str)

    # NOTE: a unit conversion (x 0.001) was left disabled in the original
    # notebook; values are returned exactly as read from the file.
    return evap_melted[['Time', 'Evap Value']].reset_index(drop=True)


# Build the evaporation series for Lake Huron and Lake Michigan separately
file_path_h = '../Data/evaporation_hur.csv'
file_path_m = '../Data/evaporation_mic.csv'
h_evap_data = evaporation_data(file_path_h)
m_evap_data = evaporation_data(file_path_m)

# The two lakes are treated as one entity, so add the frames element-wise.
# NOTE(review): the addition also concatenates the two 'Time' strings, which
# is why 'Time' is restored from the Michigan frame right after; rows are
# matched on the row index, so both files are assumed to cover the same months.
mh_evap_data = m_evap_data.add(h_evap_data).assign(Time=m_evap_data['Time'])

# mh_evap_data

In [21]:
####### PRECIPITATION #######

def precipitation_data(file_path, start_row=5, start_index=60, end_index=81):
    """Load a wide precipitation CSV and return it as a long monthly series.

    The table read from ``file_path`` must contain a ``YYYY`` column; every
    other column is treated as a month and is expected to be named
    ``Jan`` ... ``Dec``.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    start_row : int, default 5
        Number of leading lines skipped before the header row
        (passed to ``pd.read_csv`` as ``skiprows``).
    start_index, end_index : int, defaults 60 and 81
        Half-open range of data-row positions to keep. The defaults are
        intended to start the series at year 2000 -- verify the offsets
        against the data files.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time`` (``'<Mon>-<YYYY>'`` strings) and ``Prc Value``
        (float), sorted by year and calendar month, with a fresh 0..n-1 index.
    """
    month_order = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Read the table and keep only the requested block of rows
    wide = pd.read_csv(file_path, skiprows=start_row).iloc[start_index:end_index, :]

    long_form = (
        wide
        # wide -> long: one row per (year, month) pair
        .melt(id_vars=['YYYY'], var_name='Month', value_name='Prc Value')
        # an ordered categorical makes months sort by calendar order
        .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=month_order, ordered=True))
        .sort_values(by=['YYYY', 'Month'])
        # combine month and year into a single label
        .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
    )

    # NOTE: a unit conversion (x 0.001) is left disabled here, as in the
    # original notebook; only the dtype is normalised to float.
    return (
        long_form.loc[:, ['Time', 'Prc Value']]
        .reset_index(drop=True)
        .astype({'Prc Value': float})
    )


# Build the precipitation series for Lake Huron and Lake Michigan separately
file_path_h = '../Data/prc_hur_lake_mon.csv'
file_path_m = '../Data/prc_mic_lake_mon.csv'
h_prc_data = precipitation_data(file_path_h)
m_prc_data = precipitation_data(file_path_m)

# The two lakes are treated as one entity, so add the frames element-wise.
# NOTE(review): the addition also concatenates the two 'Time' strings, which
# is why 'Time' is restored from the Michigan frame right after; rows are
# matched on the row index, so both files are assumed to cover the same months.
mh_prc_data = m_prc_data.add(h_prc_data).assign(Time=m_prc_data['Time'])

# mh_prc_data

In [23]:
####### RUNOFF #######

def runoff_data(file_path, start_row=2, start_index=1224, drop_last=4):
    """Load a monthly runoff CSV and return it as a two-column series.

    The first three columns of the table must be ``Year``, ``Mon`` (month
    number 1-12) and ``Flow``; any further columns are ignored.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file.
    start_row : int, default 2
        Number of leading lines skipped before the header row
        (passed to ``pd.read_csv`` as ``skiprows``).
    start_index : int, default 1224
        Position of the first data row to keep. The default is intended to
        make 2000 the first year -- verify against the data files.
    drop_last : int, default 4
        Number of trailing rows to discard (presumably records outside the
        period covered by the other series -- TODO confirm). ``0`` keeps
        every row.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time`` (``'<Mon>-<Year>'`` strings) and
        ``Runoff Value (Flow)``, in file order, indexed 0..n-1.

    Raises
    ------
    ValueError
        If ``drop_last`` is negative.
    """
    if drop_last < 0:
        raise ValueError("drop_last must be non-negative")

    # Month numbers (1-12) -> three-letter abbreviations used in the Time label
    month_conversion = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

    # Read the table, keep rows from start_index on and the first three columns
    runoff = (
        pd.read_csv(file_path, skiprows=start_row)
        .iloc[start_index:, :3]
        .rename(columns={'Mon': 'Month', 'Flow': 'Runoff Value (Flow)'})
    )

    # Replace month numbers by names, then combine month and year into one label
    runoff['Month'] = runoff['Month'].map(month_conversion)
    runoff['Time'] = runoff['Month'].astype(str) + '-' + runoff['Year'].astype(str)

    # Keep the two desired columns with a fresh 0..n-1 index
    runoff = runoff[['Time', 'Runoff Value (Flow)']].reset_index(drop=True)

    # Trim the trailing rows; computing the end position explicitly keeps
    # drop_last=0 meaningful (a plain iloc[:-0] would return an empty frame).
    keep = max(len(runoff) - drop_last, 0)
    return runoff.iloc[:keep, :]


# Build the runoff series for Lake Huron and Lake Michigan separately
file_path_h = '../Data/runoff_hur_arm.csv'
file_path_m = '../Data/runoff_mic_arm.csv'
h_runoff_data = runoff_data(file_path_h)
m_runoff_data = runoff_data(file_path_m)

# The two lakes are treated as one entity, so add the frames element-wise.
# NOTE(review): the addition also concatenates the two 'Time' strings, which
# is why 'Time' is restored from the Michigan frame right after; rows are
# matched on the row index, so both files are assumed to cover the same months.
mh_runoff_data = m_runoff_data.add(h_runoff_data).assign(Time=m_runoff_data['Time'])

# mh_runoff_data

### Lake Erie

In [7]:
####### EVAPORATION #######

# Load the Lake Erie evaporation table: skip the 3 leading lines of the file
# and drop the first 50 data rows (intended to make 2000 the first year;
# presumably the record starts in 1950 -- verify against the file).
filePath = '../Data/evaporation_eri.csv'
e_evap = pd.read_csv(filePath, skiprows=3).iloc[50:, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Evap Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
e_evap_melted = (
    e_evap
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Evap Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index
e_evap = e_evap_melted.loc[:, ['Time', 'Evap Value']].reset_index(drop=True)

# Conversion
# e_evap['Evap Value'] = e_evap['Evap Value'] * 0.001

# e_evap


In [8]:
####### PRECIPITATION #######

# Load the Lake Erie precipitation table: skip the 5 leading lines of the
# file and keep data rows 60..80 (21 rows; intended to start at year 2000 --
# verify the row offsets against the file).
filePath = '../Data/prc_eri_lake_mon.csv'
e_prc = pd.read_csv(filePath, skiprows=5).iloc[60:81, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Prc Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
e_prc_melted = (
    e_prc
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Prc Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index; values are cast to float
e_prc = (
    e_prc_melted.loc[:, ['Time', 'Prc Value']]
    .reset_index(drop=True)
    .astype({'Prc Value': float})
)

# Conversion
# e_prc['Prc Value'] = e_prc['Prc Value'] * 0.001

# e_prc

In [9]:
####### RUNOFF #######

# Load the Lake Erie runoff table (2 leading lines of the file are skipped)
filePath = '../Data/runoff_eri_arm.csv'

# Month numbers (1-12) -> three-letter abbreviations used in the Time label
month_conversion = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

# We want two columns: Time and Runoff Value (Flow).
e_runoff = (
    pd.read_csv(filePath, skiprows=2)
    # skip the first 1224 data rows (intended to make 2000 the first year)
    # and keep only the first three columns (Year, Mon, Flow)
    .iloc[1224:, :3]
    .rename(columns={'Mon': 'Month', 'Flow': 'Runoff Value (Flow)'})
    .assign(Month=lambda frame: frame['Month'].map(month_conversion))
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['Year'].astype(str))
    .loc[:, ['Time', 'Runoff Value (Flow)']]
    .reset_index(drop=True)
    # drop the last 4 rows (presumably trailing records outside the period
    # covered by the other series -- TODO confirm)
    .iloc[:-4, :]
)

# Conversion
# e_runoff['Runoff Value (Flow)'] = e_runoff['Runoff Value (Flow)'] * 0.001

# e_runoff

### Lake Ontario


In [10]:
####### EVAPORATION #######

# Load the Lake Ontario evaporation table: skip the 3 leading lines of the
# file and drop the first 50 data rows (intended to make 2000 the first year;
# presumably the record starts in 1950 -- verify against the file).
filePath = '../Data/evaporation_ont.csv'
o_evap = pd.read_csv(filePath, skiprows=3).iloc[50:, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Evap Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
o_evap_melted = (
    o_evap
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Evap Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index
o_evap = o_evap_melted.loc[:, ['Time', 'Evap Value']].reset_index(drop=True)

# Conversion
# o_evap['Evap Value'] = o_evap['Evap Value'] * 0.001

# o_evap

In [11]:
####### PRECIPITATION #######

# Load the Lake Ontario precipitation table: skip the 5 leading lines of the
# file and keep data rows 60..80 (21 rows; intended to start at year 2000 --
# verify the row offsets against the file).
filePath = '../Data/prc_ont_lake_mon.csv'
o_prc = pd.read_csv(filePath, skiprows=5).iloc[60:81, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Prc Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
o_prc_melted = (
    o_prc
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Prc Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index; values are cast to float
o_prc = (
    o_prc_melted.loc[:, ['Time', 'Prc Value']]
    .reset_index(drop=True)
    .astype({'Prc Value': float})
)

# Conversion
# o_prc['Prc Value'] = o_prc['Prc Value'] * 0.001

# o_prc

In [12]:
####### RUNOFF #######

# Load the Lake Ontario runoff table (2 leading lines of the file are skipped)
filePath = '../Data/runoff_ont_arm.csv'

# Month numbers (1-12) -> three-letter abbreviations used in the Time label
month_conversion = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

# We want two columns: Time and Runoff Value (Flow).
o_runoff = (
    pd.read_csv(filePath, skiprows=2)
    # skip the first 1224 data rows (intended to make 2000 the first year)
    # and keep only the first three columns (Year, Mon, Flow)
    .iloc[1224:, :3]
    .rename(columns={'Mon': 'Month', 'Flow': 'Runoff Value (Flow)'})
    .assign(Month=lambda frame: frame['Month'].map(month_conversion))
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['Year'].astype(str))
    .loc[:, ['Time', 'Runoff Value (Flow)']]
    .reset_index(drop=True)
    # drop the last 4 rows (presumably trailing records outside the period
    # covered by the other series -- TODO confirm)
    .iloc[:-4, :]
)

# Conversion
# o_runoff['Runoff Value (Flow)'] = o_runoff['Runoff Value (Flow)'] * 0.001

# o_runoff

### Lake Saint Clair


In [13]:
####### EVAPORATION #######

# Load the Lake Saint Clair evaporation table: skip the 3 leading lines of
# the file and drop the first 50 data rows (intended to make 2000 the first
# year; presumably the record starts in 1950 -- verify against the file).
filePath = '../Data/evaporation_stc.csv'
stc_evap = pd.read_csv(filePath, skiprows=3).iloc[50:, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Evap Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
stc_evap_melted = (
    stc_evap
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Evap Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index
stc_evap = stc_evap_melted.loc[:, ['Time', 'Evap Value']].reset_index(drop=True)

# Conversion
# stc_evap['Evap Value'] = stc_evap['Evap Value'] * 0.001

# stc_evap

In [14]:
####### PRECIPITATION #######

# Load the Lake Saint Clair precipitation table: skip the 5 leading lines of
# the file and keep data rows 60..80 (21 rows; intended to start at year
# 2000 -- verify the row offsets against the file).
filePath = '../Data/prc_stc_lake_mon.csv'
stc_prc = pd.read_csv(filePath, skiprows=5).iloc[60:81, :]

# Reshape from the wide layout (a YYYY column plus one column per month)
# to a long table with two columns of interest: Time and Prc Value.
monthOrder = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
stc_prc_melted = (
    stc_prc
    .melt(id_vars=['YYYY'], var_name='Month', value_name='Prc Value')
    # an ordered categorical makes months sort by calendar order, not alphabetically
    .assign(Month=lambda frame: pd.Categorical(frame['Month'], categories=monthOrder, ordered=True))
    .sort_values(by=['YYYY', 'Month'])
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['YYYY'].astype(str))
)

# Final two-column frame with a fresh 0..n-1 index; values are cast to float
stc_prc = (
    stc_prc_melted.loc[:, ['Time', 'Prc Value']]
    .reset_index(drop=True)
    .astype({'Prc Value': float})
)

# Conversion
# stc_prc['Prc Value'] = stc_prc['Prc Value'] * 0.001

# stc_prc

In [15]:
####### RUNOFF #######

# Load the Lake Saint Clair runoff table (2 leading lines of the file are skipped)
filePath = '../Data/runoff_stc_arm.csv'

# Month numbers (1-12) -> three-letter abbreviations used in the Time label
month_conversion = {1: 'Jan', 2: 'Feb', 3: 'Mar', 4: 'Apr', 5: 'May', 6: 'Jun', 7: 'Jul', 8: 'Aug', 9: 'Sep', 10: 'Oct', 11: 'Nov', 12: 'Dec'}

# We want two columns: Time and Runoff Value (Flow).
stc_runoff = (
    pd.read_csv(filePath, skiprows=2)
    # skip the first 816 data rows (intended to make 2000 the first year;
    # note this offset differs from the 1224 used for the other lakes)
    # and keep only the first three columns (Year, Mon, Flow)
    .iloc[816:, :3]
    .rename(columns={'Mon': 'Month', 'Flow': 'Runoff Value (Flow)'})
    .assign(Month=lambda frame: frame['Month'].map(month_conversion))
    # label each row as '<Month>-<Year>'
    .assign(Time=lambda frame: frame['Month'].astype(str) + '-' + frame['Year'].astype(str))
    .loc[:, ['Time', 'Runoff Value (Flow)']]
    .reset_index(drop=True)
    # drop the last 4 rows (presumably trailing records outside the period
    # covered by the other series -- TODO confirm)
    .iloc[:-4, :]
)

# Conversion
# stc_runoff['Runoff Value (Flow)'] = stc_runoff['Runoff Value (Flow)'] * 0.001

# stc_runoff