In [23]:
import numpy as np
import pandas as pd

# Load the three input tables; all are public CSV exports from the EPA and NCCA.
#   df   - original water chemistry table:
#          https://www.epa.gov/sites/default/files/2021-04/ncca_2015_water_chemistry_great_lakes-data.csv
#   dfSD - Secchi disk (SD) readings, found in this archive:
#          https://www.epa.gov/system/files/other-files/2023-02/Final%20NCCA%20GL%20Special%20Study%20%282014-2018%29%20data%20and%20metadata.zip
#   dfDO - dissolved oxygen (DO) readings, from the same site as dfSD
WATER_CHEMISTRY_CSV = "ncca_2015_water_chemistry_great_lakes-data.csv"
SECCHI_CSV = "ncca_2014-2018_secchi_great_lakes_special_study_data.csv"
HYDROGRAPHIC_PROFILE_CSV = "ncca_2014-2018_hydrographic_profile_great_lakes_special_study_data.csv"

# The files are expected in the current working directory.
df = pd.read_csv(WATER_CHEMISTRY_CSV)
dfSD = pd.read_csv(SECCHI_CSV)
dfDO = pd.read_csv(HYDROGRAPHIC_PROFILE_CSV)

# non-TSI but important variables

# pH readings
# Select the RESULT values of the pH rows in one vectorized step. No NaN
# filter has been applied to df['RESULT'] at this point (the CHLA and PTL
# sections further down drop NaN rows themselves), so missing readings are
# removed here; otherwise a single NaN would turn the whole average into NaN.
pH_results = df.loc[df['ANALYTE'] == "PH", 'RESULT'].dropna()
# every valid pH reading, as a plain Python list
pHList = pH_results.tolist()
# arithmetic mean of the valid readings (NaN when there are no pH rows)
pHAvg = pH_results.mean()

# dissolved oxygen (DO) readings
# drop rows without a DO measurement so they do not enter the average;
# this rebinds dfDO for any code that uses it afterwards
dfDO = dfDO.dropna(subset=['DO'])
# DO values of the rows whose STUDY is "Lake_Erie_Study", selected once
erie_DO = dfDO.loc[dfDO['STUDY'] == "Lake_Erie_Study", 'DO']
# every selected DO reading, as a plain Python list
DOList = erie_DO.tolist()
# arithmetic mean of the selected readings (NaN when no row matches)
DOAvg = erie_DO.mean()

# TSI variables

# Chlorophyll A (CHLA) readings
# drop rows with a missing RESULT; this rebinds df for all code that follows
df = df.dropna(subset=['RESULT'])
# RESULT values of the CHLA rows, selected once
chla_results = df.loc[df['ANALYTE'] == "CHLA", 'RESULT']
# every CHLA reading, as a plain Python list
CHLAList = chla_results.tolist()
# arithmetic mean of the readings (NaN when there are no CHLA rows)
CHLA_Avg = chla_results.mean()
# TSI(CHLA) calculation: the equation is
#   TSI(CHL) = 10 * (6 - (2.04 - 0.68 * ln(CHL)) / ln(2))
# which simplifies to 9.81 * ln(CHL) + 30.6
# NOTE(review): the formula presumably expects CHL in ug/L -- confirm the units of RESULT
TSICHLA = 9.81 * np.log(CHLA_Avg) + 30.6

# Secchi Disk (SD) readings
# Secchi Disk Depth is the depth when the disk disappears and measures turbidity
# drop rows without a DISAPPEARS value so they do not enter the average;
# this rebinds dfSD for any code that uses it afterwards
dfSD = dfSD.dropna(subset=['DISAPPEARS'])
# DISAPPEARS values of the rows whose STUDY is "Lake_Erie_Study", selected once
erie_SD = dfSD.loc[dfSD['STUDY'] == "Lake_Erie_Study", 'DISAPPEARS']
# every selected Secchi reading, as a plain Python list
SDList = erie_SD.tolist()
# arithmetic mean of the selected readings (NaN when no row matches)
SDAvg = erie_SD.mean()
# TSI(SD) calculation, eq is TSI(SD) = 60 - 14.41 * ln(SD)
# NOTE(review): the formula presumably expects SD in meters -- confirm the units of DISAPPEARS
TSISD = 60 - 14.41 * np.log(SDAvg)

# Total Phosphorus (TP) readings
# drop rows with a missing RESULT (idempotent: a no-op if df was already
# filtered earlier, but it keeps this section correct on its own)
df = df.dropna(subset=['RESULT'])
# RESULT values of the total phosphorus rows (ANALYTE == "PTL"), selected once
tp_results = df.loc[df['ANALYTE'] == "PTL", 'RESULT']
# every TP reading, as a plain Python list
TPList = tp_results.tolist()
# arithmetic mean of the readings (NaN when there are no PTL rows)
TPAvg = tp_results.mean()
# TSI(TP) calculation, eq is TSI(TP) = 14.42 * ln(TP) + 4.15
# NOTE(review): the formula presumably expects TP in ug/L -- confirm the units of RESULT
TSITP = 14.42 * np.log(TPAvg) + 4.15

# activation function
def sigmoid(num):
    """Return the logistic sigmoid ``1 / (1 + exp(-num))`` of *num*.

    Args:
        num: A number or NumPy array; arrays are transformed element-wise.

    Returns:
        The sigmoid of ``num``; each value lies in the range [0, 1].
    """
    # For large negative inputs exp(-num) overflows to inf. The quotient
    # 1 / (1 + inf) is still the correct limit 0.0, so the overflow is
    # harmless; silence it locally instead of emitting a RuntimeWarning
    # (or raising FloatingPointError under np.seterr(over="raise")).
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-num))

def predict(inp, w, b):
    """Apply a single sigmoid unit to *inp*.

    Args:
        inp: Input vector.
        w: Weights, combined with ``inp`` through ``np.dot``.
        b: Bias added to the weighted sum.

    Returns:
        ``sigmoid(np.dot(inp, w) + b)``.
    """
    weighted_sum = np.dot(inp, w)
    return sigmoid(weighted_sum + b)




8.049840848806358 [8.17, 8.18, 8.18, 8.13, 7.99, 7.92, 7.94, 7.94, 8.04, 8.01, 7.99, 8.02, 8.11, 7.98, 8.51, 8.8, 8.22, 7.99, 7.85, 7.81, 8.0, 8.19, 8.19, 7.9, 8.42, 8.2, 8.2, 8.15, 8.1, 7.82, 7.8, 7.76, 8.1, 8.08, 8.14, 8.15, 7.78, 7.75, 8.13, 8.35, 7.72, 7.63, 8.06, 8.12, 8.06, 7.9, 8.09, 8.14, 7.8, 8.14, 8.17, 7.77, 7.81, 7.75, 7.83, 7.78, 8.06, 7.79, 7.81, 8.31, 8.03, 8.11, 7.85, 8.18, 8.12, 8.17, 8.15, 8.15, 8.27, 7.91, 8.16, 8.1, 8.06, 8.16, 7.87, 7.68, 8.14, 8.08, 7.92, 7.96, 8.31, 8.34, 8.11, 8.24, 8.17, 8.13, 7.8, 7.86, 8.14, 8.22, 7.8, 8.29, 7.8, 7.79, 8.28, 8.12, 8.2, 8.19, 8.2, 7.79, 8.15, 8.17, 8.24, 8.08, 8.27, 8.15, 8.45, 8.25, 8.25, 8.1, 8.04, 8.0, 9.18, 7.89, 8.07, 8.0, 8.07, 8.18, 8.04, 8.26, 8.08, 8.14, 8.15, 8.29, 7.99, 8.17, 7.89, 8.13, 8.13, 8.23, 8.13, 8.21, 8.19, 8.06, 7.75, 8.14, 8.2, 8.17, 8.59, 8.29, 8.22, 8.09, 8.26, 8.14, 7.98, 7.86, 7.8, 8.07, 8.17, 8.37, 8.23, 8.33, 8.22, 7.61, 8.03, 8.0, 7.92, 7.89, 8.17, 8.11, 7.72, 8.13, 8.3, 8.56, 7.69, 7.97, 8.16, 8.