### Description
This notebook contains all code used in the creation of the MassBay_2017_to_2022_upcast_update file.

In [45]:
import pandas as pd

In [46]:
# Base folder for every file this notebook reads or writes. Keep the trailing
# slash: paths are built by plain string concatenation.
data_dir = "../../data/"

In [47]:
# Load the upcast CSV into `mwra`; the cells below add columns to this frame.
upcast_csv = data_dir + 'MassBay/MWRA_MassBay_upcast_2017-202205.csv'
mwra = pd.read_csv(upcast_csv)

#### Add Lat, Lon, pH, VAL_QUAL from MassBay_2017_to_2022_batch

In [48]:
# Load the batch CSV that supplies LATITUDE, LONGITUDE and pH for matching rows.
batch_csv = data_dir + 'MassBay/concat/MassBay_2017_to_2022_batch.csv'
mwra_batch = pd.read_csv(batch_csv)

In [49]:
# Match every upcast row to a batch row on depth, temperature and sigma-t, then
# copy LATITUDE / LONGITUDE (and pH, when the upcast value is missing) from the
# batch row. Only unambiguous matches (exactly one batch row) are used.

# Largest absolute difference at which two measurements are treated as equal.
MATCH_TOLERANCE = 0.01

matches = 0    # upcast rows with exactly one batch match
no_match = 0   # upcast rows with no batch match
overlaps = 0   # upcast rows with more than one batch match
rows = []      # one-row DataFrames: the batch row behind each unique match

# The batch columns do not change inside the loop, so look them up once.
batch_depth = mwra_batch['DEPTH (m)']
batch_temp = mwra_batch['TEMP (C)']
batch_sigma_t = mwra_batch['SIGMA_T ()']

for ind, row in mwra.iterrows():
  depth = row['PROFILE_DEPTH']
  sigma_t = row['SIGMA_T']
  temp = row['TEMP']

  # Batch rows within tolerance on all three keys. A NaN key on either side
  # makes the comparison False, so such rows end up counted as "no match".
  match_row = mwra_batch[
    (abs(batch_depth - depth) <= MATCH_TOLERANCE)
    & (abs(batch_temp - temp) <= MATCH_TOLERANCE)
    & (abs(batch_sigma_t - sigma_t) <= MATCH_TOLERANCE)
  ]
  n_found = len(match_row)

  if n_found < 1:
    no_match += 1
  elif n_found > 1:
    # Ambiguous match: nothing is copied for this row.
    overlaps += 1
  else:
    matches += 1
    rows.append(match_row.iloc[[0]])
    match_ind = match_row.index[0]

    mwra.loc[ind, 'LATITUDE'] = mwra_batch.loc[match_ind, 'LATITUDE']
    mwra.loc[ind, 'LONGITUDE'] = mwra_batch.loc[match_ind, 'LONGITUDE']

    # Only fill pH where the upcast file has none; existing values are kept.
    if pd.isnull(mwra.loc[ind, 'PH']):
      mwra.loc[ind, 'PH'] = mwra_batch.loc[match_ind, 'pH ()']
      # NOTE(review): VAL_QUAL is set to -1 whenever the upcast pH was missing,
      # even if the batch pH is missing too. The meaning of -1 is not defined
      # in this notebook -- confirm against the data dictionary.
      mwra.loc[ind, 'VAL_QUAL'] = -1

print("Matches:", matches)
print("No Matches:", no_match)
print("Overlaps:", overlaps)

Matches: 2658
No Matches: 1152
Overlaps: 0


#### Add station target Lat/Lon from MWRA_MassBay_metadata where they are still missing

In [50]:
# Rows that name a station but still have neither coordinate filled in.
has_station = mwra['STAT_ID'].notna()
lat_missing = mwra['LATITUDE'].isna()
lon_missing = mwra['LONGITUDE'].isna()
station_defined = mwra[has_station & lat_missing & lon_missing]

In [51]:
# Station lookup table, indexed by 'Station ID'. The columns used from it are
# 'Target latitude' and 'Target longitude'.
stations = pd.read_csv(
  data_dir + 'MassBay/MWRA_MassBay_metadata.xlsx - Station locations.csv'
).set_index('Station ID')

In [52]:
# Fill the remaining coordinates from the station table and print how many
# rows lack a latitude before and after the fill.
missing_before = len(mwra[mwra['LATITUDE'].isnull()])
print(missing_before)

for ind, row in station_defined.iterrows():
  station = row['STAT_ID']
  # Stations absent from the lookup table are left untouched.
  if station not in stations.index:
    continue
  mwra.loc[ind, 'LATITUDE'] = stations.loc[station, 'Target latitude']
  mwra.loc[ind, 'LONGITUDE'] = stations.loc[station, 'Target longitude']

missing_after = len(mwra[mwra['LATITUDE'].isnull()])
print(missing_after)

1152
0


#### Add metadata reference

In [53]:
# Tag every row with the name of the source dataset (the same constant value
# is written to the whole 'Data Source' column).
source_label = 'MWRA_MassBay_upcast_2017-202205'
mwra['Data Source'] = source_label

#### Export as csv

In [54]:
# Write the enriched table to disk; index=False leaves the row index out of the file.
output_csv = data_dir + 'MassBay/concat/MassBay_2017_to_2022_upcast_update.csv'
mwra.to_csv(output_csv, index=False)