### Description
This notebook contains all the code used to create the `MassBay_2017_to_2022_batch.csv` file.

In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import dateparser
from pytz import timezone
import shutil
import datetime

In [2]:
# Root of the data directory, relative to this notebook. The trailing slash is
# required: the other cells build file paths by plain string concatenation
# (data_dir + 'MassBay/...').
data_dir = '../../data/'

In [3]:
# Load the event-data workbook that the rest of the notebook reshapes into
# the batch file.
event_workbook_path = data_dir + 'MassBay/concat/Copy of WN17x_to_WN22x_Event_Data.xlsx'
mwra_batch = pd.read_excel(event_workbook_path)

### Add VAL_QUAL values for pH from the old MassBay concatenation file

In [4]:
old_concat = pd.read_csv(data_dir + 'MassBay/concat/MassBay_2017_to_2022.csv')

# Get all rows of the old concatenation that have a non-null VAL_QUAL
concat_with_val_qual = old_concat[old_concat['VAL_QUAL'].notnull()]

# Phase 1: resolve every old row to exactly one row of mwra_batch *before*
# touching mwra_batch, so a failed match can never leave the batch
# half-annotated (and then silently exported by a later cell).
pending_val_quals = {}  # mwra_batch index label -> VAL_QUAL to write

for ind, row in concat_with_val_qual.iterrows():

  # Rows are paired on these four comparator columns using exact equality.
  # Note that a NaN comparator value never equals anything (NaN != NaN), so
  # such a row is reported as unmatched below.
  is_match = (
      (mwra_batch['LATITUDE'] == row['LATITUDE'])
    & (mwra_batch['LONGITUDE'] == row['LONGITUDE'])
    & (mwra_batch['DEPTH (m)'] == row['DEPTH (m)'])
    & (mwra_batch['CONDTVY (mS/cm)'] == row['CONDTVY (mS/cm)'])
  )
  match_labels = mwra_batch.index[is_match]

  # Zero or several candidates means the pairing is unreliable: stop loudly
  # with enough context to find the offending row instead of printing a
  # generic message and continuing with a partial result.
  if len(match_labels) != 1:
    raise ValueError(
      f"Couldn't find 1 to 1 match for old concat row {ind}: "
      f"{len(match_labels)} matching rows in mwra_batch "
      f"(LATITUDE={row['LATITUDE']}, LONGITUDE={row['LONGITUDE']}, "
      f"DEPTH (m)={row['DEPTH (m)']}, CONDTVY (mS/cm)={row['CONDTVY (mS/cm)']})"
    )

  pending_val_quals[match_labels[0]] = row['VAL_QUAL']

# Phase 2: every row matched, so copy the VAL_QUALs onto the matching rows.
# (If two old rows resolve to the same batch row, the later one wins, as in
# a row-by-row assignment.)
for match_ind, val_qual in pending_val_quals.items():
  mwra_batch.loc[match_ind, 'VAL_QUAL'] = val_qual

### Merge DISS_OXYGEN (mg/L) into DO_RAW (mg/L) and PCT_SAT (PCT) into PCT_SAT_RAW (PCT)

In [5]:
# Each kept column (key) has its missing values filled from a donor column
# (value); values already present in the kept column are left untouched.
fill_from = {
  'DO_RAW (mg/L)': 'DISS_OXYGEN (mg/L)',
  'PCT_SAT_RAW (PCT)': 'PCT_SAT (PCT)',
}
for kept_col, donor_col in fill_from.items():
  mwra_batch[kept_col] = mwra_batch[kept_col].fillna(mwra_batch[donor_col])

# Drop the two donor columns (DISS_OXYGEN (mg/L), PCT_SAT (PCT)) now that
# they have been folded in, along with the three depth helper columns.
columns_to_drop = list(fill_from.values()) + [
  'depth_code_sort',
  'depth_SORT',
  'Depths_Flipped',
]
mwra_batch.drop(columns=columns_to_drop, inplace=True)

### Add metadata reference + format timestamps

In [6]:
# Add metadata column
mwra_batch['Data Source'] = 'MassBay_2017_to_2022'

# Attach the US/Eastern timezone to the naive local profile times.
#
# This is done with pandas' vectorized tz_localize rather than
# datetime.replace(tzinfo=eastern): with a pytz zone, replace() on a plain
# datetime attaches the zone's historical LMT offset (-04:56) instead of
# EST/EDT. tz_localize picks the correct offset for each date, and raises on
# wall-clock times that are ambiguous or nonexistent around a DST change
# instead of guessing.
eastern = timezone('US/Eastern')
profile_times = pd.to_datetime(mwra_batch['PROF_DATE_TIME_LOCAL'])

# Truncate to whole seconds (sub-second precision is not exported), then localize
profile_times = profile_times.dt.floor('s').dt.tz_localize(eastern)

# Convert to an Excel-parsable string, e.g. "2017-02-08 09:15:00-0500"
mwra_batch['PROF_DATE_TIME_LOCAL'] = profile_times.dt.strftime("%Y-%m-%d %H:%M:%S%z")

### Export as csv

In [7]:
# Write the finished batch table next to the other concatenated files,
# without the DataFrame index column.
batch_csv_path = data_dir + 'MassBay/concat/MassBay_2017_to_2022_batch.csv'
mwra_batch.to_csv(batch_csv_path, index=False)