# Biosound data - Tidy up 
Prep the biosound data for use in the dashboard

To run this in PyCharm you need to set up a kernel that points to the venv:
`python -m ipykernel install --user --name=venv-osa`

Also, please note that the data files and folders in this notebook point to a specific path in a local Google Drive shared folder. This will not work for others - each person will need to update the paths for their own machine.

In [1]:
import pandas as pd
import numpy as np

import data_wrangler as dw
import os

In [2]:
# Set up some initial things
# Local path to the main "Processed Data" folder.
# Set the BIOSOUND_DATA_FOLDER environment variable to point the notebook at your own
# copy of the shared folder; when it is unset (or empty) the original default below is used.
_DEFAULT_DATA_FOLDER = ('/Users/michelle/Library/CloudStorage/GoogleDrive-michelle@waveformanalytics.com/.shortcut'
                        '-targets-by-id/1QNPk7Z3Yb2uhHsHCf49pw7k_2-ivhIJg/Processed_Data')
local_data_folder = os.environ.get('BIOSOUND_DATA_FOLDER') or _DEFAULT_DATA_FOLDER

# Fail early with an actionable message instead of a bare error from read_csv further down
if not os.path.isdir(local_data_folder):
    raise FileNotFoundError(
        f"Data folder not found: {local_data_folder!r}. "
        "Set the BIOSOUND_DATA_FOLDER environment variable to your local 'Processed_Data' folder."
    )

# The config CSV has one row per dataset (displayed in the next cell)
config_file = os.path.join(local_data_folder, 'BioSound_Datasets_MapApp.csv')
df_config = pd.read_csv(config_file)

# Folder that holds the annotation files for the datasets
annotations_folder = os.path.join(local_data_folder, 'Annotations')


In [3]:
# Take a look at the contents of the config file.
# Each row is one dataset; the 'Seascaper File' and 'Annotations File' columns hold the
# per-dataset file names (see the output below).
df_config

Unnamed: 0,Dataset,folder name,short name,Source,Contact,Sampling Rate (kHz),Bit Rate,Recording Cycle,Hydrophone Latitude,Hydrophone Longitude,Instrument Depth (m),Ecosystem Type,Selected Time Periods,Seascaper File,Annotations File
0,"Biscayne Bay, FL",Biscayne_Bay,Biscayne Bay,University of Miami,Neil Hammerschlag/Abby Tinari,32,16,10 seconds every minute,25.396,-80.237,,Mangrove,Feb - Mar 2019,CaesarCreek_SeascapeR_20190101_to_20190401.csv,
1,"Chukchi Sea, Hanna Shoal",Chukchi_Sea_Hanna_Shoal,Chuckchi Sea,Oregon State University,Kate Stafford,16,16,25 minutes every hour,71.6,-161.5,30.0,Offshore,Jan - Feb 2019,ChuckChi_Sea_SeascapeR_20190101_to_20190401.csv,Chuckchi_Sea_Vessel_2019_02_ISO.csv
2,"Gray's Reef, GA",GraysReef_GA,Gray's Reef,NEFSC-SanctSound,Tim Rowell,48,16,Continuous five hour files,31.396,-80.89,16.0,Offshore,Jan - Feb 2019,GraysReef_SeascapeR_20190101_to_20190401.csv,sanctsound_products_detections_gr01_sanctsound...
3,"Key West, FL",KeyWest_FL,Key West,Florida Fish & Wildlife,Jess Keller,48,16,30 seconds every five minutes,24.442,-81.934,23.0,Coral Reef,Feb - Mar 2020,KeyWest_SeascapeR_20200101_to_20200401.csv,KW_Annotations.csv
4,"May River, SC",MayRiver_SC,May River,University of S. Carolina Beaufort,Alyssa Marian/Eric Montie,80,16,2 minutes every hour,32.195,-80.792,4.5,Estuary,Jan - Mar 2019,MayRiver_SeascapeR_20190101_to_20190401.csv,Master_Manual_14M_2h_011119_071619.xlsx
5,"Olowalu (Maui, HI)",Olowalu_Maui_HI,Olowalu,SanctSound - Hawaiian Islands Humpback NMS,Eden Zang,48,16,Continuous five hour files,20.807,-156.655,59.7,Island/ Nearshore,Jan - Feb 2019,HI01_NMS_SeascapeR_20190101_to_20190401.csv,sanctsound_products_detections_hi01_sanctsound...
6,ONC-MEF,ONC_MEF,ONC,Ocean Networks Canada,Jasper Kanes,64,24,Continuous 5 minute files,47.949,-129.098,2189.0,Offshore,Jan - Feb 2019,ONC_MEF_SeascapeR_20190101_to_20190401.csv,ONC_MEF_Vessels_2019_02_ISO.csv
7,OOI-HYDBBA106,OOI_HYDBBA106,OOI,Ocean Observatories Initiative,Liz Ferguson,64,16,Continuous 5 minute files,44.637,-124.306,80.0,Oregon Shelf,Jan - Feb 2019,HYDBBA106_SeascapeR_20190101_to_20190401.csv,OOI_HYDBBA105_Vessel_2019_02_ISO.csv


## Annotations data

### Prep Key West annotations
Key West annotations are a bit different from the others. They're in a folder that contains several .txt files that need to first be merged and then copied into the same annotations folder as the other datasets.

In [4]:
# Folder holding the individual Key West annotation .txt files that need merging
KW_ANNOTATIONS_FOLDER = os.path.join(local_data_folder, 'KeyWest_FL/v1/Fish_Annotations')
# Merged output goes into the shared annotations folder; this is the same location the
# original 'Annotations/KW_Annotations.csv' path (relative to local_data_folder) pointed to
KW_ANNOTATIONS_OUTFILE = os.path.join(annotations_folder, 'KW_Annotations.csv')
# Backward-compatible alias for the original (misspelled) constant name
KW_ANNOTATIONS_OUTFIILE = KW_ANNOTATIONS_OUTFILE

# Run the function from data_wrangler to convert to the regular annotations format and move the file to the same location as the rest of the annotation files
dw.combine_annotation_txt_files(KW_ANNOTATIONS_FOLDER, KW_ANNOTATIONS_OUTFILE)

### Prep May River annotations data

In [None]:
# TODO: placeholder - the file name has not been filled in yet. Joining with '' yields
# local_data_folder itself (with a trailing separator), i.e. a folder, not a file.
# NOTE(review): the config table lists 'Master_Manual_14M_2h_011119_071619.xlsx' as the
# May River annotations file - presumably that is the intended target; confirm its location.
MR_ANNOTATIONS_FILE = os.path.join(local_data_folder, '')

## Index data

In [5]:
# Build the (un-normalized) index dataframe from the files under the data folder
df_aco = dw.prep_index_data(local_data_folder, normalize=False)

# Columns handed to the normalizer: everything from position 7 up to, but excluding,
# the last two columns (presumably the acoustic index columns - TODO confirm)
columns_to_normalize = df_aco.columns[7:-2]
df_aco_norm = dw.normalize_df(df_aco, columns_to_normalize)

In [6]:
# List the distinct values in the "Dataset" column of the index dataframe.
# NOTE(review): the saved output shows only 'Biscayne Bay, FL' although the config file
# lists eight datasets - confirm whether the other datasets were expected to load here.
np.unique(df_aco["Dataset"])

array(['Biscayne Bay, FL'], dtype=object)

# Add the annotation information to index dataframes

In [7]:
# Load the fish codes lookup table
fish_codes_path = "../shiny/data/fish_codes.csv"
df_codes = pd.read_csv(fish_codes_path)

# Sorted list of the distinct values in the 'code' column
unique_fish_codes = np.unique(df_codes['code']).tolist()

# Add one new column per fish code to both the raw and the normalized index dataframes.
# Presumably these columns are filled in by the add_annotations_to_df call in the next
# cell - TODO confirm.
for index_frame in (df_aco, df_aco_norm):
    dw.add_new_columns(index_frame, unique_fish_codes)

In [None]:
# Pass the index dataframe, the dataset config table and the fish-code lookup to
# data_wrangler's annotation helper. The return value is not captured, so this
# presumably updates df_aco in place - TODO confirm.
# NOTE(review): the fish-code columns were added to both df_aco and df_aco_norm above,
# but only df_aco is passed here - confirm whether df_aco_norm needs the same call.
dw.add_annotations_to_df(df_aco, df_config, df_codes)