## This notebook will process all of the data in the antenna result directories and produce a 'processed_data.csv' file.

#### The below cell will install dependencies, if needed.

In [1]:
# %%capture
# ! pip install folium
# ! pip install pandas
# ! pip install pandas_profiling
#
# NOTE: the lines above are disabled. To install the packages, uncomment them;
# `%%capture` is a cell magic and has to stay on the first line of the cell.

#### The necessary dependencies are imported.

In [2]:
import pandas as pd
import pandas_profiling
import folium
from folium.plugins import HeatMap

#### Add values to the variables below to restrict the analysis to a subset of the data.
##### For example, add a species to analyze only that species, or add a species and an antenna to analyze a specific species/antenna combination. The 'processed_data.csv' file itself is not changed by these selections.

In [3]:
# User selections
# ---------------
# Each name below holds a Python list of values to keep. A non-empty list
# restricts the dataframe used by the analysis cells further down; an empty
# list applies no filter for that field.
# The subset is never written back to processed_data.csv.

SPECIES: list = []   # values matched against the 'Species' column
ANTENNA: list = []   # values matched against the 'Antenna' column, e.g. ['U1', 'D1']
DATE: list = []      # values matched against the 'Date' column
TAGID: list = []     # values matched against the 'Tag ID' column
COLUMNS: list = []   # column labels to keep in the result

#### Execute 'fish_data.py' processing script. This creates the 'processed_data.csv' file. Each time this cell is run it will recreate the 'processed_data.csv' file.

In [4]:
# Run fish_data.py with -i so it executes in this notebook's namespace rather
# than a fresh one. NOTE(review): whether the script reads the selection
# variables defined above is not visible from here - confirm in fish_data.py.
%run -i fish_data

Importing Records from downstream 10.15.txt... 
Importing Records from downstream 10.2.txt... 
Importing Records from downstream 6.13.txt... 
Error processing line: D 2018-06-09 23:59:59.61 ï¿½8:16:01.03 HA 3D6.00184CE0D4    2    20

Importing Records from downstream 6.28.txt... 
Importing Records from downstream 6.8.txt... 
Error processing line: D 2008-10-18 Z6:42:N1.37 b2:17:I6.04 HB 000.0000000000 30137 25605

Error processing line: D 2018-05-12 00:00:05.29 ï¿½2:15:58.06 HA 3D6.00184CB873    2     0

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:46.20 ï¿½2:15:59.31 HA 3D6.00184CB873    4     1

Error processing line: D 2018-05-12 00:16:25.72 00.00 HA 3D6.00184CB873    1     1

Error processing line: D 2018-05-12 01:45:35.65 ;0:56:01.13 HA 3D6.00184CBA2D    3     1

Error processing line: D 2018-05-12 01:4

#### The cell below does all of the heavy lifting: it reads 'tag_data.csv' and 'processed_data.csv' into dataframes, joins them on 'Tag ID', and adds each antenna's latitude/longitude for use in the operations below.

In [None]:
# Build the analysis dataframe from the two CSV inputs.

# Column labels assigned to each file as it is read.
tag_column_names = ['Date', 'Time', 'Tag ID', 'Species', 'Length', 'Capture Method', 'Marked At']
antenna_column_names = ['D', 'Date', 'Time', 'Duration', 'Type', 'Tag ID', 'Count', 'Gap', 'Antenna']

# Fish tag records.
fish_tag_data = pd.read_csv(
    './tag_data.csv',
    names=tag_column_names,
    low_memory=False,
)

# Antenna records from the processed file.
data = pd.read_csv(
    './processed_data.csv',
    names=antenna_column_names,
    low_memory=False,
)

# Left join on 'Tag ID': every antenna row is kept and gains the Species,
# Length and Marked At values of the matching tag record (NaN when no tag
# record matches).
tag_details = fish_tag_data[['Tag ID', 'Species', 'Length', 'Marked At']]
data = data.merge(tag_details, on='Tag ID', how='left')

# Conversion of the time fields is currently disabled:
# data['Date'] = data['Date'].astype('datetime64[ns]')
# data['Time'] = pd.to_timedelta(data['Time'])

# Antenna positions, kept as module-level constants.
U1_LAT, U1_LONG = 33.99644444, -84.89666667
U2_LAT, U2_LONG = 33.99697222, -84.89694444
U3_LAT, U3_LONG = 33.99700000, -84.89805556
D1_LAT, D1_LONG = 33.99852778, -84.89444444

# Antenna label -> (latitude, longitude)
antenna_positions = {
    'U1': (U1_LAT, U1_LONG),
    'U2': (U2_LAT, U2_LONG),
    'U3': (U3_LAT, U3_LONG),
    'D1': (D1_LAT, D1_LONG),
}

# Write each antenna's position into the 'lat' / 'long' columns of its rows.
# Rows whose Antenna label is not in the table are not assigned a position.
for antenna_label, (antenna_lat, antenna_long) in antenna_positions.items():
    at_antenna = data.Antenna == antenna_label
    data.loc[at_antenna, 'lat'] = antenna_lat
    data.loc[at_antenna, 'long'] = antenna_long

# Quick sanity check of the merged frame: one randomly chosen row followed by
# the overall (rows, columns) shape.
# Missing values are left as NaN - no zero fill is applied in this cell; the
# heat map cell later drops rows that have no coordinates.
# Asking for zero rows when the frame is empty avoids the ValueError that a
# bare data.sample() raises when there is nothing to draw from.
print(data.sample(n=min(1, len(data))))
print(data.shape)

#### The cell below creates the subset dataframe from the selections provided by the user above. The profiling report and the heat map in the following cells are produced from this subset.

In [6]:
# Create a data subset, if desired.
# Each filter is skipped when its selection list is empty. Active filters are
# applied one after another, so a row must satisfy all of them to be kept.
# `data` is replaced by the filtered frame; re-run the loading cell to get the
# full data back before trying a different selection.

if SPECIES:
    data = data.loc[data['Species'].isin(SPECIES)]
if ANTENNA:
    data = data.loc[data['Antenna'].isin(ANTENNA)]
if DATE:
    # Filter on `data` itself; the name `data_subset` used here before is
    # never defined, so any DATE selection raised a NameError.
    data = data.loc[data['Date'].isin(DATE)]
if TAGID:
    # Same fix as for DATE: use `data`, not the undefined `data_subset`.
    data = data.loc[data['Tag ID'].isin(TAGID)]
if COLUMNS:
    # Keep only the listed columns. The heat map cell reads 'lat' and 'long',
    # so include both when that cell is going to be run.
    data = data.filter(COLUMNS, axis=1)

In [None]:
# Build the profiling report for the current `data` frame and show it as the
# cell's output.
profile_report = pandas_profiling.ProfileReport(data)
profile_report

In [None]:
# Heat map of the rows in `data`, plotted at their antenna coordinates.

# Base map centred on the same coordinates as the U2 antenna.
fishTrackMap = folium.Map(location=[33.99697222, -84.89694444], zoom_start=15)

# Work on a separate two-column frame so that `data` keeps all of its columns
# and the earlier cells (and this one) can be re-run.
# Both coordinate columns are cast to float; previously 'lat' was cast twice
# and 'long' was never cast.
heat_points = data[['lat', 'long']].astype(float)

# Rows without a position cannot be plotted, so drop them.
heat_points = heat_points.dropna(axis=0, subset=['lat', 'long'])

# HeatMap takes a list of [lat, long] pairs; convert the frame in one step
# instead of iterating over it row by row.
heat_data = heat_points.values.tolist()

# Plot it on the map
HeatMap(heat_data).add_to(fishTrackMap)

# Display the map
fishTrackMap

