## This notebook will process all of the data in the antenna result directories and produce a 'processed_data.csv' file.

#### The below cell will install dependencies, if needed.

In [1]:
%%capture
! pip install folium
! pip install pandas
! pip install pandas_profiling

#### The necessary dependencies are imported.

In [2]:
import pandas as pd
import pandas_profiling
import folium
from folium.plugins import HeatMap

#### Add values to the variables below to restrict the analysis to a subset of the data.
##### For example, add a species to analyze only the data for that species. Or, add a species and an antenna to analyze the data for a specific species/antenna combination.

In [3]:
# User Selections
# Each variable takes a Python list. A non-empty list narrows the dataframe
# used by the analytics cells below; an empty list applies no restriction.
# The processed_data.csv file itself is never overwritten with the subset.

SPECIES = []   # values matched against the 'Species' column, e.g. ['LEAU']
ANTENNA = []   # values matched against the 'Antenna' column, e.g. ['U3']
DATE = []      # values matched against the 'Date' column, e.g. ['2018-08-16']
TAGID = []     # values matched against the 'Tag ID' column, e.g. ['3D6.00184CBA5E']
COLUMNS = []   # names of the columns to keep, e.g. ['Date', 'Tag ID', 'lat', 'long']

#### Execute the 'fish_data.py' processing script. This creates the 'processed_data.csv' file. Each time this cell is run, the 'processed_data.csv' file is recreated.

In [4]:
# Run the fish_data script; the -i flag executes it in this notebook's namespace
# instead of a fresh, empty one.
# NOTE(review): whether fish_data.py actually reads any notebook variables is not
# visible from here -- confirm against the script.
%run -i fish_data

Importing Records from downstream 10.15.txt... 
Importing Records from downstream 10.2.txt... 
Importing Records from downstream 6.13.txt... 
Error processing line: D 2018-06-09 23:59:59.61 ï¿½8:16:01.03 HA 3D6.00184CE0D4    2    20

Importing Records from downstream 6.28.txt... 
Importing Records from downstream 6.8.txt... 
Error processing line: D 2008-10-18 Z6:42:N1.37 b2:17:I6.04 HB 000.0000000000 30137 25605

Error processing line: D 2018-05-12 00:00:05.29 ï¿½2:15:58.06 HA 3D6.00184CB873    2     0

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:46.20 ï¿½2:15:59.31 HA 3D6.00184CB873    4     1

Error processing line: D 2018-05-12 00:16:25.72 00.00 HA 3D6.00184CB873    1     1

Error processing line: D 2018-05-12 01:45:35.65 ;0:56:01.13 HA 3D6.00184CBA2D    3     1

Error processing line: D 2018-05-12 01:4

#### The cell below does the heavy lifting: it reads 'processed_data.csv' and 'tag_data.csv' into dataframes, joins them on Tag ID, and adds the antenna coordinates for use in the operations below.

In [None]:
# Prepare the dataframe used by the analysis cells below.

# Column names are supplied explicitly for both CSV files (names=...), so the
# first row of each file is read as data rather than as a header.
tag_columns = ['Date','Time','Tag ID','Species','Length','Capture Method','Marked At']
antenna_columns = ['D','Date','Time','Duration','Type','Tag ID','Count','Gap','Antenna']

# Fish tag data
fish_tag_data = pd.read_csv('./tag_data.csv', names=tag_columns, low_memory=False)

# Antenna data
data = pd.read_csv('./processed_data.csv', names=antenna_columns, low_memory=False)

# Left join on Tag ID: every antenna row is kept, and the Species / Length /
# Marked At columns are NaN for tags that do not appear in the tag data.
tag_attributes = fish_tag_data[['Tag ID', 'Species', 'Length', 'Marked At']]
data = data.merge(tag_attributes, on='Tag ID', how='left')

# 'Date' and 'Time' are left exactly as read from the CSV; no dtype conversion
# is applied to them here.

# Antennae Lat/Long GLOBALS
U1_LAT, U1_LONG = 33.99644444, -84.89666667
U2_LAT, U2_LONG = 33.99697222, -84.89694444
U3_LAT, U3_LONG = 33.99700000, -84.89805556
D1_LAT, D1_LONG = 33.99852778, -84.89444444

# Antenna name -> (latitude, longitude)
antenna_coords = {
    'U1': (U1_LAT, U1_LONG),
    'U2': (U2_LAT, U2_LONG),
    'U3': (U3_LAT, U3_LONG),
    'D1': (D1_LAT, D1_LONG),
}

# Add Lat/Long information to the dataframe. Rows whose Antenna value is not
# one of the four names above keep NaN in both columns.
for antenna_name, (antenna_lat, antenna_long) in antenna_coords.items():
    at_antenna = data['Antenna'] == antenna_name
    data.loc[at_antenna, 'lat'] = antenna_lat
    data.loc[at_antenna, 'long'] = antenna_long

# Missing values are left as NaN; no fill is applied in this cell.

# Show one randomly chosen row as a quick sanity check of the result.
print(data.sample())

#### The cell below creates the subset dataframe from the selections provided by the user above. The profiling report and the heat map that follow are generated from this subset.

In [6]:
# Create a data subset, if desired.
#
# Each non-empty user selection narrows `data` further, so the filters combine
# with AND. Empty selections are skipped and leave `data` untouched.
# All row filters read from `data` itself: the DATE and TAGID branches used to
# reference an undefined `data_subset` name and raised NameError when used.
# NOTE: `data` is overwritten here, so re-run the cell that loads the dataframe
# before applying a different (wider) selection.

if SPECIES:
    data = data.loc[data['Species'].isin(SPECIES)]
if ANTENNA:
    data = data.loc[data['Antenna'].isin(ANTENNA)]
if DATE:
    data = data.loc[data['Date'].isin(DATE)]
if TAGID:
    data = data.loc[data['Tag ID'].isin(TAGID)]
if COLUMNS:
    # Keep only the requested columns. The heat map cell further down reads
    # 'lat' and 'long', so include those two if the map is needed.
    data = data.filter(COLUMNS, axis=1)

In [7]:
# Build the profiling report for the (possibly subsetted) dataframe and leave it
# as the cell's last expression so the notebook renders it.
profile_report = pandas_profiling.ProfileReport(data)
profile_report

0,1
Number of variables,14
Number of observations,3896
Total Missing (%),0.3%
Total size in memory,456.6 KiB
Average record size in memory,120.0 B

0,1
Numeric,1
Categorical,8
Boolean,0
Date,0
Text (Unique),0
Rejected,5
Unsupported,0

0,1
Constant value,U3

0,1
Distinct count,45
Unique (%),1.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.1199
Minimum,1
Maximum,109
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,1
Q3,3
95-th percentile,12
Maximum,109
Range,108
Interquartile range,2

0,1
Standard deviation,5.1783
Coef of variation,1.6598
Kurtosis,76.085
Mean,3.1199
MAD,2.7402
Skewness,6.4425
Sum,12155
Variance,26.815
Memory size,60.9 KiB

Value,Count,Frequency (%),Unnamed: 3
1,2156,55.3%,
2,657,16.9%,
3,264,6.8%,
4,170,4.4%,
5,134,3.4%,
6,91,2.3%,
7,69,1.8%,
8,60,1.5%,
10,35,0.9%,
9,34,0.9%,

Value,Count,Frequency (%),Unnamed: 3
1,2156,55.3%,
2,657,16.9%,
3,264,6.8%,
4,170,4.4%,
5,134,3.4%,

Value,Count,Frequency (%),Unnamed: 3
49,1,0.0%,
50,1,0.0%,
62,1,0.0%,
81,1,0.0%,
109,1,0.0%,

0,1
Constant value,D

0,1
Distinct count,18
Unique (%),0.5%
Missing (%),0.0%
Missing (n),0

0,1
2018-08-16,1813
2018-08-06,1005
2018-08-17,686
Other values (15),392

Value,Count,Frequency (%),Unnamed: 3
2018-08-16,1813,46.5%,
2018-08-06,1005,25.8%,
2018-08-17,686,17.6%,
2018-08-18,53,1.4%,
2018-09-06,46,1.2%,
2018-09-03,40,1.0%,
2018-09-02,36,0.9%,
2018-08-31,36,0.9%,
2018-09-01,28,0.7%,
2018-09-04,27,0.7%,

0,1
Distinct count,124
Unique (%),3.2%
Missing (%),0.0%
Missing (n),0

0,1
00:00:00.00,2156
00:00:01.04,291
00:00:01.03,275
Other values (121),1174

Value,Count,Frequency (%),Unnamed: 3
00:00:00.00,2156,55.3%,
00:00:01.04,291,7.5%,
00:00:01.03,275,7.1%,
00:00:03.10,139,3.6%,
00:00:02.07,115,3.0%,
00:00:02.06,95,2.4%,
00:00:04.14,73,1.9%,
00:00:01.02,61,1.6%,
00:00:07.24,42,1.1%,
00:00:06.20,41,1.1%,

0,1
Distinct count,403
Unique (%),10.3%
Missing (%),0.0%
Missing (n),0

0,1
1,898
2,400
3,283
Other values (400),2315

Value,Count,Frequency (%),Unnamed: 3
1,898,23.0%,
2,400,10.3%,
3,283,7.3%,
4,208,5.3%,
5,161,4.1%,
6,129,3.3%,
0,102,2.6%,
7,83,2.1%,
8,82,2.1%,
10,67,1.7%,

0,1
Distinct count,20
Unique (%),0.5%
Missing (%),1.3%
Missing (n),50

0,1
104,1635
82,909
87,536
Other values (16),766

Value,Count,Frequency (%),Unnamed: 3
104,1635,42.0%,
82,909,23.3%,
87,536,13.8%,
89,294,7.5%,
85,150,3.9%,
65,64,1.6%,
135,58,1.5%,
138,53,1.4%,
271,40,1.0%,
160,35,0.9%,

0,1
Distinct count,6
Unique (%),0.2%
Missing (%),1.3%
Missing (n),50

0,1
U3.0,3566
U2.1,163
U2.0,112
Other values (2),5
(Missing),50

Value,Count,Frequency (%),Unnamed: 3
U3.0,3566,91.5%,
U2.1,163,4.2%,
U2.0,112,2.9%,
U2.3,4,0.1%,
U1.3,1,0.0%,
(Missing),50,1.3%,

0,1
Distinct count,6
Unique (%),0.2%
Missing (%),1.3%
Missing (n),50

0,1
LEAU,2056
LEME,1551
CAOL,164
Other values (2),75
(Missing),50

Value,Count,Frequency (%),Unnamed: 3
LEAU,2056,52.8%,
LEME,1551,39.8%,
CAOL,164,4.2%,
MODU,40,1.0%,
HYET,35,0.9%,
(Missing),50,1.3%,

0,1
Distinct count,24
Unique (%),0.6%
Missing (%),0.0%
Missing (n),0

0,1
3D6.00184CBA5E,1635
3D6.00184CBA67,909
3D6.00184CBAA5,536
Other values (21),816

Value,Count,Frequency (%),Unnamed: 3
3D6.00184CBA5E,1635,42.0%,
3D6.00184CBA67,909,23.3%,
3D6.00184CBAA5,536,13.8%,
3D6.00184CBA82,294,7.5%,
3D6.00184CBA78,150,3.9%,
3D6.00184CBA9E,64,1.6%,
3D6.1D592D607D,58,1.5%,
3D6.00184CBA7C,40,1.0%,
3D6.00184CB900,35,0.9%,
3D6.00184CBAA0,31,0.8%,

0,1
Distinct count,24
Unique (%),0.6%
Missing (%),0.0%
Missing (n),0

0,1
16:00:00,426
13:00:00,424
17:00:00,406
Other values (21),2640

Value,Count,Frequency (%),Unnamed: 3
16:00:00,426,10.9%,
13:00:00,424,10.9%,
17:00:00,406,10.4%,
14:00:00,392,10.1%,
12:00:00,388,10.0%,
15:00:00,329,8.4%,
11:00:00,271,7.0%,
18:00:00,163,4.2%,
07:00:00,147,3.8%,
04:00:00,140,3.6%,

0,1
Constant value,HA

0,1
Constant value,33.997

0,1
Constant value,-84.898

Unnamed: 0,D,Date,Time,Duration,Type,Tag ID,Count,Gap,Antenna,Species,Length,Marked At,lat,long
0,D,2018-08-06,11:00:00,00:00:01.04,HA,3D6.1D592D6039,2,302,U3,,,,33.997,-84.898056
1,D,2018-08-06,11:00:00,00:00:00.00,HA,3D6.1D592D6039,1,149,U3,,,,33.997,-84.898056
2,D,2018-08-06,11:00:00,00:00:02.07,HA,3D6.1D592D6039,3,381,U3,,,,33.997,-84.898056
3,D,2018-08-06,11:00:00,00:00:00.00,HA,3D6.1D592D6039,1,1,U3,,,,33.997,-84.898056
4,D,2018-08-06,11:00:00,00:00:02.08,HA,3D6.00184CBA5E,3,1213,U3,LEAU,104.0,U3.0,33.997,-84.898056


In [9]:
# Heat map of detections by antenna location.

# Map centred on the U2 antenna (these are the U2_LAT / U2_LONG coordinates).
fishTrackMap = folium.Map(location=[33.99697222, -84.89694444], zoom_start=15)

# Work on a separate coordinates-only frame so that `data` keeps all of its
# columns and this cell can be re-run without affecting any other cell.
# Both columns are cast to float (the 'long' cast used to be a repeated 'lat' cast).
heat_points = data[['lat', 'long']].astype(float)

# Remove NaNs: rows without a known antenna location cannot be plotted.
heat_points = heat_points.dropna(axis=0, subset=['lat', 'long'])

# List of [lat, long] pairs, the input format HeatMap is given here.
heat_data = heat_points.values.tolist()

# Plot it on the map
HeatMap(heat_data).add_to(fishTrackMap)

# Display the map
fishTrackMap

