## This notebook will process all of the data in the antenna result directories and produce a 'processed_data.csv' file.

#### The below cell will install dependencies, if needed.

In [1]:
%%capture
! pip install folium
! pip install pandas
! pip install pandas_profiling

#### The necessary dependencies are imported.

In [2]:
import pandas as pd
import pandas_profiling
import folium
from folium.plugins import HeatMap

#### Add values for the below variables to return a processed file containing only a subset of the data.
##### For example, add a species to return a file of only data for that species. Or, add a species and an antenna to get data for a specific species/antenna

In [3]:
# User Selections
# Add a pythonic list to any of these for getting a subset of the data for the below analytics functions.
# This will not overwrite the processed_data.csv file with the subset.

SPECIES = []
ANTENNA = []
DATE = []
TAGID = []
COLUMNS = []

#### Execute 'fish_data.py' processing script.

In [4]:
%run -i fish_data

Importing Records from downstream 10.15.txt... 
Importing Records from downstream 10.2.txt... 
Importing Records from downstream 6.13.txt... 
Error processing line: D 2018-06-09 23:59:59.61 ï¿½8:16:01.03 HA 3D6.00184CE0D4    2    20

Importing Records from downstream 6.28.txt... 
Importing Records from downstream 6.8.txt... 
Error processing line: D 2008-10-18 Z6:42:N1.37 b2:17:I6.04 HB 000.0000000000 30137 25605

Error processing line: D 2018-05-12 00:00:05.29 ï¿½2:15:58.06 HA 3D6.00184CB873    2     0

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:05.19 ï¿½2:15:58.16 HA 3D6.00184CB873    3     4

Error processing line: D 2018-05-12 00:00:46.20 ï¿½2:15:59.31 HA 3D6.00184CB873    4     1

Error processing line: D 2018-05-12 00:16:25.72 00.00 HA 3D6.00184CB873    1     1

Error processing line: D 2018-05-12 01:45:35.65 ;0:56:01.13 HA 3D6.00184CBA2D    3     1

Error processing line: D 2018-05-12 01:4

#### The cell below does all of the heavy lifting of creating the 'processed_data.csv' file and reading the resulting file into a dataframe for use in the operations below. Each time this cell is run it will recreate the 'processed_data.csv' file with the data available in the antenna result directories.

In [5]:
# prepare dataframe for analysis

# Load fish tag data into a dataframe
fish_tag_data = pd.read_csv('./tag_data.csv', 
    names=['Date','Time','Tag ID','Species','Length','Capture Method','Marked At'], low_memory=False)

# Load antenna data into a dataframe
data = pd.read_csv('./processed_data.csv', 
    names=['D','Date','Time','Duration','Type','Tag ID','Count','Gap','Antenna'], low_memory=False)

# Join Dataframe on Tag ID
data = pd.merge(data,fish_tag_data[['Tag ID', 'Species', 'Length', 'Marked At']],on='Tag ID', how='left')

# Alter dtypes for time fields
data['Date'] = data['Date'].astype('datetime64[ns]')
data['Time'] = pd.to_timedelta(data['Time'])

# Antennae Lat/Long GLOBALS
U1_LAT=33.99644444
U1_LONG=-84.89666667
U2_LAT=33.99697222
U2_LONG=-84.89694444
U3_LAT=33.99700000
U3_LONG=-84.89805556
D1_LAT=33.99852778
D1_LONG=-84.89444444

# Add Lat/Long information to DATAFRAME
data.loc[data.Antenna == 'U1','lat'] = U1_LAT
data.loc[data.Antenna == 'U1','long'] = U1_LONG
data.loc[data.Antenna == 'U2','lat'] = U2_LAT
data.loc[data.Antenna == 'U2','long'] = U2_LONG
data.loc[data.Antenna == 'U3','lat'] = U3_LAT
data.loc[data.Antenna == 'U3','long'] = U3_LONG
data.loc[data.Antenna == 'D1','lat'] = D1_LAT
data.loc[data.Antenna == 'D1','long'] = D1_LONG

# Fill all missing values with a zero
data = data.fillna(value=0)
# pd.to_csv('./processed_data.csv', sep=",")

#### The below cell creates the subset dataframe from the selection provided by the user above. It will perform the heatmapping and the pandas_profiling from this subset. 

In [6]:
# Create a data subset, if desired.

if SPECIES:
    data = data.loc[data['Species'].isin(SPECIES)]
if ANTENNA:
    data = data.loc[data['Antenna'].isin(ANTENNA)]
if DATE:
    data = data.loc[data_subset['Date'].isin(DATE)]
if TAGID:
    data = data.loc[data_subset['Tag ID'].isin(TAGID)]
if COLUMNS:
    data = data.filter(COLUMNS, axis=1)

In [7]:
pandas_profiling.ProfileReport(data)

0,1
Number of variables,14
Number of observations,423395
Total Missing (%),0.0%
Total size in memory,48.5 MiB
Average record size in memory,120.0 B

0,1
Numeric,2
Categorical,8
Boolean,0
Date,1
Text (Unique),0
Rejected,3
Unsupported,0

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
D1,303856
U1,72297
U2,36037

Value,Count,Frequency (%),Unnamed: 3
D1,303856,71.8%,
U1,72297,17.1%,
U2,36037,8.5%,
U3,11205,2.6%,

0,1
Distinct count,399
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,3.6987
Minimum,1
Maximum,7007
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,1
Q3,3
95-th percentile,11
Maximum,7007
Range,7006
Interquartile range,2

0,1
Standard deviation,22.589
Coef of variation,6.1073
Kurtosis,29587
Mean,3.6987
MAD,3.587
Skewness,131.06
Sum,1566022
Variance,510.28
Memory size,6.5 MiB

Value,Count,Frequency (%),Unnamed: 3
1,224372,53.0%,
2,75578,17.9%,
3,36416,8.6%,
4,21070,5.0%,
5,13608,3.2%,
6,9680,2.3%,
7,6860,1.6%,
8,5374,1.3%,
9,4137,1.0%,
10,3265,0.8%,

Value,Count,Frequency (%),Unnamed: 3
1,224372,53.0%,
2,75578,17.9%,
3,36416,8.6%,
4,21070,5.0%,
5,13608,3.2%,

Value,Count,Frequency (%),Unnamed: 3
2758,1,0.0%,
3399,1,0.0%,
3749,1,0.0%,
4169,1,0.0%,
7007,1,0.0%,

0,1
Constant value,D

0,1
Distinct count,185
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Minimum,2018-02-27 00:00:00
Maximum,2037-04-17 00:00:00

0,1
Distinct count,1554
Unique (%),0.4%
Missing (%),0.0%
Missing (n),0

0,1
00:00:00.00,224380
00:00:01.04,25113
00:00:01.03,22083
Other values (1551),151819

Value,Count,Frequency (%),Unnamed: 3
00:00:00.00,224380,53.0%,
00:00:01.04,25113,5.9%,
00:00:01.03,22083,5.2%,
00:00:00.10,17617,4.2%,
00:00:02.07,11801,2.8%,
00:00:03.10,11519,2.7%,
00:00:02.06,9092,2.1%,
00:00:00.20,7510,1.8%,
00:00:01.02,5609,1.3%,
00:00:04.14,5129,1.2%,

0,1
Distinct count,2919
Unique (%),0.7%
Missing (%),0.0%
Missing (n),0

0,1
1,121892
2,50109
3,29046
Other values (2916),222348

Value,Count,Frequency (%),Unnamed: 3
1,121892,28.8%,
2,50109,11.8%,
3,29046,6.9%,
4,19118,4.5%,
5,14129,3.3%,
6,11043,2.6%,
7,8838,2.1%,
8,7439,1.8%,
9,6316,1.5%,
0,6305,1.5%,

0,1
Distinct count,120
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
140,37443
118,35667
0,29396
Other values (117),320889

Value,Count,Frequency (%),Unnamed: 3
140,37443,8.8%,
118,35667,8.4%,
0,29396,6.9%,
126,23270,5.5%,
94,21267,5.0%,
84,20694,4.9%,
177,17522,4.1%,
108,15444,3.6%,
129,15073,3.6%,
75,14495,3.4%,

0,1
Distinct count,11
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
D1.0,288010
U1.0,67949
0,29396
Other values (8),38040

Value,Count,Frequency (%),Unnamed: 3
D1.0,288010,68.0%,
U1.0,67949,16.0%,
0,29396,6.9%,
U2.0,27269,6.4%,
U3.0,7442,1.8%,
U2.3,1390,0.3%,
U1.3,1101,0.3%,
U2.2,318,0.1%,
U2.1,294,0.1%,
U1.2,193,0.0%,

0,1
Distinct count,8
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
LEAU,122864
MICO,98134
MODU,81596
Other values (5),120801

Value,Count,Frequency (%),Unnamed: 3
LEAU,122864,29.0%,
MICO,98134,23.2%,
MODU,81596,19.3%,
HYET,44934,10.6%,
0,29396,6.9%,
CAOL,26989,6.4%,
LEME,18705,4.4%,
LEAU,777,0.2%,

0,1
Distinct count,366
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0

0,1
3D6.00184CB873,34138
3D6.00184CE0AB,22223
3D6.1D592D6053,21028
Other values (363),346006

Value,Count,Frequency (%),Unnamed: 3
3D6.00184CB873,34138,8.1%,
3D6.00184CE0AB,22223,5.2%,
3D6.1D592D6053,21028,5.0%,
3D6.00184CBA35,20921,4.9%,
3D6.00184CBAF8,20130,4.8%,
3D6.00184CBA1D,17522,4.1%,
3D6.00184CE0D4,16009,3.8%,
3D6.00184CB8AF,15444,3.6%,
3D6.00184CBAC6,13244,3.1%,
3D6.00184CB9FD,13098,3.1%,

0,1
Distinct count,392940
Unique (%),92.8%
Missing (%),0.0%
Missing (n),0

0,1
0 days 23:56:22.080000,10
0 days 00:14:55.780000,9
0 days 00:14:55.280000,9
Other values (392937),423367

Value,Count,Frequency (%),Unnamed: 3
0 days 23:56:22.080000,10,0.0%,
0 days 00:14:55.780000,9,0.0%,
0 days 00:14:55.280000,9,0.0%,
0 days 02:42:12.290000,8,0.0%,
0 days 02:24:22.230000,8,0.0%,
0 days 23:56:41.770000,8,0.0%,
0 days 02:10:34.500000,8,0.0%,
0 days 23:02:32.830000,8,0.0%,
0 days 21:51:07.450000,8,0.0%,
0 days 23:28:45.970000,8,0.0%,

0,1
Constant value,HA

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,33.998
Minimum,33.996
Maximum,33.999
Zeros (%),0.0%

0,1
Minimum,33.996
5-th percentile,33.996
Q1,33.997
Median,33.999
Q3,33.999
95-th percentile,33.999
Maximum,33.999
Range,0.0020833
Interquartile range,0.0015556

0,1
Standard deviation,0.00085409
Coef of variation,2.5122e-05
Kurtosis,-0.79213
Mean,33.998
MAD,0.00075868
Skewness,-1.0503
Sum,14395000
Variance,7.2947e-07
Memory size,6.5 MiB

Value,Count,Frequency (%),Unnamed: 3
33.99852778,303856,71.8%,
33.99644444,72297,17.1%,
33.99697222,36037,8.5%,
33.997,11205,2.6%,

Value,Count,Frequency (%),Unnamed: 3
33.99644444,72297,17.1%,
33.99697222,36037,8.5%,
33.997,11205,2.6%,
33.99852778,303856,71.8%,

Value,Count,Frequency (%),Unnamed: 3
33.99644444,72297,17.1%,
33.99697222,36037,8.5%,
33.997,11205,2.6%,
33.99852778,303856,71.8%,

0,1
Correlation,0.94773

Unnamed: 0,D,Date,Time,Duration,Type,Tag ID,Count,Gap,Antenna,Species,Length,Marked At,lat,long
0,D,2018-10-02,06:44:50.470000,00:00:03.10,HA,3D6.00184CB9B6,4,14,D1,0,0,0,33.998528,-84.894444
1,D,2018-10-02,06:45:01.830000,00:00:02.08,HA,3D6.00184CB9B6,3,7,D1,0,0,0,33.998528,-84.894444
2,D,2018-10-02,06:45:06.970000,00:00:00.00,HA,3D6.00184CB9B6,1,1,D1,0,0,0,33.998528,-84.894444
3,D,2018-10-02,06:46:05.900000,00:00:00.00,HA,3D6.00184CB9B6,1,57,D1,0,0,0,33.998528,-84.894444
4,D,2018-10-02,06:46:08.960000,00:00:00.00,HA,3D6.00184CB9B6,1,1,D1,0,0,0,33.998528,-84.894444


In [8]:
map_hooray = folium.Map(location=[33.99697222, -84.89694444], zoom_start=15) 

# Ensure floats
data['lat'] = data['lat'].astype(float)
data['lat'] = data['lat'].astype(float)

# Remove NaNs
data = data[['lat', 'long']]
data = data.dropna(axis=0, subset=['lat','long'])

# List comprehension to make out list of lists
heat_data = [[row['lat'],row['long']] for index, row in data.iterrows()]

# Plot it on the map
HeatMap(heat_data).add_to(fishTrackMap)

# Display the map
fishTrackmap



NameError: name 'fishTrackMap' is not defined