In [57]:
# import needed packages
import pandas as pd
import string
from datetime import datetime

In [58]:
# Read the raw fish survey records from CSV
fish = pd.read_csv(filepath_or_buffer='../original_data/FISH.csv')
# Preview the first three rows
fish.head(n=3)

Unnamed: 0,FIELDID,Latitude,Longitude,Date and Time,Common Name,Quantity
0,Low4,42.286604,-83.475661,8/25/2015,Gizzard shad,1
1,Low9,42.283461,-83.505311,8/22/2016,Gizzard shad,9
2,Low11,42.297215,-83.525437,8/22/2016,Gizzard shad,11


In [59]:
# Normalise each common name with string.capwords (first letter of every word
# upper-cased, the rest lower-cased) so names line up when merging
fish['Common Name'] = fish['Common Name'].map(string.capwords)

In [60]:
# Parse the 'Date and Time' strings (month/day/year, e.g. 8/25/2015) into pandas
# datetimes. pd.to_datetime converts the whole column in one vectorized call
# instead of running datetime.strptime once per row; a string that does not
# match the format still raises. Missing values become NaT rather than raising
# a TypeError, and re-running this cell on the already-parsed column is harmless.
fish['Date and Time'] = pd.to_datetime(fish['Date and Time'], format='%m/%d/%Y')

In [61]:
# Create the 'Year' column from the parsed dates, using the vectorized .dt
# accessor rather than a per-row lambda
fish['Year'] = fish['Date and Time'].dt.year

In [62]:
# Create the 'Month' column (1-12) from the parsed dates, using the vectorized
# .dt accessor rather than a per-row lambda
fish['Month'] = fish['Date and Time'].dt.month

In [63]:
# Create the 'Day' column (day of month) from the parsed dates, using the
# vectorized .dt accessor rather than a per-row lambda
fish['Day'] = fish['Date and Time'].dt.day

In [64]:
# Read the species table (common name and category) from CSV
species = pd.read_csv(filepath_or_buffer='../P51_SPECIES.csv')
# Preview the first three rows
species.head(n=3)

Unnamed: 0,Common Name,Category
0,Sea Lamprey,Intolerant
1,Silver Lamprey,Intolerant
2,Northern Brook Lamprey,Intolerant


In [65]:
# Left-join the species categories onto the fish records by common name.
# Every fish row is kept: a species listed under several categories yields one
# output row per category (its Quantity repeats on each), and a species with no
# match in the species table gets an empty Category.
combined = pd.merge(fish, species, how='left', on='Common Name')
combined

Unnamed: 0,FIELDID,Latitude,Longitude,Date and Time,Common Name,Quantity,Year,Month,Day,Category
0,Low4,42.286604,-83.475661,2015-08-25,Gizzard Shad,1,2015,8,25,
1,Low9,42.283461,-83.505311,2016-08-22,Gizzard Shad,9,2016,8,22,
2,Low11,42.297215,-83.525437,2016-08-22,Gizzard Shad,11,2016,8,22,
3,LR-6,42.285000,-83.383888,2013-09-06,Gizzard Shad,5,2013,9,6,
4,Fowl2,42.282260,-83.505150,2014-08-22,Gizzard Shad,2,2014,8,22,
...,...,...,...,...,...,...,...,...,...,...
15400,Low11,42.297215,-83.525437,2022-09-24,Black Crappie,1,2022,9,24,Sunfish
15401,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Insectivores
15402,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Tolerant
15403,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Darter


In [66]:
# Preview the first three rows of the merged frame (fish records plus Category)
combined.head(n=3)

Unnamed: 0,FIELDID,Latitude,Longitude,Date and Time,Common Name,Quantity,Year,Month,Day,Category
0,Low4,42.286604,-83.475661,2015-08-25,Gizzard Shad,1,2015,8,25,
1,Low9,42.283461,-83.505311,2016-08-22,Gizzard Shad,9,2016,8,22,
2,Low11,42.297215,-83.525437,2016-08-22,Gizzard Shad,11,2016,8,22,


In [67]:
# Count the distinct species recorded for each field in each year. Latitude and
# Longitude are part of the grouping key, so they are carried into the result.
species_per_field = (
    combined
    .groupby(['FIELDID', 'Year', 'Latitude', 'Longitude'])['Common Name']
    .nunique()
    .rename('Species per Year')
    .reset_index()
)

In [68]:
# Display the species counts per field, year and coordinate pair
species_per_field

Unnamed: 0,FIELDID,Year,Latitude,Longitude,Species per Year
0,Bell1,2016,42.429240,-83.396660,10
1,Bell4,2016,42.403270,-83.362370,10
2,Bell4,2022,42.403270,-83.362370,8
3,Bish2,2017,42.471310,-83.451510,8
4,Bish2,2022,42.471310,-83.451510,7
...,...,...,...,...,...
357,Wall7,2017,42.509150,-83.495620,9
358,WalnutL,2018,42.566600,-83.325630,10
359,Will1,2015,42.330020,-83.464220,6
360,Will1,2022,42.330020,-83.464220,8


In [69]:
# Write the per-field species counts to CSV for mapping
# (the default row index is written as the first, unnamed column)
species_per_field.to_csv(path_or_buf='../fish_mapping.csv')

In [70]:
# Attach the species count of each field/year group to every row of the
# combined data.
#
# The join uses every column species_per_field was grouped by, so each row
# picks up the count of exactly its own group. Joining on FIELDID and Year alone
# would match a row to several groups (duplicating it) whenever a field has more
# than one coordinate pair within a year, and it also forced the suffixed
# Latitude/Longitude columns to be dropped and renamed afterwards.
# Rows whose group is missing from species_per_field (e.g. rows with missing
# coordinates, which groupby leaves out by default) get NaN in
# 'Species per Year'.
fish_updated = combined.merge(
    species_per_field,
    on=['FIELDID', 'Year', 'Latitude', 'Longitude'],
    how='left',
    # species_per_field holds one row per key combination; fail loudly if not
    validate='many_to_one',
)
fish_updated

Unnamed: 0,FIELDID,Latitude,Longitude,Date and Time,Common Name,Quantity,Year,Month,Day,Category,Species per Year
0,Low4,42.286604,-83.475661,2015-08-25,Gizzard Shad,1,2015,8,25,,21
1,Low9,42.283461,-83.505311,2016-08-22,Gizzard Shad,9,2016,8,22,,10
2,Low11,42.297215,-83.525437,2016-08-22,Gizzard Shad,11,2016,8,22,,13
3,LR-6,42.285000,-83.383888,2013-09-06,Gizzard Shad,5,2013,9,6,,14
4,Fowl2,42.282260,-83.505150,2014-08-22,Gizzard Shad,2,2014,8,22,,13
...,...,...,...,...,...,...,...,...,...,...,...
15400,Low11,42.297215,-83.525437,2022-09-24,Black Crappie,1,2022,9,24,Sunfish,14
15401,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Insectivores,14
15402,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Tolerant,14
15403,Low11,42.297215,-83.525437,2022-09-24,Johnny Darter,34,2022,9,24,Darter,14


In [71]:
# Write the fish records, now with Category and 'Species per Year', to a new CSV
# (the default row index is written as the first, unnamed column)
fish_updated.to_csv(path_or_buf='../fish_updated.csv')