In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as graph
import seaborn as sns

import cartopy.crs as ccrs

import statsmodels

import os
from tqdm import tqdm, trange

from convertbng.util import convert_bng, convert_lonlat
import utm

In [2]:
# Species-code lookup: field code -> snake_case common name.

code_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Alberta/To_look_at/Banff_NP_Freshwater_Lake_Fish_Index_2017_data_dictionary.csv')
# Disabled step kept for reference (it would drop the last six rows):
# code_df = code_df.iloc[:-6]

# Keep only the code column and its English description, under shorter names.
code_df = code_df.rename(columns={'Data_Value_Valeur_de_la_donnée': 'code_name',
                                  'Value_Description_EN_Description_de_la_valeur': 'common_name'})
code_df = code_df[['code_name', 'common_name']]

# Normalise the descriptions: lower case, spaces replaced by underscores.
code_df = code_df.assign(common_name=code_df['common_name'].str.lower().str.replace(' ', '_'))
display(code_df.sample(5))

# {code_name: common_name}
dict_code_name = code_df.set_index('code_name')['common_name'].to_dict()

Unnamed: 0,code_name,common_name
3,BLTR,bull_trout
41,SLSC,slimy_sculpin
13,RNTR,rainbow_trout
19,ARLM,arctic_lamprey
10,MATR,marstoni_trout


In [3]:
# Extra codes that are merged into dict_code_name below.
# Several spellings deliberately map to the same common name.
new_codes = {
    'LNDC': 'longnose_dace',
    'BNTR': 'brown_trout',
    'RNTR1': 'rainbow_trout',
    'CTTR1': 'cutthroat_trout',
    'WHSC': 'white_sucker',
    'BLTRCTTR(N&I)': 'bull_trout_x_cutthroat_trout',
    'CTTR(hybrids)': 'cutthroat_trout',
    'CTTRhybrids': 'cutthroat_trout',
    # NOTE(review): 'SUCK' is mapped to white sucker here; confirm it is not
    # a generic "sucker" code in the source data.
    'SUCK': 'white_sucker',
}

# Existing keys are overwritten, new keys are added.
dict_code_name.update(new_codes)

In [4]:
# Species dictionary: common name -> scientific name.

species_name_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv')

# {common_name: scientific_name}
dict_species_name = species_name_df.set_index('common_name')['scientific_name'].to_dict()

In [5]:
# Additional common-name -> scientific-name pairs merged into the lookup.
new_names = {
    'nine-spine_stickleback': 'pungitius_pungitius',
}

dict_species_name.update(new_names)

# Show the merged lookup.
dict_species_name

{'striped_bass': 'morone_saxatilis',
 'american_shad': 'alosa_sapidissima',
 'white_sucker': 'catostomus_commersonii',
 'channel_catfish': 'ictalurus_punctatus',
 'mooneye': 'hiodon_tergisus',
 'walleye': 'sander_vitreus',
 'lake_sturgeon': 'acipenser_fulvescens',
 'rainbow_smelt': 'osmerus_mordax',
 'longnose_sucker': 'catostomus_catostomus',
 'alewife': 'alosa_pseudoharengus',
 'american_eel': 'anguilla_rostrata',
 'white_perch': 'morone_americana',
 'quillback': 'carpiodes_cyprinus',
 'smallmouth_bass': 'micropterus_dolomieu',
 'silver_redhorse': 'moxostoma_anisurum',
 'logperch': 'percina_caprodes',
 'tessellated_darter': 'etheostoma_olmstedi',
 'brown_bullhead': 'ameiurus_nebulosus',
 'longnose_gar': 'lepisosteus_osseus',
 'freshwater_drum': 'aplodinotus_grunniens',
 'fallfish': 'semotilus_corporalis',
 'channel_darter': 'percina_copelandi',
 'emerald_shiner': 'notropis_atherinoides',
 'johnny_darter': 'etheostoma_nigrum',
 'tadpole_madtom': 'noturus_gyrinus',
 'trout-perch': 'per

In [16]:
# Rebuild the two-column species table from the (updated) lookup dict.
species_name_df = pd.DataFrame({
    'common_name': list(dict_species_name.keys()),
    'scientific_name': list(dict_species_name.values()),
})
# Writing the table back to disk is currently disabled:
# species_name_df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv', index=False)

# Compiling occurrence datasets

### Angler's Map

In [20]:
# Load the angler's-map river records and keep one row per (waterbody, species, year).
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/AnglerSaskatchewanStockedTroutWatersRivers.csv')
# rename() returns a new frame, so later column assignments do not act on a slice view.
df = df[['Name', 'Species', 'Year', 'LatMid', 'LongMid']].rename(
    columns={'Name': 'waterbody_name', 'Year': 'year', 'LatMid': 'latitude', 'LongMid': 'longitude'})

# Site key: coordinates rounded to 3 decimals, joined as "<lat>_<lon>".
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# Normalise common names, then map them to scientific names.
# The result is assigned back instead of using `df['Species'].replace(..., inplace=True)`:
# in-place replace on a selected column is a chained mutation that may not reach `df`
# (pandas warns about it and it is a no-op under Copy-on-Write).
# Names absent from dict_species_name are left unchanged by replace().
df['Species'] = df['Species'].str.lower().str.replace(' ', '_').replace(dict_species_name)

df.head()

Unnamed: 0,waterbody_name,Species,year,latitude,longitude,lat_long
0,Battle Creek,oncorhynchus_mykiss,2019,49.42649,-109.581201,49.426_-109.581
1,Battle Creek,salmo_trutta,2018,49.42649,-109.581201,49.426_-109.581
2,Battle Creek,oncorhynchus_mykiss,2018,49.42649,-109.581201,49.426_-109.581
3,Battle Creek,oncorhynchus_mykiss,2017,49.42649,-109.581201,49.426_-109.581
4,Battle Creek,salmo_trutta,2016,49.42649,-109.581201,49.426_-109.581


In [21]:
print(df['year'].min(), df['year'].max())

# Add one presence column per species.
#   1. Flag a row with 1 when its own 'Species' value is that species (NaN otherwise).
#   2. Fill the NaNs with the (year, lat_long) group mean, so every row of a
#      site-year carries 1 for each species recorded there; species never recorded
#      in that site-year stay NaN (the mean of an all-NaN group is NaN).
# The comparison is made against the 'Species' column only. The previous
# `df.isin([col]).any(1)` scanned every column and passed the axis positionally,
# which pandas >= 2.0 rejects. dropna() prevents a missing species from creating
# a NaN-named column.
for col in df['Species'].dropna().unique():
    df[col] = np.where(df['Species'] == col, 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

1999 2019


Unnamed: 0,waterbody_name,Species,year,latitude,longitude,lat_long,oncorhynchus_mykiss,salmo_trutta,salvelinus_fontinalis,esox_spp,sander_vitreus,percidae_spp,hiodon_alosoides,acipenser_fulvescens
6,Battle Creek,oncorhynchus_mykiss,2015,49.42649,-109.581201,49.426_-109.581,1.0,,,,,,,
50,Greenbush River,salvelinus_fontinalis,2019,52.928232,-102.705929,52.928_-102.706,,,1.0,,,,,
41,Etomami River,oncorhynchus_mykiss,2018,52.737599,-102.405885,52.738_-102.406,1.0,,1.0,,,,,
40,Etomami River,salvelinus_fontinalis,2018,52.737599,-102.405885,52.738_-102.406,1.0,,1.0,,,,,
94,White Gull Creek,salvelinus_fontinalis,2016,53.962225,-104.79052,53.962_-104.791,,,1.0,,,,,


In [23]:
# Row count before collapsing to one row per site-year.
print(len(df))

# The per-record species label is no longer needed once the presence columns exist.
df = df.drop(columns=['Species'])
# Rows are compared on every column except the first (waterbody_name).
df = df.drop_duplicates(subset=list(df.columns[1:]))

# Row count after de-duplication.
print(len(df))

display(df.sample(5))
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Processed/sask_anglers_map_1999_2019_occurence.csv',
    index=False,
)

109
87


Unnamed: 0,waterbody_name,year,latitude,longitude,lat_long,oncorhynchus_mykiss,salmo_trutta,salvelinus_fontinalis,esox_spp,sander_vitreus,percidae_spp,hiodon_alosoides,acipenser_fulvescens
15,Bear Creek,2015,49.853232,-109.09391,49.853_-109.094,,,1.0,,,,,
50,Greenbush River,2019,52.928232,-102.705929,52.928_-102.706,,,1.0,,,,,
19,Belanger Creek,2019,49.560107,-109.360333,49.56_-109.36,1.0,,1.0,,,,,
75,Nipekamew Creek,2012,54.181732,-104.889575,54.182_-104.89,,,1.0,,,,,
99,White Gull Creek,2011,53.962225,-104.79052,53.962_-104.791,,,1.0,,,,,


# Prince Albert

In [58]:
# Build {species code: scientific name} from the Prince Albert data dictionary.
df_code = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/Prince_Albert_NP_Freshwater_Piscivorous_Fish_2009-2015_data_dictionary_2.csv',
                      engine='python')

# The code column's header ends in an accented character that may come back as a
# replacement character depending on how the file is decoded. Match on the ASCII
# prefix so the lookup works either way.
code_col_matches = [c for c in df_code.columns if str(c).startswith('Data_Value_Valeur_de_la_donn')]
if len(code_col_matches) != 1:
    raise KeyError("expected exactly one 'Data_Value_Valeur_de_la_donn*' column, "
                   'found: {}'.format(code_col_matches))
code_col = code_col_matches[0]

# Descriptions are split on '- ' and the second piece is kept, then normalised
# to lower case with underscores.
df_code['names'] = (df_code['Value_Description_EN_Description_de_la_valeur']
                    .str.split('- ').str.get(1)
                    .str.lower().str.replace(' ', '_'))

# Only the first rows of the dictionary are used for the species lookup.
# NOTE(review): 6 is taken from the original cell; confirm it still matches the file.
n_species_rows = 6
dict_prince_names = dict(zip(df_code[code_col][:n_species_rows], df_code['names'][:n_species_rows]))
# NOTE(review): the displayed result contains 'stizostedion_vitreum', while the other
# datasets in this notebook use 'sander_vitreus'; consider harmonising before merging.
dict_prince_names

{'81': 'salvelinus_namaycush',
 '91': 'coregonus_clupeaformis',
 '93': 'coregonus_artedi',
 '131': 'esox_lucius',
 '271': 'lota_lota',
 '334': 'stizostedion_vitreum'}

In [59]:
# Effort locations for Prince Albert NP: convert UTM coordinates to lat/lon.
df_location = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/Prince_Albert_NP_Freshwater_Piscivorous_Fish_2009-2017_data_1.csv',
                      engine='python')

# Every point is converted under both zone assumptions; the per-row choice is made below.
lat_z13, lon_z13 = utm.to_latlon(df_location['UTM Easting'], df_location['UTM Northing'], 13, 'U')
lat_z15, lon_z15 = utm.to_latlon(df_location['UTM Easting'], df_location['UTM Northing'], 15, 'U')

labelled_z13 = df_location['UTM Zone'] == 13
labelled_z9 = df_location['UTM Zone'] == 9

# Rows labelled zone 13 use the zone-13 conversion; rows labelled zone 9 use the
# zone-15 conversion; every other row gets NaN.
# NOTE(review): the label tested is 9 but the conversion uses zone 15 - confirm
# which of the two is intended.
df_location['latitude'] = np.where(labelled_z9, lat_z15, np.where(labelled_z13, lat_z13, np.nan))
df_location['longitude'] = np.where(labelled_z9, lon_z15, np.where(labelled_z13, lon_z13, np.nan))

# Keep only the merge keys and the converted coordinates.
df_location = df_location[['Waterbody Name', 'Year', 'Effort Number', 'latitude', 'longitude']]
df_location.head()

Unnamed: 0,Waterbody Name,Year,Effort Number,latitude,longitude
0,Kingsmere,2009,1,54.051214,-106.445356
1,Kingsmere,2009,2,54.060116,-106.448782
2,Kingsmere,2009,3,54.065335,-106.445312
3,Kingsmere,2009,4,54.060144,-106.434421
4,Kingsmere,2009,5,54.061682,-106.454628


In [60]:
# Catch records for Prince Albert NP: one row per fish, identified by species code.
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/Prince_Albert_NP_Freshwater_Piscivorous_Fish_2009-2017_data_2.csv',
                      engine='python')

# Keep the merge keys plus the species code, with the code cast to string.
df = df[['Waterbody Name', 'Year', 'Effort Number', 'Species']].astype({'Species': str})

df.head()

Unnamed: 0,Waterbody Name,Year,Effort Number,Species
0,Kingsmere,2009,1,81
1,Kingsmere,2009,1,91
2,Kingsmere,2009,1,91
3,Kingsmere,2009,2,81
4,Kingsmere,2009,2,81


In [61]:
# Attach coordinates to each catch record; records without a matching effort keep NaN coordinates.
df = df.merge(df_location, on=['Waterbody Name', 'Year', 'Effort Number'], how='left')

# Translate species codes to scientific names. The result is assigned back rather than
# using `df['Species'].replace(..., inplace=True)`, which mutates a selected column
# and may not propagate to `df` (a no-op under pandas Copy-on-Write).
df['Species'] = df['Species'].replace(dict_prince_names)

# Site key: coordinates rounded to 3 decimals, joined as "<lat>_<lon>".
# NOTE(review): rows with missing coordinates all get the key 'nan_nan' and would be
# grouped together downstream - check whether any exist.
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# snake_case column names.
df.columns = df.columns.str.lower().str.replace(' ', '_')
df.head()

Unnamed: 0,waterbody_name,year,effort_number,species,latitude,longitude,lat_long
0,Kingsmere,2009,1,salvelinus_namaycush,54.051214,-106.445356,54.051_-106.445
1,Kingsmere,2009,1,coregonus_clupeaformis,54.051214,-106.445356,54.051_-106.445
2,Kingsmere,2009,1,coregonus_clupeaformis,54.051214,-106.445356,54.051_-106.445
3,Kingsmere,2009,2,salvelinus_namaycush,54.060116,-106.448782,54.06_-106.449
4,Kingsmere,2009,2,salvelinus_namaycush,54.060116,-106.448782,54.06_-106.449


In [63]:
print(df['year'].min(), df['year'].max())

# Add one presence column per species.
#   1. Flag a row with 1 when its own 'species' value is that species (NaN otherwise).
#   2. Fill the NaNs with the (year, lat_long) group mean, so every row of a
#      site-year carries 1 for each species recorded there; species never recorded
#      in that site-year stay NaN.
# Only the 'species' column is compared. The previous `df.isin([col]).any(1)` scanned
# every column and passed the axis positionally, which pandas >= 2.0 rejects.
# dropna() prevents a missing species from creating a NaN-named column.
for col in df['species'].dropna().unique():
    df[col] = np.where(df['species'] == col, 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

2009 2017


Unnamed: 0,waterbody_name,year,effort_number,species,latitude,longitude,lat_long,salvelinus_namaycush,coregonus_clupeaformis,stizostedion_vitreum,coregonus_artedi,esox_lucius,lota_lota
435,Kingsmere,2017,35,salvelinus_namaycush,54.072493,-106.424026,54.072_-106.424,1.0,,,,,
420,Kingsmere,2017,24,salvelinus_namaycush,54.099106,-106.428027,54.099_-106.428,1.0,,,,,
215,Kingsmere,2015,20,salvelinus_namaycush,54.124481,-106.489529,54.124_-106.49,1.0,,,,,
146,Kingsmere,2009,81,salvelinus_namaycush,54.119896,-106.511965,54.12_-106.512,1.0,,,,,
462,Kingsmere,2017,53,coregonus_clupeaformis,54.089146,-106.463172,54.089_-106.463,1.0,1.0,,,,


In [64]:
# Row count before collapsing to one row per site-year.
print(len(df))

# Per-record identifiers are no longer needed once the presence columns exist.
df = df.drop(columns=['species', 'effort_number'])
# Rows are compared on every column except the first (waterbody_name).
df = df.drop_duplicates(subset=list(df.columns[1:]))

# Row count after de-duplication.
print(len(df))

display(df.sample(5))
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Processed/prince_albert_2009_2017_occurence.csv',
    index=False,
)

517
179


Unnamed: 0,waterbody_name,year,latitude,longitude,lat_long,salvelinus_namaycush,coregonus_clupeaformis,stizostedion_vitreum,coregonus_artedi,esox_lucius,lota_lota
381,Kingsmere,2015,54.069318,-106.451976,54.069_-106.452,1.0,1.0,,,,
474,Kingsmere,2017,54.110821,-106.413315,54.111_-106.413,,1.0,,,,
110,Kingsmere,2009,54.065558,-106.447688,54.066_-106.448,1.0,1.0,,,,
123,Kingsmere,2009,54.121883,-106.501815,54.122_-106.502,1.0,,,,,
54,Kingsmere,2009,54.089331,-106.47671,54.089_-106.477,1.0,1.0,,,,


# Waterton

In [77]:
# Load the Waterton stream survey and reshape it to the common site/species layout.
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/Waterton_Lakes_NP_Freshwater_Stream_Fish_Occupancy_2016_data.csv')
df.columns = df.columns.str.lower().str.replace(' ', '_')
# Columns whose (normalised) header is a known common name are renamed to the scientific name.
df = df.rename(columns=dict_species_name)

# Convert every point under both zone assumptions, then pick per row by the
# recorded zone; rows labelled neither 11 nor 12 get NaN.
lat_z11, lon_z11 = utm.to_latlon(df['utm_easting'], df['utm_northing'], 11, 'U')
lat_z12, lon_z12 = utm.to_latlon(df['utm_easting'], df['utm_northing'], 12, 'U')
labelled_z11 = df['utm_zone'] == 11
labelled_z12 = df['utm_zone'] == 12
df['latitude'] = np.where(labelled_z12, lat_z12, np.where(labelled_z11, lat_z11, np.nan))
df['longitude'] = np.where(labelled_z12, lon_z12, np.where(labelled_z11, lon_z11, np.nan))
# NOTE(review): the saved output shows a longitude near -119.95 for one site while the
# others sit around -113.9 to -114.1; that row's UTM zone label may be wrong in the source.

# Site key: coordinates rounded to 3 decimals, joined as "<lat>_<lon>".
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# The column header states the order is date/month/year, so parse day-first instead of
# relying on to_datetime's month-first default.
df['year'] = pd.to_datetime(df['date_(date/month/year)'], dayfirst=True).dt.year

# Metadata columns first (six of them), then one count column per species.
df = df[['waterbody_name', 'site_description', 'year', 'latitude', 'longitude', 'lat_long', 'oncorhynchus_clarkii',
         'salvelinus_fontinalis', 'salvelinus_confluentus', 'oncorhynchus_mykiss', 'rhinichthys_cataractae',
         'catostomus_catostomus', 'catostomus_commersonii', 'percopsis_omiscomaycus', 'lota_lota', 'salmo_trutta',
         'oncorhynchus_clarkii_x_oncorhynchus_mykiss']]

df.head()

Unnamed: 0,waterbody_name,site_description,year,latitude,longitude,lat_long,oncorhynchus_clarkii,salvelinus_fontinalis,salvelinus_confluentus,oncorhynchus_mykiss,rhinichthys_cataractae,catostomus_catostomus,catostomus_commersonii,percopsis_omiscomaycus,lota_lota,salmo_trutta,oncorhynchus_clarkii_x_oncorhynchus_mykiss
0,WATERTON,Hell Roaring,2016,49.021741,-113.898271,49.022_-113.898,0.0,3.0,0.0,0.0,,,,,,,
1,WATERTON,Crooked,2016,49.12363,-119.949173,49.124_-119.949,0.0,0.0,0.0,0.0,29.0,12.0,16.0,14.0,1.0,,
2,WATERTON,Crooked,2016,49.12363,-119.949173,49.124_-119.949,0.0,0.0,0.0,0.0,13.0,,1.0,7.0,,,
3,WATERTON,Blakiston,2016,49.125808,-114.02705,49.126_-114.027,0.0,0.0,10.0,0.0,,,,,,,
4,WATERTON,Blakiston,2016,49.125808,-114.02705,49.126_-114.027,0.0,0.0,6.0,0.0,,,,,,,


In [81]:
print(df['year'].min(), df['year'].max())

# Turn the per-species count columns (everything after the six metadata columns)
# into presence flags.
#   1. A count greater than zero becomes 1; a zero or missing count becomes NaN.
#      Testing `notnull()` alone would flag a recorded count of 0 as "present";
#      the saved outputs of this notebook show zero counts ending up as NaN.
#   2. NaNs are then filled with the (year, lat_long) group mean, so every row of a
#      site-year carries 1 for each species caught there in any pass.
for col in df.columns[6:]:
    df[col] = np.where(df[col] > 0, 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

2016 2016


Unnamed: 0,waterbody_name,site_description,year,latitude,longitude,lat_long,oncorhynchus_clarkii,salvelinus_fontinalis,salvelinus_confluentus,oncorhynchus_mykiss,rhinichthys_cataractae,catostomus_catostomus,catostomus_commersonii,percopsis_omiscomaycus,lota_lota,salmo_trutta,oncorhynchus_clarkii_x_oncorhynchus_mykiss
10,WATERTON,Blakiston,2016,49.096255,-113.905775,49.096_-113.906,,1.0,1.0,,,,,,,,
33,WATERTON,Dungarven,2016,49.186081,-113.943429,49.186_-113.943,1.0,1.0,,,,,,,,,1.0
31,WATERTON,Rowe,2016,49.056519,-114.049382,49.057_-114.049,,,,,,,,,,,
17,WATERTON,Blue Grouse,2016,49.120595,-114.141875,49.121_-114.142,,,,,,,,,,,
15,WATERTON,Crooked,2016,49.063478,-113.751008,49.063_-113.751,,1.0,,,,,,,,,


In [84]:
# Row count before collapsing to one row per site-year.
print(len(df))

# Rows are compared on everything from 'year' onwards; the first two columns
# (waterbody_name, site_description) are ignored.
df = df.drop_duplicates(subset=list(df.columns[2:]))

# Row count after de-duplication.
print(len(df))

display(df.sample(5))
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Processed/waterton_2016_occurence.csv',
    index=False,
)

43
27


Unnamed: 0,waterbody_name,site_description,year,latitude,longitude,lat_long,oncorhynchus_clarkii,salvelinus_fontinalis,salvelinus_confluentus,oncorhynchus_mykiss,rhinichthys_cataractae,catostomus_catostomus,catostomus_commersonii,percopsis_omiscomaycus,lota_lota,salmo_trutta,oncorhynchus_clarkii_x_oncorhynchus_mykiss
13,WATERTON,Bauerman,2016,49.151866,-114.08532,49.152_-114.085,,,,,,,,,,,
3,WATERTON,Blakiston,2016,49.125808,-114.02705,49.126_-114.027,,,1.0,,,,,,,,
40,WATERTON,Lone,2016,49.112359,-114.138086,49.112_-114.138,,,,,,,,,,,
8,WATERTON,Blakiston,2016,49.112009,-114.0789,49.112_-114.079,,,,,,,,,,,
7,WATERTON,Blue Grouse,2016,49.118292,-114.133194,49.118_-114.133,,,,,,,,,,,


# Anglers water points

In [23]:
# Load the angler's-map point records (lakes/ponds).
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Used/AnglerSaskatchewanStockedTroutWatersPoints.csv')
df = (df.rename(columns={'Name': 'waterbody_name', 'Year': 'year', 'Lat': 'latitude', 'Long': 'longitude'})
        .drop(columns=['Count']))

# Site key: coordinates rounded to 3 decimals, joined as "<lat>_<lon>".
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# Normalise common names (lower case, trailing whitespace stripped, underscores) and map
# them to scientific names. Assigned back instead of an in-place replace on a selected
# column, which may not propagate to `df`.
df['Species'] = (df['Species'].str.lower().str.rstrip().str.replace(' ', '_')
                 .replace(dict_species_name))

# 'year' contains the placeholder '-', so the column may be read as text. Coerce it to
# numbers first: the typo fix below compares against the integer 20122 and would
# silently do nothing on a string column.
# Any other non-numeric year is dropped together with '-'.
df['year'] = pd.to_numeric(df['year'], errors='coerce')

# Drop rows whose species is the stray value 'lake' and rows without a usable year.
df = df.loc[(df['Species'] != 'lake') & df['year'].notna()].copy()

# Integer years, with the known data-entry typo 20122 corrected to 2012.
df['year'] = df['year'].astype(int).replace(20122, 2012)

df.head()

Unnamed: 0,waterbody_name,Species,year,latitude,longitude,lat_long
0,Althouse Lake,salvelinus_fontinalis_x_salvelinus_namaycush,2007,55.508968,-104.836206,55.509_-104.836
1,Amber Lake,salvelinus_fontinalis,2019,57.446012,-109.264097,57.446_-109.264
2,Amber Lake,salvelinus_fontinalis,2018,57.446012,-109.264097,57.446_-109.264
3,Amber Lake,salmo_trutta,2018,57.446012,-109.264097,57.446_-109.264
4,Amber Lake,salvelinus_fontinalis,2017,57.446012,-109.264097,57.446_-109.264


In [24]:
print(df['year'].min(), df['year'].max())

# Add one presence column per species.
#   1. Flag a row with 1 when its own 'Species' value is that species (NaN otherwise).
#   2. Fill the NaNs with the (year, lat_long) group mean, so every row of a
#      site-year carries 1 for each species recorded there; species never recorded
#      in that site-year stay NaN.
# Only the 'Species' column is compared. The previous `df.isin([col]).any(1)` scanned
# every column and passed the axis positionally, which pandas >= 2.0 rejects.
# dropna() prevents a missing species from creating a NaN-named column.
for col in df['Species'].dropna().unique():
    df[col] = np.where(df['Species'] == col, 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

1973 2019


Unnamed: 0,waterbody_name,Species,year,latitude,longitude,lat_long,salvelinus_fontinalis_x_salvelinus_namaycush,salvelinus_fontinalis,salmo_trutta,oncorhynchus_mykiss,salmo_trutta_x_salvelinus_fontinalis,esox_lucius,sander_vitreus,lota_lota,coregonus_clupeaformis,perca_flavescens,sander_canadensis,salvelinus_namaycush
768,Steep Creek,oncorhynchus_mykiss,2012,53.226976,-105.350003,53.227_-105.35,,1.0,,1.0,,,,,,,,
758,Steep Creek,oncorhynchus_mykiss,2018,53.226976,-105.350003,53.227_-105.35,,,,1.0,,,,,,,,
658,Sand Lake,salvelinus_fontinalis,2019,54.1869,-104.859352,54.187_-104.859,,1.0,,,,,,,,,,
443,McLaren Lake,esox_lucius,2015,50.296577,-109.89727,50.297_-109.897,,,,,,1.0,,,,,,
89,Coal Pit-SPC-C,salvelinus_fontinalis_x_salvelinus_namaycush,2019,49.098571,-103.091154,49.099_-103.091,1.0,,,,,,,,,,,


In [25]:
# Row count before collapsing to one row per site-year.
print(len(df))

# The per-record species label is no longer needed once the presence columns exist.
df = df.drop(columns=['Species'])
# Rows are compared on every column except the first (waterbody_name).
df = df.drop_duplicates(subset=list(df.columns[1:]))

# Row count after de-duplication.
print(len(df))

display(df.sample(5))
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Processed/sask_anglers_map_water_point_1973_2019_occurence.csv',
    index=False,
)

1275
1031


Unnamed: 0,waterbody_name,year,latitude,longitude,lat_long,salvelinus_fontinalis_x_salvelinus_namaycush,salvelinus_fontinalis,salmo_trutta,oncorhynchus_mykiss,salmo_trutta_x_salvelinus_fontinalis,esox_lucius,sander_vitreus,lota_lota,coregonus_clupeaformis,perca_flavescens,sander_canadensis,salvelinus_namaycush
109,Cora Lake,2015,54.277579,-104.582119,54.278_-104.582,,,,1.0,,,,,,,,
48,Berna Lake,2018,55.92699,-104.29493,55.927_-104.295,,1.0,,1.0,,,,,,,,
1859,Shutte Lake,2015,52.177977,-102.915966,52.178_-102.916,,,,,,,1.0,,,,,
302,Kipling Reservoir,2016,50.072841,-102.741995,50.073_-102.742,,,,1.0,,,,,,,,
282,Junction Lake,2018,54.753855,-105.651398,54.754_-105.651,,,1.0,,,,,,,,,
