In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as graph
import seaborn as sns

import cartopy.crs as ccrs

import statsmodels

import os
from tqdm import tqdm, trange

from convertbng.util import convert_bng, convert_lonlat
import utm

In [3]:
# code and species dictionary

code_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Alberta/To_look_at/Banff_NP_Freshwater_Lake_Fish_Index_2017_data_dictionary.csv')
# code_df = code_df.iloc[:-6]
code_df.rename(columns={'Data_Value_Valeur_de_la_donnée': 'code_name', 
                        'Value_Description_EN_Description_de_la_valeur': 'common_name'}, inplace=True)
code_df = code_df[['code_name', 'common_name']]
code_df['common_name'] = code_df['common_name'].str.lower().str.replace(' ', '_')
display(code_df.sample(5))

dict_code_name = code_df.set_index('code_name').to_dict()['common_name']

Unnamed: 0,code_name,common_name
39,NRDC,northern_redbelly_dace
18,ARGR,arctic_grayling
24,BURB,burbot
25,CISC,cisco
8,LNSC,longnose_sucker


In [4]:
new_codes = {'LNDC': 'longnose_dace', 'BNTR': 'brown_trout', 'RNTR1': 'rainbow_trout', 'CTTR1': 'cutthroat_trout', 
             'WHSC': 'white_sucker', 'BLTRCTTR(N&I)': 'bull_trout_x_cutthroat_trout', 'CTTR(hybrids)': 'cutthroat_trout', 
             'CTTRhybrids': 'cutthroat_trout', 'SUCK': 'white_sucker'}

dict_code_name.update(new_codes)

In [5]:
# Species dictionay

species_name_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv')

dict_species_name = species_name_df.set_index('common_name').to_dict()['scientific_name']

In [94]:
species_name_df = pd.DataFrame.from_dict({'common_name': list(dict_species_name.keys()), 
                                        'scientific_name' : list(dict_species_name.values())})
# species_name_df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv', index=False)

# Sapna

### 2013 Ontario Lakes

In [30]:
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Ontario/Used/Ontario lakes and fish database_October2013.csv')

df.drop(columns=['Lake Code', 'LATITUDE', 'LONGITUDE', 'Total # of spp', 'Water temp sampling date', 'Source'], inplace=True)

df.columns = df.columns.str.lower().str.rstrip().str.replace(' ', '_').str.replace('_sp', '_spp', regex=True)
df.rename(columns=dict_species_name, inplace=True)

df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

cols = ['year', 'lat_long']
cols.extend(list(df.columns[:-2]))

df = df[cols]
df = df.sum(axis=1, level=0)

for col in list(df.columns)[21:]:
    df[col] = df[col].replace(0, np.nan)
    df[col] = np.where(df[col].notnull(), 1, np.nan)
    
display(df.head())
print(len(df['lat_long'].unique()))

Unnamed: 0,year,lat_long,lake_name,wshed,latitude,longitude,surface_area_(ha),maximum_depth_(m),mean_depth,island_perimeter_(km),...,myoxocephalus_thompsonii,cottus_spp,esox_lucius_x_esox_americanus_vermiculatus,chrosomus_sppp,chrosomus_eos_x_chrosomus_neogaeus,chrosomus_eos_x_margariscus_nachtriebi,chrosomus_neogaeus_x_margariscus_nachtriebi,luxilus_cornutus_x_semotilus_atromaculatus,lepomis_sppp,lepomis_gibbosus_x_lepomis_macrochirus
0,1968.0,50.338_-93.404,Aerobus L.,5QE04,50.337778,-93.404444,1947.7,43.9,15.4,4.8,...,,,,,,,,,,
1,1985.0,50.026_-93.155,Affleck L.,5QD01,50.026389,-93.154722,168.6,15.5,7.3,0.3,...,,,,,,,,,,
2,1977.0,49.459_-92.469,Aiabewatik L.,5QD01,49.459444,-92.468611,162.6,33.0,9.0,5.2,...,,,,,,,,,,
3,1975.0,50.029_-92.952,Little Amesdale L.,5QD04,50.028889,-92.9525,36.8,4.3,1.8,0.0,...,,,,,,,,,,
4,1985.0,50.11_-92.466,Alder L.,5QB01,50.11,-92.466389,192.3,14.8,6.2,0.0,...,,,,,,,,,,


9758


In [6]:
print(df['year'].min(), df['year'].max())

1957.0 1986.0


In [33]:
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Ontario/Processed/sapna_ontario_lakes_occurence_1957_1986.csv', 
          index=False)

### York Request ONBSM
##### can get abundance

In [2]:
df_location = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Ontario/York_request_ONBSM_Location.csv')
df_location.drop(columns=['Target_species', 'Lake_selection'], inplace=True)
df_location['year'] = pd.DatetimeIndex(pd.to_datetime(df_location['Survey_year_month'])).year
df_location['lat_long'] = df_location['Lat'].round(3).astype(str) + '_' + df_location['Long'].round(3).astype(str)
print(f'Location size: {len(df_location)}')

df_variables = df_location[['year', 'lat_long', 'Area_ha', 'Depth_Max', 'Depth_Mn', 'SDF', 'pLittoral', 'Secchi_Su', 
                            'Thermo_Obs', 'Thermo_Pred', 'TotalPhosphorus(ugL)', 'TrueColour_pctl', 'pH_pctl', 
                            'Conductivity(uScms)', 'TDS (mg/L)', 'DD5_8110', 'AirTemp_8110']]

df_variables = df_variables.groupby(['year', 'lat_long']).mean().reset_index()
print(f'Variables size: {len(df_variables)}')
display(df_variables.head())

Location size: 1409
Variables size: 1406


Unnamed: 0,year,lat_long,Area_ha,Depth_Max,Depth_Mn,SDF,pLittoral,Secchi_Su,Thermo_Obs,Thermo_Pred,TotalPhosphorus(ugL),TrueColour_pctl,pH_pctl,Conductivity(uScms),TDS (mg/L),DD5_8110,AirTemp_8110
0,2007.0,44.584_-78.844,4764.0,12.8,5.0,2.984901,0.467848,,,9.770048,,,,,,1975.0,6.4
1,2007.0,48.579_-90.419,3407.0,54.9,18.1,3.101563,0.115151,,,13.763188,8.2,12.0,38.0,66.0,43.956,1453.0,2.4
2,2007.0,49.111_-91.881,8467.0,56.4,22.1,5.469126,0.140908,,11.0,15.693298,5.2,50.0,40.0,25.3,16.8498,1615.0,2.8
3,2008.0,43.057_-81.176,233.0,12.2,3.6,2.478946,0.674001,1.2,,6.269517,52.5,64.0,64.0,549.0,365.634,2246.0,8.1
4,2008.0,44.172_-78.855,6650.0,6.1,1.3,5.075877,0.992,0.5,,,10.1,33.0,100.0,390.0,259.74,2065.0,7.2


In [39]:
df_location = df_location[['year', 'lat_long', 'Lat', 'Long', 
                           'BsM_Cycle', 'FMZ', 'Wby_LID', 'Wby_Name']].merge(df_variables, on=['year', 'lat_long'])
display(df_location.head())
print(len(df_location))

Unnamed: 0,year,lat_long,Lat,Long,BsM_Cycle,FMZ,Wby_LID,Wby_Name,Area_ha,Depth_Max,...,Secchi_Su,Thermo_Obs,Thermo_Pred,TotalPhosphorus(ugL),TrueColour_pctl,pH_pctl,Conductivity(uScms),TDS (mg/L),DD5_8110,AirTemp_8110
0,2012.0,54.146_-85.04,54.14639,-85.03972,1.0,1.0,16-6284-60018,Pine L.,299.0,12.66,...,1.9,11.0,8.846716,12.8,100.0,100.0,137.0,91.242,1069.0,-1.8
1,2012.0,54.165_-85.689,54.165,-85.68917,1.0,1.0,16-5856-60025,Shamattawa L.,963.0,7.2,...,1.2,,,11.5,100.0,50.0,115.0,76.59,1073.0,-1.8
2,2012.0,54.334_-85.014,54.33444,-85.01361,1.0,1.0,16-6292-60223,Spruce L.,1082.0,16.0,...,2.0,,10.110772,9.5,50.0,100.0,133.0,88.578,1059.0,-1.9
3,2011.0,52.579_-91.541,52.57861,-91.54139,1.0,2.0,15-5988-58262,Windigo L.,8028.0,7.0,...,1.6,,,11.3,63.0,63.0,108.0,71.928,1329.0,-0.3
4,2011.0,52.91_-87.384,52.90972,-87.38417,1.0,2.0,16-4753-58628,Winisk L.,25815.0,16.5,...,3.0,,12.043536,9.0,50.0,88.0,149.0,99.234,1179.0,-1.2


1407


In [42]:
df_species = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Ontario/York_request_ONBSM_occurence.csv')
print(len(df_species))

df = df_species.merge(df_location, on=(['BsM_Cycle', 'FMZ', 'Wby_LID']), how='left')
print(len(df))
df.dropna(subset=['SpecName', 'Lat', 'Long'], inplace=True)

df.drop(columns=['BsM_Cycle', 'FMZ', 'Wby_LID', 'SpecCode'], inplace=True)
df['SpecName'] = df['SpecName'].str.lower().str.rstrip().str.replace(' ', '_')
df.replace(dict_species_name, inplace=True)
df.replace({'_sp.': '_spp'}, regex=True, inplace=True)

display(df.sample(5))

22924
22931


Unnamed: 0,Wby_name,SpecName,year,lat_long,Lat,Long,Wby_Name,Area_ha,Depth_Max,Depth_Mn,...,Secchi_Su,Thermo_Obs,Thermo_Pred,TotalPhosphorus(ugL),TrueColour_pctl,pH_pctl,Conductivity(uScms),TDS (mg/L),DD5_8110,AirTemp_8110
10052,Little Venetian L.,luxilus_cornutus,2011.0,46.941_-81.209,46.94055,-81.20916,Little Venetian L.,72.0,22.12,7.3,...,6.1,6.0,7.161583,4.0,32.0,100.0,21.6,14.3856,1693.0,3.9
5458,White Otter L.,perca_flavescens,2007.0,49.111_-91.881,49.11111,-91.88139,White Otter L.,8467.0,56.4,22.1,...,,11.0,15.693298,5.2,50.0,40.0,25.3,16.8498,1615.0,2.8
4834,Rugby L.,perca_flavescens,2010.0,49.958_-92.963,49.95778,-92.96333,Rugby L.,1021.0,7.6,3.4,...,0.8,,,4.0,49.0,69.0,37.6,25.0416,1662.0,2.6
8677,Raven L.,ameiurus_nebulosus,2015.0,48.052_-79.55,48.05196,-79.55023,Raven L.,581.0,46.9,19.0,...,2.5,8.0,11.407494,10.0,32.0,5.0,41.1,27.3726,1571.0,2.6
14785,Rice L.,ameiurus_nebulosus,2015.0,44.173_-78.186,44.17348,-78.18609,Rice L.,9185.0,13.4,2.6,...,2.2,,8.963688,11.4,33.0,100.0,232.0,154.512,2023.0,7.1


In [43]:
df = df[~df['SpecName'].isin(['unknown_(any_or_all_fish_sppcies)', 'unidentifiable', 'mixed_scrap_fish_(animal_food)'])]

In [44]:
%%time
for col in tqdm(df['SpecName'].unique()):
    df[col] = np.where(df.isin([col]).any(1), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))  
print(df['year'].min(), df['year'].max())

100%|██████████| 113/113 [00:16<00:00,  6.84it/s]


Unnamed: 0,Wby_name,SpecName,year,lat_long,Lat,Long,Wby_Name,Area_ha,Depth_Max,Depth_Mn,...,lepomis_spp,labidesthes_sicculus,esox_americanus,oncorhynchus_tshawytscha,lepomis_gibbosus_x_lepomis_macrochirus,percopsidae_spp,esox_lucius_x_esox_masquinongy,dorosoma_cepedianum,notropis_stramineus,etheostoma_olmstedi
2131,Thaddeus L.,coregonus_artedi,2009.0,50.171_-92.881,50.17139,-92.88111,Thaddeus L.,1566.0,16.2,8.5,...,,,,,,,,,,
9354,Chiblow L.,esox_lucius,2008.0,46.344_-83.049,46.34361,-83.04917,Chiblow L.,1996.0,66.9,24.1,...,,,,,,,,,,
12367,Lac Dollard-des-Ormeaux,carpiodes_cyprinus,2017.0,45.594_-74.488,45.59411,-74.48757,Lac Dollard-des-Ormeaux,14414.0,34.0,6.1,...,,,,,,,,,,
11828,"Nosbonsing, L.",esox_lucius,2014.0,46.207_-79.213,46.20715,-79.21294,"Nosbonsing, L.",1765.0,14.0,5.0,...,,,,,,,,,,
1194,Joyce L.,catostomus_commersonii,2013.0,51.132_-93.014,51.1321,-93.01422,Joyce L.,1458.0,6.2,3.1,...,,,,,,,,,,


2007.0 2017.0
CPU times: user 13.1 s, sys: 3.56 s, total: 16.6 s
Wall time: 16.6 s


In [45]:
print(len(df))
df.drop(columns=['SpecName'], inplace=True)
df.drop_duplicates(inplace=True)
print(len(df))

display(df.sample(5))
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Ontario/Processed/York_request_occurence_2007_2017.csv', 
          index=False)

15951
1405


Unnamed: 0,Wby_name,year,lat_long,Lat,Long,Wby_Name,Area_ha,Depth_Max,Depth_Mn,SDF,...,lepomis_spp,labidesthes_sicculus,esox_americanus,oncorhynchus_tshawytscha,lepomis_gibbosus_x_lepomis_macrochirus,percopsidae_spp,esox_lucius_x_esox_masquinongy,dorosoma_cepedianum,notropis_stramineus,etheostoma_olmstedi
6121,Jolly L.,2013.0,49.168_-89.459,49.16785,-89.45932,Jolly L.,101.0,5.0,2.1,1.71324,...,,,,,,,,,,
4366,NL,2010.0,49.319_-92.059,49.31917,-92.05944,NL,114.0,5.2,2.3,1.394525,...,,,,,,,,,,
13964,Fanshawe L.,2008.0,43.057_-81.176,43.05722,-81.17556,Fanshawe L.,233.0,12.2,3.6,2.478946,...,,,,,,,,,,
7619,South Greenhill L.,2016.0,48.455_-83.887,48.45508,-83.88685,South Greenhill L.,110.0,7.0,3.2,2.503808,...,,,,,,,,,,
8362,Little Ridley L.,2015.0,47.904_-82.694,47.90407,-82.69446,Little Ridley L.,59.0,7.8,2.1,3.176761,...,,,,,,,,,,


### That's it folks!