In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as graph
import seaborn as sns

import cartopy.crs as ccrs

import statsmodels

import os
from tqdm import tqdm, trange

from convertbng.util import convert_bng, convert_lonlat
import utm

In [2]:
# Build the species-code lookup (code value -> normalised common name).

code_df = pd.read_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Alberta/To_look_at/Banff_NP_Freshwater_Lake_Fish_Index_2017_data_dictionary.csv'
).rename(columns={
    'Data_Value_Valeur_de_la_donnée': 'code_name',
    'Value_Description_EN_Description_de_la_valeur': 'common_name',
})
# Left disabled, as in the original cell: code_df = code_df.iloc[:-6]

# Keep only the two lookup columns; common names become lower-case with
# underscores in place of spaces.
code_df = code_df[['code_name', 'common_name']].assign(
    common_name=lambda frame: frame['common_name'].str.lower().str.replace(' ', '_')
)
display(code_df.sample(5))

# Plain dict keyed by code value, used for lookups in later cells.
dict_code_name = code_df.set_index('code_name')['common_name'].to_dict()

Unnamed: 0,code_name,common_name
47,WALL,walleye
26,EMSH,emerald_shiner
32,LKTR,lake_trout
12,OU,ouananiche
18,ARGR,arctic_grayling


In [3]:
# Additional code -> common-name entries merged into the lookup built from the
# data dictionary (existing keys, if any, are overwritten).
new_codes = {
    'LNDC': 'longnose_dace',
    'BNTR': 'brown_trout',
    'RNTR1': 'rainbow_trout',
    'CTTR1': 'cutthroat_trout',
    'WHSC': 'white_sucker',
    'BLTRCTTR(N&I)': 'bull_trout_x_cutthroat_trout',
    'CTTR(hybrids)': 'cutthroat_trout',
    'CTTRhybrids': 'cutthroat_trout',
    'SUCK': 'white_sucker',
}

dict_code_name.update(new_codes.items())

In [61]:
# Species dictionary: common name -> scientific name

species_name_df = pd.read_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv'
)

# Plain dict keyed by common name, used to translate species labels later on.
dict_species_name = species_name_df.set_index('common_name')['scientific_name'].to_dict()

In [5]:
# Extra common-name -> scientific-name entries merged into the species
# dictionary; the updated dictionary is shown as the cell output.
new_names = {
    'nine-spine_stickleback': 'pungitius_pungitius',
}

dict_species_name.update(new_names.items())
dict_species_name

{'striped_bass': 'morone_saxatilis',
 'american_shad': 'alosa_sapidissima',
 'white_sucker': 'catostomus_commersonii',
 'channel_catfish': 'ictalurus_punctatus',
 'mooneye': 'hiodon_tergisus',
 'walleye': 'sander_vitreus',
 'lake_sturgeon': 'acipenser_fulvescens',
 'rainbow_smelt': 'osmerus_mordax',
 'longnose_sucker': 'catostomus_catostomus',
 'alewife': 'alosa_pseudoharengus',
 'american_eel': 'anguilla_rostrata',
 'white_perch': 'morone_americana',
 'quillback': 'carpiodes_cyprinus',
 'smallmouth_bass': 'micropterus_dolomieu',
 'silver_redhorse': 'moxostoma_anisurum',
 'logperch': 'percina_caprodes',
 'tessellated_darter': 'etheostoma_olmstedi',
 'brown_bullhead': 'ameiurus_nebulosus',
 'longnose_gar': 'lepisosteus_osseus',
 'freshwater_drum': 'aplodinotus_grunniens',
 'fallfish': 'semotilus_corporalis',
 'channel_darter': 'percina_copelandi',
 'emerald_shiner': 'notropis_atherinoides',
 'johnny_darter': 'etheostoma_nigrum',
 'tadpole_madtom': 'noturus_gyrinus',
 'trout-perch': 'per

In [47]:
# Rebuild the two-column species table from the (possibly extended) dictionary,
# one row per common-name / scientific-name pair.
species_name_df = pd.DataFrame(
    list(dict_species_name.items()),
    columns=['common_name', 'scientific_name'],
)
# Writing the table back to disk is left disabled, as in the original cell:
# species_name_df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv', index=False)

# Compiling species occurrence tables

### Manitoba Drain Catch

In [5]:
# Site table: one row per site number with its name, coordinates and the
# year taken from the 'Date' column.
site_column_names = {'SiteName': 'waterbody_name', 'Latitude_DD': 'latitude',
                     'Longitude_DD': 'longitude', 'Site Number': 'SiteNumber2'}

df_location = (
    pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/mbagdrainssites.xlsx - Sites.csv')
    .rename(columns=site_column_names)
    [['SiteNumber2', 'Date', 'waterbody_name', 'latitude', 'longitude']]
)

# Only the year of the date is kept; the full date column is dropped.
site_dates = pd.to_datetime(df_location['Date'])
df_location['year'] = pd.DatetimeIndex(site_dates).year
df_location = df_location.drop(columns=['Date'])

# Location key: latitude and longitude rounded to 3 decimals, joined by '_'.
lat_text = df_location['latitude'].round(3).astype(str)
lon_text = df_location['longitude'].round(3).astype(str)
df_location['lat_long'] = lat_text.str.cat(lon_text, sep='_')

df_location.head()

Unnamed: 0,SiteNumber2,waterbody_name,latitude,longitude,year,lat_long
0,D-02-001,Sturgeon Creek,49.87693,-97.27348,2002,49.877_-97.273
1,D-02-002,La Salle River,49.69432,-97.26246,2002,49.694_-97.262
2,D-02-003,Jackfish Creek,51.13015,-99.93745,2002,51.13_-99.937
3,D-02-004,Mink Creek,51.39807,-100.45823,2002,51.398_-100.458
4,D-02-005,Turtle River,50.90373,-99.50532,2002,50.904_-99.505


In [6]:
# Water-chemistry table; the site identifier/date/coordinate columns listed
# below are removed after the coordinate columns are renamed.
df_chemistry = (
    pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/mbagdrainschemistry.xlsx - Water_Chemistry.csv')
    .rename(columns={'Latitude_DD': 'latitude', 'Longitude_DD': 'longitude'})
    .drop(columns=['SiteNumber', 'Date', 'latitude', 'longitude'])
)
df_chemistry.head()

Unnamed: 0,SiteNumber2,AirTemp_C,WaterTemp_C,DissolvedOxygen_mg_L,DO_%Sat,Conductivity_µS_cm,Turbidity_NTU,pH_units
0,D-02-001,4.6,1.6,10.4,100.0,425.0,,7.4
1,D-02-002,4.0,1.1,,,265.0,,7.41
2,D-02-003,-1.0,0.9,11.5,99.0,415.0,,8.54
3,D-02-004,,5.0,11.1,98.8,,,
4,D-02-005,,6.5,10.69,98.6,510.0,,8.41


In [7]:
# Attach the chemistry readings to the site records (inner join on the site
# number), then report the row count and preview the result.
df_loc_chem = pd.merge(df_location, df_chemistry, how='inner', on='SiteNumber2')
print(df_loc_chem.shape[0])
display(df_loc_chem.head())

2371


Unnamed: 0,SiteNumber2,waterbody_name,latitude,longitude,year,lat_long,AirTemp_C,WaterTemp_C,DissolvedOxygen_mg_L,DO_%Sat,Conductivity_µS_cm,Turbidity_NTU,pH_units
0,D-02-001,Sturgeon Creek,49.87693,-97.27348,2002,49.877_-97.273,4.6,1.6,10.4,100.0,425.0,,7.4
1,D-02-002,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,265.0,,7.41
2,D-02-003,Jackfish Creek,51.13015,-99.93745,2002,51.13_-99.937,-1.0,0.9,11.5,99.0,415.0,,8.54
3,D-02-004,Mink Creek,51.39807,-100.45823,2002,51.398_-100.458,,5.0,11.1,98.8,,,
4,D-02-005,Turtle River,50.90373,-99.50532,2002,50.904_-99.505,,6.5,10.69,98.6,510.0,,8.41


In [8]:
# Catch table: one row per (site number, species) actually caught.
df_catch = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/mbagdrainscatch.xlsx - Catch.csv')
df_catch = df_catch.rename(columns={'Site Number': 'SiteNumber2'})

# Keep the two needed columns and drop incomplete rows. Results are assigned
# back (no inplace=True) so the cell never mutates a slice of another frame.
df_catch = df_catch[['SiteNumber2', 'Common Name']].dropna()

# Normalise the common name (lower-case, trailing whitespace removed, spaces
# -> underscores) and translate it with dict_species_name. Names missing from
# the dictionary are left unchanged.
# The translated Series must be assigned back to the column: calling
# `.replace(..., inplace=True)` on `df_catch['Common Name']` only modifies a
# temporary Series when pandas Copy-on-Write is active, leaving df_catch as is.
df_catch['Common Name'] = (
    df_catch['Common Name']
    .str.lower()
    .str.rstrip()
    .str.replace(' ', '_')
    .replace(dict_species_name)
)

# Discard non-species labels, then collapse repeated (site, species) pairs.
df_catch = df_catch[~df_catch['Common Name'].isin(['no_catch', 'eggs', 'hybrid'])]
df_catch = df_catch.drop_duplicates()

print(len(df_catch))
df_catch.head()

3678


Unnamed: 0,SiteNumber2,Common Name
2,D-02-002,semotilus_atromaculatus
3,D-02-002,pimephales_promelas
4,D-02-002,etheostoma_nigrum
5,D-02-002,percina_shumardi
6,D-02-002,rhinichthys_cataractae


In [39]:
# Left-join the site/chemistry information onto every catch record; the join
# uses the columns the two frames have in common. The site number is dropped
# once the join is done.
df = pd.merge(df_catch, df_loc_chem, how='left').drop(columns=['SiteNumber2'])
print(df.shape[0])
display(df.sample(5))

3678


Unnamed: 0,Common Name,waterbody_name,latitude,longitude,year,lat_long,AirTemp_C,WaterTemp_C,DissolvedOxygen_mg_L,DO_%Sat,Conductivity_µS_cm,Turbidity_NTU,pH_units
2818,culaea_inconstans,Unnamed tributary to St. Labre Bog,49.31111,-95.99988,2005,49.311_-96.0,24.0,19.1,8.09,101.2,253,0,7.25
1142,pimephales_promelas,East Branch Sturgeon Creek,50.06382,-97.54338,2003,50.064_-97.543,24.5,20.1,9.68,118.4,820,2,
2417,umbra_limi,Unnamed tributary to Whitemouth Lake,49.24697,-95.78717,2004,49.247_-95.787,26.0,23.4,3.93,56.4,170,0,6.68
1141,culaea_inconstans,East Branch Sturgeon Creek,50.06382,-97.54338,2003,50.064_-97.543,24.5,20.1,9.68,118.4,820,2,
451,semotilus_atromaculatus,Bosshill Creek,49.84333,-100.96983,2003,49.843_-100.97,25.5,18.5,8.83,104.7,1085,11,8.64


In [40]:
print(df['year'].min(), df['year'].max())

# Add one indicator column per species:
#   1   on rows whose 'Common Name' is that species,
#   NaN elsewhere, then filled with the mean of the indicator over the same
#       (year, lat_long) group, so every row of a group where the species
#       occurs ends up with 1.0 and groups without it stay NaN.
# The species column is compared directly instead of `df.isin([col]).any(1)`:
# the positional axis argument of DataFrame.any is rejected by pandas >= 2.0,
# and scanning every column of the frame for each species is unnecessary.
# dropna() keeps a missing species label from producing a NaN-named column.
for col in df['Common Name'].dropna().unique():
    df[col] = np.where(df['Common Name'].eq(col), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df)

2002 2006


Unnamed: 0,Common Name,waterbody_name,latitude,longitude,year,lat_long,AirTemp_C,WaterTemp_C,DissolvedOxygen_mg_L,DO_%Sat,...,micropterus_dolomieu,notemigonus_crysoleucas,salmo_trutta,noturus_flavus,cyprinella_spiloptera,aplodinotus_grunniens,moxostoma_macrolepidotum,ictalurus_punctatus,hiodon_alosoides,oncorhynchus_mykiss
0,semotilus_atromaculatus,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,...,,,,,,,,,,
1,pimephales_promelas,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,...,,,,,,,,,,
2,etheostoma_nigrum,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,...,,,,,,,,,,
3,percina_shumardi,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,...,,,,,,,,,,
4,rhinichthys_cataractae,La Salle River,49.69432,-97.26246,2002,49.694_-97.262,4.0,1.1,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3673,pimephales_promelas,Whitemud River Cross Ditch,50.24071,-98.85887,2006,50.241_-98.859,30.0,26.2,11.41,161.7,...,,,,,,1.0,,,,
3674,aplodinotus_grunniens,Whitemud River Cross Ditch,50.24071,-98.85887,2006,50.241_-98.859,30.0,26.2,11.41,161.7,...,,,,,,1.0,,,,
3675,etheostoma_nigrum,Whitemud River Cross Ditch,50.24071,-98.85887,2006,50.241_-98.859,30.0,26.2,11.41,161.7,...,,,,,,1.0,,,,
3676,notropis_stramineus,Whitemud River Cross Ditch,50.24071,-98.85887,2006,50.241_-98.859,30.0,26.2,11.41,161.7,...,,,,,,1.0,,,,


In [44]:
# Collapse the per-catch rows: remove the species label, then keep the first
# row for each distinct combination of all columns except the first one.
print(len(df))
df = df.drop(columns=['Common Name'])
dedup_columns = df.columns[1:]
df = df.drop_duplicates(subset=dedup_columns)
print(len(df))

display(df.sample(5))
# Export left disabled, as in the original cell:
# df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Processed/manitoba_drain_catch_2002_2006_occurence.csv', 
#           index=False)

3678
1021


Unnamed: 0,waterbody_name,latitude,longitude,year,lat_long,AirTemp_C,WaterTemp_C,DissolvedOxygen_mg_L,DO_%Sat,Conductivity_µS_cm,...,micropterus_dolomieu,notemigonus_crysoleucas,salmo_trutta,noturus_flavus,cyprinella_spiloptera,aplodinotus_grunniens,moxostoma_macrolepidotum,ictalurus_punctatus,hiodon_alosoides,oncorhynchus_mykiss
2005,Sunny Valley School Drain,51.40645,-97.49728,2004,51.406_-97.497,20.0,19.7,7.31,90.7,,...,,,,,,,,,,
2809,Unnamed tributary to St. Labre Bog,49.31018,-96.07958,2005,49.31_-96.08,25.0,17.8,7.08,74.4,489.0,...,,,,,,,,,,
3506,Brelinski Creek,51.75018,-100.55737,2006,51.75_-100.557,20.0,19.4,8.42,113.6,302.0,...,,,,,,,,,,
2797,Unnamed tributary to Icelandic River,51.00133,-97.36833,2005,51.001_-97.368,22.0,20.5,12.67,,,...,,,,,,,,,,
2112,Crooked Lake Channel,49.81292,-98.00272,2004,49.813_-98.003,9.0,,,,,...,,,,,,,,,,


# Devil's Lake

In [66]:
# Load the Devil's Lake fish records and reduce them to distinct
# (species, year, site) rows.
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/rrdevilslakephase3appendicesfishdata.csv')
df = df[['Species', 'Date of capture (dd/mm/yyyy)', 'Location (Manitoba)', 'N Latitude (DD)', 'LWBIN_Long']]

df = df.rename(columns={'Date of capture (dd/mm/yyyy)': 'year', 'Location (Manitoba)': 'waterbody_name',
                        'N Latitude (DD)': 'latitude', 'LWBIN_Long': 'longitude'})

# The source column is labelled dd/mm/yyyy, so parse day-first. Only the year
# is kept, but without dayfirst a value such as 25/06/2007 can clash with a
# month-first format guessed from earlier rows.
df['year'] = pd.DatetimeIndex(pd.to_datetime(df['year'], dayfirst=True)).year

# Location key: latitude and longitude rounded to 3 decimals, joined by '_'.
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# Normalise the species label and translate it with dict_species_name; labels
# missing from the dictionary are left unchanged. The result is assigned back
# to the column because `df['Species'].replace(..., inplace=True)` only
# changes a temporary Series when pandas Copy-on-Write is active.
df['Species'] = (
    df['Species']
    .str.lower()
    .str.rstrip()
    .str.replace(' ', '_')
    .replace(dict_species_name)
)
df = df.drop_duplicates()

df.head()

Unnamed: 0,Species,year,waterbody_name,latitude,longitude,lat_long
0,pimephales_promelas,2007,Wavey Creek,50.26567,-96.97504,50.266_-96.975
56,pimephales_promelas,2007,Emerson ( Red River),49.00011,-97.23022,49.0_-97.23
66,pimephales_promelas,2007,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153
72,pimephales_promelas,2008,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153
73,pimephales_promelas,2009,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153


In [67]:
print(df['year'].min(), df['year'].max())

# Add one indicator column per species:
#   1   on rows whose 'Species' is that species,
#   NaN elsewhere, then filled with the mean of the indicator over the same
#       (year, lat_long) group, so every row of a group where the species
#       occurs ends up with 1.0 and groups without it stay NaN.
# The species column is compared directly instead of `df.isin([col]).any(1)`:
# the positional axis argument of DataFrame.any is rejected by pandas >= 2.0,
# and scanning every column of the frame for each species is unnecessary.
# dropna() keeps a missing species label from producing a NaN-named column.
for col in tqdm(df['Species'].dropna().unique()):
    df[col] = np.where(df['Species'].eq(col), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

100%|██████████| 3/3 [00:00<00:00, 190.59it/s]

2007 2011





Unnamed: 0,Species,year,waterbody_name,latitude,longitude,lat_long,pimephales_promelas,notropis_atherinoides,luxilus_cornutus
75,pimephales_promelas,2011,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153,1.0,,
206,notropis_atherinoides,2007,Selkirk,50.15042,-96.85757,50.15_-96.858,,1.0,
0,pimephales_promelas,2007,Wavey Creek,50.26567,-96.97504,50.266_-96.975,1.0,,
203,notropis_atherinoides,2007,Emerson ( Red River),49.00011,-97.23022,49.0_-97.23,1.0,1.0,
168,pimephales_promelas,2007,Pembina/LaRiviera,49.23041,-98.67562,49.23_-98.676,1.0,,


In [69]:
# Collapse to distinct year/location/presence combinations and export.
print(len(df))
df = df.drop(columns=['Species'])

# Duplicate key: year, coordinates, location key and the three species
# indicator columns (waterbody_name is not part of the key).
site_year_keys = ['year', 'latitude', 'longitude', 'lat_long']
species_flags = ['pimephales_promelas', 'notropis_atherinoides', 'luxilus_cornutus']
df = df.drop_duplicates(subset=site_year_keys + species_flags)
print(len(df))

display(df.head())
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Processed/devil_lake_2007_2011_occurence.csv',
    index=False,
)

14
10


Unnamed: 0,year,waterbody_name,latitude,longitude,lat_long,pimephales_promelas,notropis_atherinoides,luxilus_cornutus
0,2007,Wavey Creek,50.26567,-96.97504,50.266_-96.975,1.0,,
56,2007,Emerson ( Red River),49.00011,-97.23022,49.0_-97.23,1.0,1.0,
66,2007,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153,1.0,,
72,2008,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153,1.0,,
73,2009,Baylor Pond- I,49.78013,-97.153483,49.78_-97.153,1.0,,


# Government Stocking

In [75]:
# Load the government lake-stocking records and reduce them to distinct
# (waterbody, year, species, location) rows.
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/manitoba_gov_lake_stocking.csv')
df = df[['WATERBODY_NAME', 'YEAR_', 'SPECIES', 'WATERBODY_LAT', 'WATERBODY_LONG']]

# Lower-case the headers and strip trailing underscores ('YEAR_' -> 'year').
df.columns = df.columns.str.lower().str.rstrip('_')
df = df.rename(columns={'waterbody_lat': 'latitude', 'waterbody_long': 'longitude'})

# Location key: latitude and longitude rounded to 3 decimals, joined by '_'.
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)

# Normalise the species label and translate it with dict_species_name; labels
# missing from the dictionary are left unchanged. The result is assigned back
# to the column because `df['species'].replace(..., inplace=True)` only
# changes a temporary Series when pandas Copy-on-Write is active.
df['species'] = (
    df['species']
    .str.lower()
    .str.rstrip()
    .str.replace(' ', '_')
    .replace(dict_species_name)
)
df = df.drop_duplicates()

df.head()

Unnamed: 0,waterbody_name,year,species,latitude,longitude,lat_long
0,WOODPECKERS POND R4,2020,oncorhynchus_mykiss,49.729755,-96.271489,49.73_-96.271
1,LAC DU BONNET,2020,salmo_trutta,50.366597,-95.916676,50.367_-95.917
3,LAC DU BONNET,2020,salmo_trutta_x_salvelinus_fontinalis,50.366597,-95.916676,50.367_-95.917
5,LAC DU BONNET,2020,oncorhynchus_mykiss,50.366597,-95.916676,50.367_-95.917
6,HUNT LAKE,2020,salvelinus_fontinalis,49.742504,-95.177395,49.743_-95.177


In [77]:
print(df['year'].min(), df['year'].max())

# Add one indicator column per species:
#   1   on rows whose 'species' is that species,
#   NaN elsewhere, then filled with the mean of the indicator over the same
#       (year, lat_long) group, so every row of a group where the species
#       occurs ends up with 1.0 and groups without it stay NaN.
# The species column is compared directly instead of `df.isin([col]).any(1)`:
# the positional axis argument of DataFrame.any is rejected by pandas >= 2.0,
# and scanning every column of the frame for each species is unnecessary.
# dropna() keeps a missing species label from producing a NaN-named column.
for col in tqdm(df['species'].dropna().unique()):
    df[col] = np.where(df['species'].eq(col), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

 26%|██▌       | 7/27 [00:00<00:00, 67.06it/s]

1917 2020


100%|██████████| 27/27 [00:00<00:00, 46.30it/s]


Unnamed: 0,waterbody_name,year,species,latitude,longitude,lat_long,oncorhynchus_mykiss,salmo_trutta,salmo_trutta_x_salvelinus_fontinalis,salvelinus_fontinalis,...,sander_canadensis,ambloplites_rupestris,morone_chrysops,thymallus_arcticus,oncorhynchus_nerka,siluriformes_spp,oncorhynchus_clarkii,micropterus_salmoides,pimephales_promelas,chrosomus_neogaeus
6610,HIDDEN LAKE,1989,oncorhynchus_mykiss,54.780047,-101.877813,54.78_-101.878,1.0,,,,...,,,,,,,,,,
8880,BIRCH RIVER,1973,salvelinus_fontinalis,49.648411,-95.726603,49.648_-95.727,,,,1.0,...,,,,,,,,,,
9852,PERCH LAKE,1964,oncorhynchus_mykiss,51.651389,-100.898611,51.651_-100.899,1.0,,,,...,,,,,,,,,,
286,SCOTTY LAKE,2019,salvelinus_fontinalis,54.733567,-101.67028,54.734_-101.67,1.0,,1.0,1.0,...,,,,,,,,,,
9790,PERCH LAKE,1965,esox_masquinongy,51.651389,-100.898611,51.651_-100.899,,,,,...,,,,,,,,,1.0,


In [79]:
# Collapse the per-species rows: remove the species label, then keep the first
# row for each distinct combination of all columns except the first one, and
# export the result.
print(len(df))
df = df.drop(columns=['species'])
dedup_columns = df.columns[1:]
df = df.drop_duplicates(subset=dedup_columns)
print(len(df))

display(df.head())
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Processed/manitoba_gov_stocking_1917_2020_occurence.csv',
    index=False,
)

9851
8284


Unnamed: 0,waterbody_name,year,latitude,longitude,lat_long,oncorhynchus_mykiss,salmo_trutta,salmo_trutta_x_salvelinus_fontinalis,salvelinus_fontinalis,sander_vitreus,...,sander_canadensis,ambloplites_rupestris,morone_chrysops,thymallus_arcticus,oncorhynchus_nerka,siluriformes_spp,oncorhynchus_clarkii,micropterus_salmoides,pimephales_promelas,chrosomus_neogaeus
0,WOODPECKERS POND R4,2020,49.729755,-96.271489,49.73_-96.271,1.0,1.0,,1.0,,...,,,,,,,,,,
1,LAC DU BONNET,2020,50.366597,-95.916676,50.367_-95.917,1.0,1.0,1.0,,,...,,,,,,,,,,
6,HUNT LAKE,2020,49.742504,-95.177395,49.743_-95.177,,1.0,,1.0,,...,,,,,,,,,,
8,LYONS LAKE,2020,49.733471,-95.177634,49.733_-95.178,1.0,1.0,,,,...,,,,,,,,,,
9,MEADOWLARK POND R6,2020,49.731778,-96.272583,49.732_-96.273,,,,1.0,,...,,,,,,,,,,


# Manitoba Anglers Stocking

In [81]:
# Load the anglers stocking table and normalise its headers (lower-case,
# trailing underscores removed). No other processing is active in this cell.
anglers_stocking_raw = pd.read_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Manitoba/Used/manitoba_anglers_stocking.csv'
)
lowered_headers = anglers_stocking_raw.columns.str.lower()
anglers_stocking_raw.columns = lowered_headers.str.rstrip('_')
df = anglers_stocking_raw

# The remaining steps are left disabled, exactly as in the original cell:
# df = df[['WATERBODY_NAME', 'YEAR_', 'SPECIES', 'WATERBODY_LAT', 'WATERBODY_LONG']]
# df.rename(columns={'waterbody_lat': 'latitude', 'waterbody_long': 'longitude'}, inplace=True)
# df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)
# df['species'] = df['species'].str.lower().str.rstrip().str.replace(' ', '_')
# df['species'].replace(dict_species_name, inplace=True)
# df.drop_duplicates(inplace=True)

df.head()

Unnamed: 0,objectid,waterbody_id,waterbody_name,fishing_division,year,date,species,size,quantity,lat_dd,long_dd,waterbody_lat,waterbody_long
0,1,39252,WOODPECKERS POND R4,Southern Division,2020,2020-04-29,RAINBOW TROUT,18+ cm,1000.0,49.729755,-96.271489,49.729755,-96.271489
1,2,38891,LAC DU BONNET,Southern Division,2020,2020-05-01,BROWN TROUT,Adult (>30 cm),27.0,50.366597,-95.916676,50.366597,-95.916676
2,3,38891,LAC DU BONNET,Southern Division,2020,2020-05-01,BROWN TROUT,Adult (>30 cm),26.0,50.366597,-95.916676,50.366597,-95.916676
3,4,38891,LAC DU BONNET,Southern Division,2020,2020-05-01,TIGER TROUT,18+ cm,500.0,50.366597,-95.916676,50.366597,-95.916676
4,5,38891,LAC DU BONNET,Southern Division,2020,2020-05-01,BROWN TROUT,18+ cm,1000.0,50.366597,-95.916676,50.366597,-95.916676


In [81]:
# This cell works on a wide table: every column from position 6 onwards is
# treated as a species column, and rows are grouped by ('year', 'lat_long').
# Check the grouping columns first so that a frame without them is rejected
# before any column has been overwritten (otherwise the first looped column is
# replaced with 1s and the groupby then fails).
missing_group_cols = [name for name in ('year', 'lat_long') if name not in df.columns]
if missing_group_cols:
    raise KeyError(
        'df is missing the grouping column(s) {}; this cell expects a wide '
        'species table with year/lat_long columns'.format(missing_group_cols)
    )

print(df['year'].min(), df['year'].max())

# Any recorded value in a species column becomes 1; missing values are then
# filled with the mean of that column over the same (year, lat_long) group.
for col in df.columns[6:]:
    df[col] = np.where(df[col].notnull(), 1, df[col])
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

2016 2016


Unnamed: 0,waterbody_name,site_description,year,latitude,longitude,lat_long,oncorhynchus_clarkii,salvelinus_fontinalis,salvelinus_confluentus,oncorhynchus_mykiss,rhinichthys_cataractae,catostomus_catostomus,catostomus_commersonii,percopsis_omiscomaycus,lota_lota,salmo_trutta,oncorhynchus_clarkii_x_oncorhynchus_mykiss
10,WATERTON,Blakiston,2016,49.096255,-113.905775,49.096_-113.906,,1.0,1.0,,,,,,,,
33,WATERTON,Dungarven,2016,49.186081,-113.943429,49.186_-113.943,1.0,1.0,,,,,,,,,1.0
31,WATERTON,Rowe,2016,49.056519,-114.049382,49.057_-114.049,,,,,,,,,,,
17,WATERTON,Blue Grouse,2016,49.120595,-114.141875,49.121_-114.142,,,,,,,,,,,
15,WATERTON,Crooked,2016,49.063478,-113.751008,49.063_-113.751,,1.0,,,,,,,,,


In [84]:
# Keep the first row for each distinct combination of all columns from
# position 2 onwards (the first two columns are not part of the key), then
# export the result.
# NOTE(review): the output path below names a Saskatchewan/Waterton 2016 file,
# while the load cell directly above this section reads the Manitoba anglers
# stocking table - confirm which dataset `df` holds before running this cell.
print(len(df))
dedup_columns = df.columns[2:]
df = df.drop_duplicates(subset=dedup_columns)
print(len(df))

display(df.sample(5))
df.to_csv(
    '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Saskatchewan/Processed/waterton_2016_occurence.csv',
    index=False,
)

43
27


Unnamed: 0,waterbody_name,site_description,year,latitude,longitude,lat_long,oncorhynchus_clarkii,salvelinus_fontinalis,salvelinus_confluentus,oncorhynchus_mykiss,rhinichthys_cataractae,catostomus_catostomus,catostomus_commersonii,percopsis_omiscomaycus,lota_lota,salmo_trutta,oncorhynchus_clarkii_x_oncorhynchus_mykiss
13,WATERTON,Bauerman,2016,49.151866,-114.08532,49.152_-114.085,,,,,,,,,,,
3,WATERTON,Blakiston,2016,49.125808,-114.02705,49.126_-114.027,,,1.0,,,,,,,,
40,WATERTON,Lone,2016,49.112359,-114.138086,49.112_-114.138,,,,,,,,,,,
8,WATERTON,Blakiston,2016,49.112009,-114.0789,49.112_-114.079,,,,,,,,,,,
7,WATERTON,Blue Grouse,2016,49.118292,-114.133194,49.118_-114.133,,,,,,,,,,,
