In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as graph
import seaborn as sns

import cartopy.crs as ccrs

import statsmodels

import os
from tqdm import tqdm, trange

from convertbng.util import convert_bng, convert_lonlat
import utm

In [2]:
# code and species dictionary

code_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/Alberta/To_look_at/Banff_NP_Freshwater_Lake_Fish_Index_2017_data_dictionary.csv')
# code_df = code_df.iloc[:-6]
code_df.rename(columns={'Data_Value_Valeur_de_la_donnée': 'code_name', 
                        'Value_Description_EN_Description_de_la_valeur': 'common_name'}, inplace=True)
code_df = code_df[['code_name', 'common_name']]
code_df['common_name'] = code_df['common_name'].str.lower().str.replace(' ', '_')
display(code_df.sample(5))

dict_code_name = code_df.set_index('code_name').to_dict()['common_name']

Unnamed: 0,code_name,common_name
30,GOLD,goldeye
7,LKWH,lake_whitefish
34,LNDC,longnose_dace
25,CISC,cisco
37,NRPK,northern_pike


In [3]:
new_codes = {'LNDC': 'longnose_dace', 'BNTR': 'brown_trout', 'RNTR1': 'rainbow_trout', 'CTTR1': 'cutthroat_trout', 
             'WHSC': 'white_sucker', 'BLTRCTTR(N&I)': 'bull_trout_x_cutthroat_trout', 'CTTR(hybrids)': 'cutthroat_trout', 
             'CTTRhybrids': 'cutthroat_trout', 'SUCK': 'white_sucker'}

dict_code_name.update(new_codes)

In [4]:
# Species dictionay

species_name_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv')

dict_species_name = species_name_df.set_index('common_name').to_dict()['scientific_name']

In [5]:
new_names = {'nine-spine_stickleback': 'pungitius_pungitius'}

dict_species_name.update(new_names)
dict_species_name

{'lake_sturgeon': 'acipenser_fulvescens',
 'green_sturgeon': 'acipenser_medirostris',
 'atlantic_sturgeon': 'acipenser_oxyrinchus',
 'white_sturgeon': 'acipenser_transmontanus',
 'sturgeon': 'acipenseridae_spp',
 'sturgeons': 'acipenseridae_spp',
 'chiselmouth': 'acrocheilus_alutaceus',
 'poachers': 'agonidae_spp',
 'poacher_spp': 'agonidae_spp',
 'northern_spearnose_poacher': 'agonopsis_vulsa',
 'northern_spparnose_poacher': 'agonopsis_vulsa',
 'blueback_herring': 'alosa_aestivalis',
 'skipjack_herring': 'alosa_chrysochloris',
 'alewife': 'alosa_pseudoharengus',
 'alewife_(gaspereau)': 'alosa_pseudoharengus',
 'american_shad': 'alosa_sapidissima',
 'allis_shad': 'alosa_spp',
 'rock_bass': 'ambloplites_rupestris',
 'tiger_salamander': 'ambystoma_tigrinum',
 'black_bullhead': 'ameiurus_melas',
 'black_catfish': 'ameiurus_melas',
 'bullhead': 'ameiurus_melas',
 'black_bullhead_x_brown_bullhead': 'ameiurus_melas_x_ameiurus_nebulosus',
 'yellow_bullhead': 'ameiurus_natalis',
 'brown_bullhe

In [7]:
species_name_df = pd.DataFrame.from_dict({'common_name': list(dict_species_name.keys()), 
                                        'scientific_name' : list(dict_species_name.values())})
# species_name_df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/species_name_dictionary.csv', index=False)

# New Brunswick

In [8]:
# Bay of Fundy

df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Used/minas_basin_bay_of_fundy.csv')

df = df[['scientificName', 'eventDate', 'decimalLongitude', 'decimalLatitude', 'waterBody', 'year', 'class']]
df = df[df['class'].isin(['Actinopterygii'])]
df.rename(columns={'decimalLongitude': 'longitude', 'decimalLatitude': 'latitude', 'waterBody': 'waterbody_name'}, inplace=True)
df.drop(columns=['eventDate', 'class'], inplace=True)

df['scientificName'] = df['scientificName'].str.lower().str.replace(' ', '_')
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)
df.head()

Unnamed: 0,scientificName,longitude,latitude,waterBody,year,lat_long
1,alosa_pseudoharengus,-64.105719,45.381385,Bay of Fundy,2009,45.381_-64.106
2,morone_saxatilis,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005
3,scophthalmus_aquosus,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005
5,alosa_sapidissima,-64.105719,45.381385,Bay of Fundy,2009,45.381_-64.106
6,acipenser_oxyrinchus,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005


In [9]:
print(df['year'].min(), df['year'].max())
for col in tqdm(df['scientificName'].unique()):
    df[col] = np.where(df.isin([col]).any(1), 1, np.nan)
    df[col] = df[col].fillna(df.groupby('lat_long')[col].transform('mean'))

display(df.head())

100%|██████████| 9/9 [00:00<00:00, 242.99it/s]

2009 2009





Unnamed: 0,scientificName,longitude,latitude,waterBody,year,lat_long,alosa_pseudoharengus,morone_saxatilis,scophthalmus_aquosus,alosa_sapidissima,acipenser_oxyrinchus,pseudopleuronectes_americanus,hemitripterus_americanus,alosa_aestivalis,myoxocephalus_octodecemspinosus
1,alosa_pseudoharengus,-64.105719,45.381385,Bay of Fundy,2009,45.381_-64.106,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
2,morone_saxatilis,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,
3,scophthalmus_aquosus,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,
5,alosa_sapidissima,-64.105719,45.381385,Bay of Fundy,2009,45.381_-64.106,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
6,acipenser_oxyrinchus,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,


In [10]:
df.drop(columns=['scientificName'], inplace=True)
print(len(df))
df.drop_duplicates(inplace=True)
print(len(df))
display(df.head())

df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/bay_of_fundy_occurence_2009.csv', 
          index=False)

15
2


Unnamed: 0,longitude,latitude,waterBody,year,lat_long,alosa_pseudoharengus,morone_saxatilis,scophthalmus_aquosus,alosa_sapidissima,acipenser_oxyrinchus,pseudopleuronectes_americanus,hemitripterus_americanus,alosa_aestivalis,myoxocephalus_octodecemspinosus
1,-64.105719,45.381385,Bay of Fundy,2009,45.381_-64.106,1.0,1.0,1.0,1.0,1.0,1.0,1.0,,1.0
2,-64.005356,45.227681,Bay of Fundy,2009,45.228_-64.005,1.0,1.0,1.0,,1.0,1.0,1.0,1.0,


### ACCDC

In [9]:
# accdc

df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Used/accdc.csv')
df = df[['scientificName', 'eventDate', 'decimalLongitude', 'decimalLatitude', 'waterBody', 'year', 'class']]
df = df[df['class'].isin(['Actinopterygii'])]
df.rename(columns={'decimalLongitude': 'longitude', 'decimalLatitude': 'latitude', 'waterBody': 'waterbody_name'}, inplace=True)
df.drop(columns=['eventDate', 'class'], inplace=True)
df.dropna(subset=['year'], inplace=True)

df['scientificName'] = df['scientificName'].str.lower().str.replace(' ', '_')
df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)
df.head()

Unnamed: 0,scientificName,longitude,latitude,waterbody_name,year,lat_long
21,salmo_salar,-60.78861,46.662315,,2002.0,46.662_-60.789
62,acipenser_brevirostrum,-65.900002,45.5,,2000.0,45.5_-65.9
111,salmo_salar,-66.363792,46.656895,,2002.0,46.657_-66.364
113,salmo_salar,-62.70554,46.319361,,2002.0,46.319_-62.706
123,salmo_salar,-62.281525,45.114655,,2002.0,45.115_-62.282


In [10]:
for col in tqdm(df['scientificName'].unique()):
    df[col] = np.where(df.isin([col]).any(1), 1, np.nan)
    df[col] = df[col].fillna(df.groupby('lat_long')[col].transform('mean'))

display(df.head())

100%|██████████| 4/4 [00:00<00:00, 219.07it/s]


Unnamed: 0,scientificName,longitude,latitude,waterbody_name,year,lat_long,salmo_salar,acipenser_brevirostrum,coregonus_huntsmani,acipenser_oxyrinchus
21,salmo_salar,-60.78861,46.662315,,2002.0,46.662_-60.789,1.0,,,
62,acipenser_brevirostrum,-65.900002,45.5,,2000.0,45.5_-65.9,,1.0,,
111,salmo_salar,-66.363792,46.656895,,2002.0,46.657_-66.364,1.0,,,
113,salmo_salar,-62.70554,46.319361,,2002.0,46.319_-62.706,1.0,,,
123,salmo_salar,-62.281525,45.114655,,2002.0,45.115_-62.282,1.0,,,


In [11]:
df.drop(columns=['scientificName'], inplace=True)
print(len(df))
df.drop_duplicates(inplace=True)
print(len(df))
display(df.head())

df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/accdc_occurence_2009.csv', 
          index=False)

48
48


Unnamed: 0,longitude,latitude,waterbody_name,year,lat_long,salmo_salar,acipenser_brevirostrum,coregonus_huntsmani,acipenser_oxyrinchus
21,-60.78861,46.662315,,2002.0,46.662_-60.789,1.0,,,
62,-65.900002,45.5,,2000.0,45.5_-65.9,,1.0,,
111,-66.363792,46.656895,,2002.0,46.657_-66.364,1.0,,,
113,-62.70554,46.319361,,2002.0,46.319_-62.706,1.0,,,
123,-62.281525,45.114655,,2002.0,45.115_-62.282,1.0,,,


### Atlantic SAlmon smolt

In [12]:
# Atlantic salmon smolt

atl_salmon_dict_df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Used/atlantic_salmon_smolt_species_list.csv')
atl_salmon_dict_df['scientific_name'] = atl_salmon_dict_df['scientific_name'].str.lower().str.replace(' ', '_')
display(atl_salmon_dict_df.sample(5))

dict_atl_salmon = atl_salmon_dict_df.set_index('code_name').to_dict()['scientific_name']

Unnamed: 0,code_name,common_name,common_name_en__nom_commun_en,common_name_en__nom_commun_fr,life_stage_en__étape_de_vie_en,life_stage_fr__étape_de_vie_fr,scientific_name,ITIS_TSN
45,MACK,Mackerel,Mackerel,maquereau commun,,,scomber_scombrus,172414.0
46,MINN,Minnows (non specific),Minnows (non specific),ménés (non spécifique),,,phoxinus_phoxinus,163342.0
36,GASP,Gaspereau,Gaspereau,gaspareau,,,alosa_pseudoharengus,161701.0
19,BLDA,Blacknose dace,Blacknose dace,naseux noir,,,rhinichthys_atratulus,163382.0
65,SLSC,Slimy sculpin,Slimy sculpin,chabot visqueux,,,cottus_cognatus,167232.0


In [13]:
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/To_look_at//atlantic_salmon_smolt.csv')
df.drop(columns=[col for col in df.columns if '_avg_fork_length' in col], inplace=True)
df.drop(columns=[col for col in df.columns if '_avg_weight' in col], inplace=True)
df.drop(columns=[col for col in df.columns if '_avg_total_length' in col], inplace=True)

df.columns = df.columns.str.replace('_abundance', '')
df.drop(columns=['avg_air_temp_arrival', 'avg_max_air_temp', 'avg_water_temp_shore'], inplace=True)
df['lat_long'] = df['site_latitude'].round(3).astype(str) + '_' + df['site_longitude'].round(3).astype(str)

display(df.head())

Unnamed: 0,year,site_name,site_latitude,site_longitude,3SST,9SST,ALEW,AMEL,AMSH,ATSA(sm),...,SALA,SELA,SELA(am),SELA(si),SLSC,STIC,SUCK,WHSU,YEPE,lat_long
0,2002,Butters-RST (Restigouche River),47.9867,-66.8023,,,3.0,429.0,1.0,692.0,...,,,,,1.0,1645.0,12.0,,,47.987_-66.802
1,2002,Kedgwick-RST (Kedgwick River),47.6711,-67.5098,,,,72.0,,1279.0,...,1.0,,39.0,11.0,3.0,14.0,6.0,2.0,,47.671_-67.51
2,2002,LittleMainRestigouche-RST (Little Main Restigo...,47.5285,-67.6187,,,,,,,...,,,,,,,,,,47.528_-67.619
3,2002,Moses-RST (Restigouche River),47.9971,-66.8159,,,,32.0,,86.0,...,,,,,,17.0,,,,47.997_-66.816
4,2002,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,,,,,,,...,,,,,,,,,,47.853_-66.909


In [14]:
df.rename(columns = dict_atl_salmon, inplace=True)
df.drop(columns = [col for col in df.columns if 'remove' in col], inplace=True)
df.head()

Unnamed: 0,year,site_name,site_latitude,site_longitude,gasterosteus_aculeatus,pungitius_pungitius,alosa_pseudoharengus,anguilla_rostrata,alosa_sapidissima,salmo_linnaeus,...,prosopium_cylindraceum,petromyzon_marinus,petromyzon_marinus.1,petromyzon_marinus.2,cottus_cognatus,gasterosteidae_spp,catostomidae_spp,catostomus_commersoni,perca_flavescens,lat_long
0,2002,Butters-RST (Restigouche River),47.9867,-66.8023,,,3.0,429.0,1.0,692.0,...,,,,,1.0,1645.0,12.0,,,47.987_-66.802
1,2002,Kedgwick-RST (Kedgwick River),47.6711,-67.5098,,,,72.0,,1279.0,...,2.0,,39.0,11.0,3.0,14.0,6.0,2.0,,47.671_-67.51
2,2002,LittleMainRestigouche-RST (Little Main Restigo...,47.5285,-67.6187,,,,,,,...,,,,,,,,,,47.528_-67.619
3,2002,Moses-RST (Restigouche River),47.9971,-66.8159,,,,32.0,,86.0,...,,,,,,17.0,,,,47.997_-66.816
4,2002,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,,,,,,,...,,,,,,,,,,47.853_-66.909


In [15]:
df_2 = pd.DataFrame(df.groupby(['year', 'lat_long']).mean())
df_3 = pd.DataFrame(df.groupby(by=df.columns, axis=1).sum())

df_3['lat_long'] = df_3['site_latitude'].round(3).astype(str) + '_' + df_3['site_longitude'].round(3).astype(str)
df_3.replace(0, np.nan, inplace=True)

df_col_names = [col for col in df_3.columns if col in list(dict_atl_salmon.values())]
col_order = ['year', 'site_name', 'site_latitude', 'site_longitude', 'lat_long']
col_order.extend(df_col_names)

df_3 = df_3[col_order]
df_3.rename(columns = {'site_name': 'waterbody_name', 'site_latitude': 'latitude', 'site_longitude': 'longitude'}, inplace=True)

display(df_3.sample(5))

Unnamed: 0,year,waterbody_name,latitude,longitude,lat_long,alosa_pseudoharengus,alosa_sapidissima,ameiurus_nebulosus,anguilla_rostrata,catostomidae_spp,...,perca_flavescens,petromyzon_marinus,petromyzontiformes_spp,prosopium_cylindraceum,pungitius_pungitius,rhinichthys_atratulus,salmo_linnaeus,salmo_trutta,salvelinus_fontinalis,semotilus_atromaculatus
81,2018,Kedgwick-RST (Kedgwick River),47.6711,-67.5098,47.671_-67.51,,,,15.0,,...,,1.0,,,,19.0,1556.0,,3.0,2.0
9,2003,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,47.853_-66.909,,,,,,...,,,,,,,,,,
42,2010,LittleMainRestigouche-RST (Little Main Restigo...,47.5285,-67.6187,47.528_-67.619,,,,,,...,,,,,,,,,,
40,2010,Butters-RST (Restigouche River),47.9867,-66.8023,47.987_-66.802,,,,237.0,,...,,21.0,,,,6.0,1602.0,,1.0,
30,2008,Butters-RST (Restigouche River),47.9867,-66.8023,47.987_-66.802,,,,323.0,,...,,61.0,,,4.0,19.0,1590.0,,,


In [16]:
df_col_names = [col for col in df_3.columns if col in list(dict_atl_salmon.values())]

for col in df_col_names:
    df_3[col] = np.where(df_3[col].notnull(), 1, df_3[col])
    df_3[col] = df_3[col].fillna(df_3.groupby(['year', 'lat_long'])[col].transform('mean'))
    
display(df_3.head())

Unnamed: 0,year,waterbody_name,latitude,longitude,lat_long,alosa_pseudoharengus,alosa_sapidissima,ameiurus_nebulosus,anguilla_rostrata,catostomidae_spp,...,perca_flavescens,petromyzon_marinus,petromyzontiformes_spp,prosopium_cylindraceum,pungitius_pungitius,rhinichthys_atratulus,salmo_linnaeus,salmo_trutta,salvelinus_fontinalis,semotilus_atromaculatus
0,2002,Butters-RST (Restigouche River),47.9867,-66.8023,47.987_-66.802,1.0,1.0,,1.0,1.0,...,,,1.0,,,1.0,1.0,,1.0,
1,2002,Kedgwick-RST (Kedgwick River),47.6711,-67.5098,47.671_-67.51,,,,1.0,1.0,...,,1.0,1.0,1.0,,1.0,1.0,,1.0,
2,2002,LittleMainRestigouche-RST (Little Main Restigo...,47.5285,-67.6187,47.528_-67.619,,,,,,...,,,,,,,,,,
3,2002,Moses-RST (Restigouche River),47.9971,-66.8159,47.997_-66.816,,,,1.0,,...,,,,,,1.0,1.0,,1.0,
4,2002,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,47.853_-66.909,,,,,,...,,,,,,,,,,


In [17]:
print(len(df_3))
df_3.drop_duplicates(inplace=True)
print(len(df_3))

display(df_3.sample(5))

df_3.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/atl_salmon_smolt_occurence_2002_2019.csv', 
            index=False)

90
90


Unnamed: 0,year,waterbody_name,latitude,longitude,lat_long,alosa_pseudoharengus,alosa_sapidissima,ameiurus_nebulosus,anguilla_rostrata,catostomidae_spp,...,perca_flavescens,petromyzon_marinus,petromyzontiformes_spp,prosopium_cylindraceum,pungitius_pungitius,rhinichthys_atratulus,salmo_linnaeus,salmo_trutta,salvelinus_fontinalis,semotilus_atromaculatus
19,2005,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,47.853_-66.909,,,,1.0,1.0,...,,,1.0,,,1.0,1.0,,1.0,
28,2007,Moses-RST (Restigouche River),47.9971,-66.8159,47.997_-66.816,,,,1.0,,...,,1.0,,,1.0,1.0,1.0,,,
59,2013,Upsalquitch-RST (Upsalquitch River),47.8532,-66.9092,47.853_-66.909,,,,1.0,,...,,1.0,,,,1.0,1.0,,1.0,
6,2003,Kedgwick-RST (Kedgwick River),47.6711,-67.5098,47.671_-67.51,1.0,,,1.0,1.0,...,,,1.0,,,1.0,1.0,,1.0,
30,2008,Butters-RST (Restigouche River),47.9867,-66.8023,47.987_-66.802,,,,1.0,,...,,1.0,,,1.0,1.0,1.0,,,


# Stock Assessment

In [22]:
# Stock assessment

df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/To_look_at//stock_assesment_data.csv')
df = df[['YEAR', 'lat', 'lon', 'scien', 'dmin', 'dmax', 'AREA', 'DEPTH.x']]

df['lat_long'] = df['lat'].round(3).astype(str) + '_' + df['lon'].round(3).astype(str)
df['scien'] = df['scien'].str.lower().str.replace(' ', '_')
df.replace('sebastes_sp.', 'sebastes_spp', inplace=True)

df.rename(columns={'YEAR': 'year', 'lat': 'latitude', 'long': 'longitude', 'dmin': 'min_depth', 
                   'dmax': 'max_depth', 'AREA': 'area', 'DEPTH.x': 'mean_depth'}, inplace=True)
df.head()

Unnamed: 0,year,latitude,lon,scien,min_depth,max_depth,area,mean_depth,lat_long
0,1970,46.35,-58.9,gadus_morhua,179,184,924,181.5,46.35_-58.9
1,1976,46.66667,-59.46667,gadus_morhua,171,175,924,173.0,46.667_-59.467
2,1994,46.71467,-59.59683,gadus_morhua,153,157,924,155.0,46.715_-59.597
3,1990,46.1395,-58.77383,gadus_morhua,121,123,924,122.0,46.14_-58.774
4,1970,46.43333,-59.1,gadus_morhua,168,179,924,173.5,46.433_-59.1


In [23]:
%%time

for col in df['scien'].unique():
    df[col] = np.where(df.isin([col]).any(1), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))
    
display(df.head())

Unnamed: 0,year,latitude,lon,scien,min_depth,max_depth,area,mean_depth,lat_long,gadus_morhua,melanogrammus_aeglefinus,hippoglossoides_platessoides,pollachius_virens,urophycis_tenuis,hippoglossus_hippoglossus,pseudopleuronectes_americanus,merluccius_bilinearis,sebastes_spp
0,1970,46.35,-58.9,gadus_morhua,179,184,924,181.5,46.35_-58.9,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,1976,46.66667,-59.46667,gadus_morhua,171,175,924,173.0,46.667_-59.467,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,1994,46.71467,-59.59683,gadus_morhua,153,157,924,155.0,46.715_-59.597,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,1990,46.1395,-58.77383,gadus_morhua,121,123,924,122.0,46.14_-58.774,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,1970,46.43333,-59.1,gadus_morhua,168,179,924,173.5,46.433_-59.1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


CPU times: user 656 ms, sys: 156 ms, total: 812 ms
Wall time: 805 ms


In [24]:
print(len(df))
df.drop(columns=['scien'], inplace=True)
df.drop_duplicates(inplace=True)
print(len(df))

display(df.sample(5))
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/stock_assessment_1970_2012.csv', 
          index=False)

68155
7672


Unnamed: 0,year,latitude,lon,min_depth,max_depth,area,mean_depth,lat_long,gadus_morhua,melanogrammus_aeglefinus,hippoglossoides_platessoides,pollachius_virens,urophycis_tenuis,hippoglossus_hippoglossus,pseudopleuronectes_americanus,merluccius_bilinearis,sebastes_spp
130,1983,47.03333,-60.05,122,123,924,122.5,47.033_-60.05,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2376,1985,43.6,-59.98333,58,59,499,58.5,43.6_-59.983,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2383,1974,43.63333,-59.86667,61,65,499,63.0,43.633_-59.867,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4580,1977,43.0,-62.11667,70,70,2383,70.0,43.0_-62.117,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1067,2003,44.6935,-59.21367,119,119,1023,119.0,44.694_-59.214,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### SWAM

In [29]:
# SWAM

path = '/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/To_look_at/SWAM_files/'

df = []

for file in os.listdir(path):
    species_name = file.split('_')[2].split('.xlsx')[0]
    year = file.split('_')[1]
    df_temp = pd.read_excel(f'{path}{file}')
    
    df_temp['year'] = pd.DatetimeIndex(pd.to_datetime(df_temp['Date'])).year
    df_temp['lat_long'] = df_temp['Lat'].round(3).astype(str) + '_' + df_temp['Long'].round(3).astype(str)
    df_temp['species'] = species_name.replace(' ', '_')
    
    df.append(df_temp)
    
df = pd.concat(df)
df= df[['year', 'Lat', 'Long', 'lat_long', 'species']]
df.drop_duplicates(inplace=True)
df.dropna(axis=1, how='all', inplace=True)
df.rename(columns = {'Lat': 'latitude', 'Long': 'longitude'}, inplace=True)
df.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.




Unnamed: 0,year,latitude,longitude,lat_long,species
0,2015,45.82982,-65.22385,45.83_-65.224,anguilla_rostrata
1,2015,45.72493,-65.20672,45.725_-65.207,anguilla_rostrata
2,2015,45.79116,-65.17107,45.791_-65.171,anguilla_rostrata
3,2015,45.82706,-65.21902,45.827_-65.219,anguilla_rostrata
4,2015,45.83904,-65.24611,45.839_-65.246,anguilla_rostrata


In [30]:
for col in df['species'].unique():
    df[col] = np.where(df.isin([col]).any(1), 1, np.nan)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))
    
display(df.head())

Unnamed: 0,year,latitude,longitude,lat_long,species,anguilla_rostrata,catostomus_commersonii,cottus_cognatus,gasterosteus_aculeatus,lota_lota,luxilus_cornutus,notemigonus_crysoleucas,notropis_heterolepis,petromyzon_marinus,pungitius_pungitius,rhinichthys_atratulus,salmo_salar,salvelinus_fontinalis
0,2015,45.82982,-65.22385,45.83_-65.224,anguilla_rostrata,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,2015,45.72493,-65.20672,45.725_-65.207,anguilla_rostrata,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,2015,45.79116,-65.17107,45.791_-65.171,anguilla_rostrata,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,2015,45.82706,-65.21902,45.827_-65.219,anguilla_rostrata,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,2015,45.83904,-65.24611,45.839_-65.246,anguilla_rostrata,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [31]:
print(len(df))
df.drop(columns=['species'], inplace=True)
df.drop_duplicates(inplace=True)
print(len(df))

display(df.sample(5))
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/SWAM_2015_occurence.csv', 
          index=False)

325
25


Unnamed: 0,year,latitude,longitude,lat_long,anguilla_rostrata,catostomus_commersonii,cottus_cognatus,gasterosteus_aculeatus,lota_lota,luxilus_cornutus,notemigonus_crysoleucas,notropis_heterolepis,petromyzon_marinus,pungitius_pungitius,rhinichthys_atratulus,salmo_salar,salvelinus_fontinalis
8,2015,45.72736,-65.36009,45.727_-65.36,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
21,2015,45.78099,-65.38686,45.781_-65.387,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
14,2015,45.80344,-65.11241,45.803_-65.112,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
11,2015,45.70245,-65.09772,45.702_-65.098,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
17,2015,45.73813,-65.29926,45.738_-65.299,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Kouchibouguac

In [32]:
# Kouchibouguac

df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Used/Kouchibouguac_NP_Coastal_Marine_Estuarine_IBI_data_dictionary.csv', 
                 sep=';')

df['scientific_name'] = df['Date.of.sampling..dd.mm.yyyy.'].str.split('(').str.get(1).str.split(')').str.get(0)
df['scientific_name'] = df['scientific_name'].str.lower().str.replace(' ', '_')

dict_df_species = df.set_index('DATE.1').to_dict()['scientific_name']

In [33]:
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/To_look_at/Kouchibouguac_NP_Coastal_Marine_Estuarine_IBI_1996-2018_data.csv')

df.rename(columns=dict_df_species, inplace=True)
df.replace(0, np.nan, inplace=True)

df['lat'], df['long'] = 'TO_GET', 'TO_GET'

df['year'] = df['DATE'].str.split('\(').str.get(0).str.split('-').str.get(-1).str.split('/').str.get(-1).astype(int)
df.dropna(subset=['STATION CODE'], inplace=True)

print(len(df))
display(df.sample(5))

890


Unnamed: 0,DATE,STATION NUMBER,STATION ZONE,STATION CODE,alosa_aestivalus,apeltes_quadracus,anguilla_rostrata,catostomus_commersoni,culaea_inconstans,fundulus_diaphanous,...,rhinichthys_atratulus,scopthalmus_aequosus,salvelinus_fontinalis,salmo_salar,syngnathus_fuscus,tautogolabrus_adspersus,urophycis_tenuis,lat,long,year
725,15-07-2014,3,Rivière Kouchibouguac River (upper/supérieure),KR3,,1.0,,,,24.0,...,,,,,,,,TO_GET,TO_GET,2014
756,29-09-2014,15,Rivière Kouchibouguacis River,SLR15,1.0,1.0,,,,,...,,,,,,,,TO_GET,TO_GET,2014
734,12-08-2014,4,Rivière Kouchibouguac River (lower/inférieure),KR4,6.0,10.0,,,,27.0,...,,,,,,,,TO_GET,TO_GET,2014
224,29-05-2001,17,Rivière Kouchibouguacis River,SLR17,,1.0,,,,,...,,,,,,,,TO_GET,TO_GET,2001
552,16-06-2010,5,Rivière Kouchibouguac River (lower/inférieure),KR5,,3.0,,,,,...,,,,,,,,TO_GET,TO_GET,2010


In [34]:
for col in df.columns[4:-3]:
    df[col] = np.where(df[col].notnull(), 1, df[col])
    df[col] = df[col].fillna(df.groupby(['year', 'STATION CODE'])[col].transform('mean'))

display(df.head())

Unnamed: 0,DATE,STATION NUMBER,STATION ZONE,STATION CODE,alosa_aestivalus,apeltes_quadracus,anguilla_rostrata,catostomus_commersoni,culaea_inconstans,fundulus_diaphanous,...,rhinichthys_atratulus,scopthalmus_aequosus,salvelinus_fontinalis,salmo_salar,syngnathus_fuscus,tautogolabrus_adspersus,urophycis_tenuis,lat,long,year
0,11-06-1996,4,Rivière Kouchibouguac River (lower/inférieure),KR4,,1.0,,,,1.0,...,,,,,,,,TO_GET,TO_GET,1996
1,11-06-1996,7,Rivière Kouchibouguac River (lower/inférieure),KR7,,1.0,,,,1.0,...,,,,,,,,TO_GET,TO_GET,1996
2,11-06-1996,14,Lagune Kouchibouguac Lagoon,KR14,1.0,1.0,,,,1.0,...,,,,,,1.0,,TO_GET,TO_GET,1996
6,25-06-1996,2,Rivière Kouchibouguac River (upper/supérieure),KR2,1.0,1.0,,,,1.0,...,,,,,,,,TO_GET,TO_GET,1996
7,25-06-1996,3,Rivière Kouchibouguac River (upper/supérieure),KR3,1.0,1.0,1.0,,,1.0,...,,,,,,,,TO_GET,TO_GET,1996


In [35]:
print(len(df))
df.drop(columns=['DATE'], inplace=True)
df.drop_duplicates(inplace=True)
print(len(df))

display(df.sample(5))
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/kouchibouguac_NO_LAT_LONG_1996_2016_occurence.csv', 
          index=False)

890
232


Unnamed: 0,STATION NUMBER,STATION ZONE,STATION CODE,alosa_aestivalus,apeltes_quadracus,anguilla_rostrata,catostomus_commersoni,culaea_inconstans,fundulus_diaphanous,fundulus_heteroclitus,...,rhinichthys_atratulus,scopthalmus_aequosus,salvelinus_fontinalis,salmo_salar,syngnathus_fuscus,tautogolabrus_adspersus,urophycis_tenuis,lat,long,year
234,19,Rivière Kouchibouguac River (lower/inférieure),KR19,,1.0,,,,1.0,1.0,...,,,1.0,,,,,TO_GET,TO_GET,2001
697,4,Rivière Kouchibouguac River (lower/inférieure),KR4,1.0,1.0,,,,1.0,1.0,...,,,,,1.0,,,TO_GET,TO_GET,2014
82,5,Rivière Kouchibouguac River (lower/inférieure),KR5,,1.0,,,,1.0,1.0,...,,,,,,1.0,,TO_GET,TO_GET,1999
33,6,Rivière Kouchibouguac River (lower/inférieure),KR6,,1.0,,,,1.0,1.0,...,,,,,,1.0,,TO_GET,TO_GET,1997
534,19,Rivière Kouchibouguac River (lower/inférieure),KR19,,1.0,,,,,1.0,...,,,,,,1.0,,TO_GET,TO_GET,2010


# PEI

In [41]:
df = pd.read_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/PEI/To_look_at/PEI_NP_Freshwater_Fish_Community_2006-2019_data_1.csv')
df.columns = df.columns.str.lower().str.replace(' ', '_')

df.drop(columns=['ecosystem', 'watershed', 'total_catch', 'shoreline_sector', 'gear_type', 
                 'gear_colour', 'month_in', 'day_in', 'time_in', 'month_out', 'day_out', 'time_out', 'effort_(hours)'], 
        inplace=True)

df.rename(columns=dict_species_name, inplace=True)

df['lat_long'] = df['latitude'].round(3).astype(str) + '_' + df['longitude'].round(3).astype(str)
df.rename(columns = {'location': 'waterbody_name'}, inplace=True)
df['waterbody_name'] = df['waterbody_name'].str.rstrip()
df['waterbody_name'].replace('Longpond', 'Long Pond', inplace=True)
df.head()

Unnamed: 0,waterbody_name,year,latitude,longitude,morone_americana,anguilla_rostrata,alosa_pseudoharengus,fundulus_diaphanus,fundulus_heteroclitus,apeltes_quadracus,gasterosteus_aculeatus,salvelinus_fontinalis,culaea_inconstans,pungitius_pungitius,osmerus_mordax,lat_long
0,Lake of Shining Waters,2006,46.49648,-63.38951,,0.0,,,,,,,,,,46.496_-63.39
1,Lake of Shining Waters,2006,46.49648,-63.38951,,0.0,,,,,,,,,,46.496_-63.39
2,Lake of Shining Waters,2006,46.49648,-63.38951,2.0,8.0,1.0,112.0,1.0,,,,,,,46.496_-63.39
3,Lake of Shining Waters,2006,46.4978,-63.39002,,0.0,,4.0,,4.0,,,,,,46.498_-63.39
4,Lake of Shining Waters,2006,46.4978,-63.39002,,0.0,,2.0,,,,,,,,46.498_-63.39


In [46]:
for col in df.columns[4:-1]:
    df[col] = np.where(df[col].notnull(), 1, df[col]).astype(float)
    df[col] = df[col].fillna(df.groupby(['year', 'lat_long'])[col].transform('mean'))

display(df.sample(5))

Unnamed: 0,waterbody_name,year,latitude,longitude,morone_americana,anguilla_rostrata,alosa_pseudoharengus,fundulus_diaphanus,fundulus_heteroclitus,apeltes_quadracus,gasterosteus_aculeatus,salvelinus_fontinalis,culaea_inconstans,pungitius_pungitius,osmerus_mordax,lat_long
1953,Rollings Pond,2016,46.464092,-63.303321,,1.0,,1.0,1.0,1.0,1.0,,,1.0,,46.464_-63.303
1814,Campbells Pond Inner,2015,46.40476,-63.05921,,1.0,,1.0,1.0,,,,,,,46.405_-63.059
1299,Campbells Pond Inner,2013,46.40378,-63.05825,1.0,1.0,,1.0,,,1.0,,,,,46.404_-63.058
40,Lake of Shining Waters,2006,46.49244,-63.38514,1.0,1.0,1.0,1.0,,1.0,,,,,,46.492_-63.385
1997,Rollings Pond,2016,46.464745,-63.30039,,1.0,1.0,1.0,1.0,1.0,1.0,,,,,46.465_-63.3


In [47]:
print(len(df))
df.drop_duplicates(inplace=True)
print(len(df))

display(df.sample(5))
df.to_csv('/mnt/c/Users/imrit/Downloads/Sharma_fish_project/New_Brunswick/Processed/PEI_occurence_2006_2019.csv', index=False)

2946
509


Unnamed: 0,waterbody_name,year,latitude,longitude,morone_americana,anguilla_rostrata,alosa_pseudoharengus,fundulus_diaphanus,fundulus_heteroclitus,apeltes_quadracus,gasterosteus_aculeatus,salvelinus_fontinalis,culaea_inconstans,pungitius_pungitius,osmerus_mordax,lat_long
1350,Lake of Shining Waters,2014,46.2985,-63.23297,1.0,1.0,1.0,1.0,,1.0,1.0,,,,,46.298_-63.233
2100,Campbells Pond Inner,2016,46.407829,-63.062053,1.0,1.0,,1.0,1.0,,1.0,,,,,46.408_-63.062
2446,Dalvay Lake,2018,46.41219,-63.07274,,1.0,,1.0,,,,,,,,46.412_-63.073
2226,Long Pond,2017,46.414844,-63.090092,1.0,1.0,,1.0,,,,,,,,46.415_-63.09
966,Campbells Pond Inner,2012,46.40599,-63.05891,1.0,1.0,,1.0,1.0,1.0,,,,,,46.406_-63.059


# That's it folks!