In [2]:
# last edited Apr 2022
# accidentally deleted changes - new input / output paths etc.
# still need to redo these changes

### Join EPAD data to lat / lon data for hatchery releases
March 2020
By: G Oldford

Modified Apr 2022  

Data In: 
1. EPAD data from Carl (DFO / SEP) - has no coordinates - 'actual_releases.csv'
2. RMIS release location data (edits by SOGDC) - has coordinates - 'rmis_locations_2019.csv'
3. PSF release and release location data - has coordinates - 'PSEReleasesAndLocations2019.csv'
4. Coordinate table by Greig - has a few problem coordinates georeferenced with best-guess

Purpose:
1. Fix the AVG_WEIGHT field in 'actual_releases.csv' (EPAD) when values missing or zero
2. Calculate a BIOMASS (mt) field
3. Do several 'rounds' of joins to fill in missing coordinates from above datasets
4. Export 'actual_releases_COORDS.csv

Notes:
- this EPAD data does not include Puget Sound releases
- EPAD data from Carl Walters and RMIS locations data from SOGDC
- rmis_smolt_releases dataset from  'rmis_releases.csv' from http://sogdatacentre.ca/search-data/spatial-data/ from all_layers->rmis->rmis_smolt_releases
- prioritized effort for coordinate matching on just coho and Chinook

## TOC: <a class="anchor" id="top"></a>
* [A. Address missing data](#section-A)
* [1. Join EPAD Release and RMIS Locations](#section-1)
* [2. Use SSMSP Releases - Skip this Section](#section-2)
* [3. Use PSF locations for unknown release locations](#section-3)
* [4. Fix remaining release location coordinates ](#section-4)
* [5. Re-Merge and Write to CSV](#section-5)
* [6. Write to File](#section-6)
* [experiments etc](#section-7)

In [3]:
# pandas is a library for doing r-like operations in python
import pandas as pd

In [4]:
# locations table from the SSMSP SOGDC (may have more lats / lons added than source at RMIS)
locations_df = pd.read_csv("C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/RMIS_HatcheryReleases_SSMSP_2020/MODIFIED/rmis_locations_2019.csv")

# EPAD releases received from Carl Walters
releases_df = pd.read_csv("C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/EPADHatcherReleasesGST/MODIFIED/actual_releases.csv")

print("Quick check - chinook in 1988 should be around 33 mil, with no other years higher")
print(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')].groupby(['BROOD_YEAR'])['TotalRelease'].sum())

FileNotFoundError: [Errno 2] File b'C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/RMIS_HatcheryReleases_SSMSP_2020/MODIFIED/rmis_locations_2019.csv' does not exist: b'C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/RMIS_HatcheryReleases_SSMSP_2020/MODIFIED/rmis_locations_2019.csv'

In [5]:
releases_df.columns

NameError: name 'releases_df' is not defined

## A. Address issues with empty AVE_WEIGHT and calculate biomass (mt) <a class="anchor" id="section-A"></a>
- many records have an AVG_WEIGHT of zero which causes errors in biomass calculations. Fix this by replacing these records with avg weight of other fish at similar release_stage, species. 

[BACK TO TOP](#top)

In [6]:
nonzero_aveweight = releases_df.loc[(releases_df["AVE_WEIGHT"]!=0)|
                (releases_df["AVE_WEIGHT"].notna())].groupby(['RELEASE_STAGE_NAME','SPECIES_NAME'])['AVE_WEIGHT'].mean().reset_index()

nonzero_aveweight.rename(columns={'AVE_WEIGHT':'NONZERO_MEAN_WEIGHT'}, inplace=True)

print("Mean weight of non-zero 'AVE_WEIGHT' records, grouped")
print(nonzero_aveweight)

NameError: name 'releases_df' is not defined

In [6]:
# join two tables
release_df_meanwt = pd.merge(releases_df, nonzero_aveweight, on=['RELEASE_STAGE_NAME','SPECIES_NAME'], how='left')
# calculate biomass
release_df_meanwt['BIOMASS_MT'] = (release_df_meanwt['AVE_WEIGHT'] * release_df_meanwt['TotalRelease']) / 1000000

print("Total releases, all records:")
total_releases = release_df_meanwt['TotalRelease'].sum()
print(total_releases)
print("Total releases (all records) with null or zero 'AVE_WEIGHT':")
releases_affected = release_df_meanwt.loc[(release_df_meanwt['AVE_WEIGHT']==0)|(release_df_meanwt['AVE_WEIGHT'].isna())]['TotalRelease'].sum()
print(releases_affected)
print("as percent:")
print(releases_affected / total_releases * 100)
print("---")
print("Total biomass estimated before (mt):")
print(release_df_meanwt['BIOMASS_MT'].sum().round())

Total releases, all records:
9458239389
Total releases (all records) with null or zero 'AVE_WEIGHT':
2072216260
as percent:
21.909111989806497
---
Total biomass estimated before (mt):
13435.0


In [7]:
# replace zeros in original 'AVE_WEIGHT' with the mean weight
release_df_meanwt.loc[(release_df_meanwt['AVE_WEIGHT']==0)|
                      (release_df_meanwt['AVE_WEIGHT'].isna()),'AVE_WEIGHT'] = release_df_meanwt['NONZERO_MEAN_WEIGHT']

release_df_meanwt['BIOMASS_MT'] = release_df_meanwt['AVE_WEIGHT'] * release_df_meanwt['TotalRelease'] / 1000000
print("Total releases, all records:")
total_releases = release_df_meanwt['TotalRelease'].sum()
print(total_releases)
print("Total releases (all records) with null or zero 'AVE_WEIGHT':")
releases_affected = release_df_meanwt.loc[(release_df_meanwt['AVE_WEIGHT']==0)|(release_df_meanwt['AVE_WEIGHT'].isna())]['TotalRelease'].sum()
print(releases_affected)
print("as percent:")
print(releases_affected / total_releases * 100)
print("---")
print("Total biomass estimated after (mt):")
print(release_df_meanwt['BIOMASS_MT'].sum().round())

releases_df = release_df_meanwt

Total releases, all records:
9458239389
Total releases (all records) with null or zero 'AVE_WEIGHT':
188790100
as percent:
1.9960385039478303
---
Total biomass estimated after (mt):
13959.0


In [None]:
# unused code
#releases_df_fry = releases_df.loc[((releases_df["AVE_WEIGHT"]!=0)|(releases_df["AVE_WEIGHT"].notna()))
#                                  &((releases_df["RELEASE_STAGE_NAME"]=="Nat Emerg")|(releases_df["RELEASE_STAGE_NAME"]=="Nat Fry"))]
#releases_df_coho_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Coho')]
#releases_df_chin_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Chinook')]
#releases_df_sockeye_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Sockeye')]
#releases_df_chum_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Chum')]
#releases_df_pink_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Pink')]
#releases_df_steel_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Steelhead')]
#releases_df_cutt_fry = releases_df_fry.loc[(releases_df_fry["SPECIES_NAME"]=='Cutthroat')]

#print("The average weight and stats on natural emergent coho fry:")
#print(releases_df_coho_fry["AVE_WEIGHT"].describe())

#print("The average weight and stats on natural emergent Chinook fry:")
#print(releases_df_chin_fry["AVE_WEIGHT"].describe())

#print("The average weight and stats on natural emergent Sockeye fry:")
#print(releases_df_sockeye_fry["AVE_WEIGHT"].describe())

#print("The average records weight and stats on natural emergent chum fry:")
#print(releases_df_chum_fry["AVE_WEIGHT"].describe())

#print("The average recorded weight and stats on natural emergent cutthroat fry:")
#releases_df_cutt_fry["AVE_WEIGHT"].describe()

#print("The average recorded weight and stats on natural emergent steelhead fry:")
#releases_df_steel_fry["AVE_WEIGHT"].describe()

#print("The average recorded weight and stats on natural emergent pink fry:")
#releases_df_pink_fry["AVE_WEIGHT"].describe()

#natfry_avg_chin = releases_df_chin_fry["AVE_WEIGHT"].mean()
#natfry_avg_coho = releases_df_coho_fry["AVE_WEIGHT"].mean()
#natfry_avg_steel = releases_df_steel_fry["AVE_WEIGHT"].mean()
#natfry_avg_sockeye = releases_df_sockeye_fry["AVE_WEIGHT"].mean()
#natfry_avg_chum = releases_df_chum_fry["AVE_WEIGHT"].mean()
#natfry_avg_cutt = releases_df_cutt_fry["AVE_WEIGHT"].mean()
#natfry_avg_pink = releases_df_pink_fry["AVE_WEIGHT"].mean()

## 1. Join EPAD Release and RMIS Locations <a class="anchor" id="section-1"></a>
[BACK TO TOP](#top)

In [8]:
print("Length of releases table:")
print(len(releases_df))
print("Unique RELEASE_SITE_NAME:")
print(len(releases_df.RELEASE_SITE_NAME_G.unique()))
print("Unique REL_CU_NAME:")
print(len(releases_df.REL_CU_NAME.unique()))
print("Number of Chinook release records")
print(len(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')]))
print("Number of Coho release records")
print(len(releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')]))
print("Unique locations in RMIS locations table:")
print(len(locations_df.name.unique()))

Length of releases table:
21451
Unique RELEASE_SITE_NAME:
938
Unique REL_CU_NAME:
51
Number of Chinook release records
4734
Number of Coho release records
9631
Unique locations in RMIS locations table:
17522


In [8]:
print("Unique stock production area codes:")
releases_df['STOCK_PROD_AREA_CODE'].unique()

# number of coho releases by year
print("Coho releases by year (SoG):")
releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')&
                ((releases_df['STOCK_PROD_AREA_CODE']=='LWFR')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSMN')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSVI')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='UPFR')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='TOMF')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSMS'))].groupby(['BROOD_YEAR'])['TotalRelease'].sum()

Unique stock production area codes:
Coho releases by year (SoG):


BROOD_YEAR
1967        6995
1968       69118
1969      409666
1970      766294
1971     1340584
1972      446236
1973      762572
1974      809207
1975     1513075
1976     2239821
1977     2038564
1978     2379216
1979     5558788
1980     4538967
1981     8063682
1982    10634215
1983    15066679
1984    17515108
1985    12245493
1986    12151345
1987    10584560
1988    11213660
1989    12422739
1990    12737604
1991    11612434
1992    11513600
1993    11742781
1994    12037012
1995    12309590
1996    11498311
1997    11023435
1998    12595862
1999    13922029
2000    14062323
2001    13279532
2002    11059222
2003     9912428
2004    10171653
2005     7157722
2006     7099153
2007     7846148
2008     8557775
2009     9584509
2010    10356050
2011     7802289
2012     6796025
2013     6436978
2014     3066137
2015      776645
Name: TotalRelease, dtype: int64

In [9]:
print("Chinook releases by year (SoG):")
#print(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')].groupby(['BROOD_YEAR'])['TotalRelease'].sum())
print(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')&
                ((releases_df['STOCK_PROD_AREA_CODE']=='LWFR')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSMN')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSVI')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='UPFR')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='TOMF')|
                 (releases_df['STOCK_PROD_AREA_CODE']=='GSMS'))].groupby(['BROOD_YEAR'])['TotalRelease'].sum())

Chinook releases by year (SoG):
BROOD_YEAR
1967      277630
1968      603964
1969       67326
1970      575466
1971      993309
1972      920059
1973      820762
1974      270419
1975     1666042
1976     2131779
1977     4606310
1978     3832086
1979     6939012
1980     8726071
1981     7142418
1982    10731644
1983    12477637
1984    14098527
1985    18637244
1986    25357893
1987    29884505
1988    26943634
1989    25096954
1990    32310368
1991    26428019
1992    23974446
1993    22201078
1994    24349470
1995    19850667
1996    24999327
1997    20655362
1998    25340700
1999    27140753
2000    23397423
2001    28722076
2002    24771213
2003    24899303
2004    22498296
2005    21032712
2006    20765313
2007    18363382
2008    17470964
2009    17218035
2010    15727027
2011    17162925
2012    13945284
2013    11077492
2014    13468360
2015     7236484
Name: TotalRelease, dtype: int64


In [10]:
# rename columns for easier join
releases_df["location_key"]=releases_df["RELEASE_SITE_NAME_G"]
locations_df["location_key"]=locations_df["name"]
joined_df = pd.merge(releases_df, locations_df, on='location_key', how='left')

print("Length of joined table:")
print(len(joined_df))

Length of joined table:
21451


In [12]:
joined_df.columns

Index(['PROGRAM_CODE', 'PROJ_NAME', 'SPECIES_NAME', 'RUN_NAME', 'BROOD_YEAR',
       'STOCK_NAME', 'STOCK_PROD_AREA_CODE', 'STOCK_CU_INDEX', 'STOCK_CU_NAME',
       'STOCK_TYPE_CODE', 'REARING_TYPE_CODE', 'FACILITY_NAME',
       'RELEASE_SITE_NAME', 'RELEASE_SITE_NAME_G', 'REL_CU_INDEX',
       'REL_CU_NAME', 'RELEASE_STAGE_NAME', 'MRP_TAGCODE', 'RELEASE_YEAR',
       'START_DATE', 'START_YR_REL', 'START_MO_REL', 'START_DAY_REL',
       'END_DATE', 'END_YR_REL', 'END_MO_REL', 'END_DAY_REL', 'AVE_WEIGHT',
       'PURPOSE_CODE', 'RELEASE_COMMENT', 'RowNum', 'TaggedClip', 'TaggedNum',
       'ShedTagNum', 'NoTagClip', 'NoTagNum', 'NoTagPartMarkNum',
       'UnmarkedNum', 'TotalRelease', 'NONZERO_MEAN_WEIGHT', 'BIOMASS_MT',
       'location_key', 'FID', 'objectid', 'lctype', 'location_code', 'name',
       'description', 'region', 'basin', 'rmis_latitude', 'rmis_longitude',
       'source', 'accuracy', 'notes', 'rpagency', 'submission'],
      dtype='object')

In [13]:
print("unique latitudes")
print(len(joined_df.rmis_latitude.unique()))
print("Number of null records for latitudes")
print(pd.isnull(joined_df["rmis_latitude"]).sum())
print("Number of records with a zero in latitude:")
print(len(joined_df.loc[(joined_df['rmis_latitude']==0)]))

unique latitudes
582
Number of null records for latitudes
1653
Number of records with a zero in latitude:
1824


In [14]:
joined_df_na = joined_df[joined_df["rmis_latitude"].isna()]
joined_df_zero = joined_df.loc[(joined_df['rmis_latitude']==0)]

joined_df_notna = joined_df[joined_df["rmis_latitude"].notna()]
joined_df_noproblems = joined_df_notna.loc[(joined_df_notna["rmis_latitude"]!=0)]
print("The number of EPAD release records:")
print(len(joined_df))
print("The number of EPAD release records with locations:")
print(len(joined_df_noproblems))
print("Number of unique locations with Null / NaN / Na latitudes that need to be fixed:")
print(len(joined_df_na['RELEASE_SITE_NAME_G'].unique()))
print("Number of unique locations with a zero in latitudes that need to be fixed:")
print(len((joined_df.loc[(joined_df['rmis_latitude']==0)])['RELEASE_SITE_NAME_G'].unique()))

The number of EPAD release records:
21451
The number of EPAD release records with locations:
17974
Number of unique locations with Null / NaN / Na latitudes that need to be fixed:
98
Number of unique locations with a zero in latitudes that need to be fixed:
134


In [15]:
joined_df_problems = joined_df_zero.append(joined_df_na)
print(len(joined_df_problems))
joined_df_problems['RELEASE_SITE_NAME_G'].unique()

3477


array(['R-Jimmy Charlie Sl', 'R-Majors Cr', 'R-Napoleon Cr',
       'R-First Lk/GSVI', 'R-Barnaby Swamp', 'R-Deadwood Cr',
       'R-Hong Kong Cr', 'R-Thatcher Cr', 'R-McGuffie Cr',
       'R-Blackwater/GSMN', 'R-Maclean Bay', 'R-Jitco Cr', 'R-Junior Cr',
       'R-Bear Cr/GSMN', 'R-Eel Cr', 'R-Jamieson Cr',
       'R-Homestead Pd/GSMN', 'R-Mid-Valley Pd', 'R-Steele Cr',
       'R-Cougar Cr', 'R-Washer Cr', 'R-Coal Ch', 'R-Apple Cr',
       'R-Cranberry Lk', 'R-Dudley Msh', 'R-Swayne Swamp',
       'R-Martindale Swamp', 'R-Trail Ch', 'R-Balbanian Cr',
       'R-Hennipen Cr', 'R-Laity Cr', 'R-Millionaire Cr', 'R-Clayton Ch',
       'R-Anderson Cr/Pender Hb', 'R-Katherine Lk', 'R-Mohawk Cr',
       'R-Mohawk Est', 'R-Twin Est', 'R-Timberwest Ch', 'R-Ackinclose Cr',
       'R-Cherry Cr/GSVI', 'R-Poam Lk', 'R-Relic Ch', 'R-Tseycum Cr',
       'R-Noble Cr', 'R-Beecher Cr', 'R-Oakalla Cr',
       'R-Robert Burnaby Cr', 'R-John Matthews Cr', 'R-Zirk Brk',
       'R-King Cr/GSMS', 'R-Scales Cr

In [16]:
# see how many are issues for coho and chinook
print("Total number of unique locations affected for coho:")
print(len(joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Coho')]['RELEASE_SITE_NAME_G'].unique()))

print("Total number of unique locations affected for Chinook:")
print(len(joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Chinook')]['RELEASE_SITE_NAME_G'].unique()))
print("Total number of releases (no. smolts) with no location for coho")
print((joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Coho')]['TotalRelease'].sum()))
print("Total number of releases (no. smolts) with no location for Chinook")
print((joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum()))
print("The problem locations for coho")
print(joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Coho')]['RELEASE_SITE_NAME_G'].unique())

print("The problem locations for Chinook")
print(joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Chinook')]['RELEASE_SITE_NAME_G'].unique())

Total number of unique locations affected for coho:
194
Total number of unique locations affected for Chinook:
30
Total number of releases (no. smolts) with no location for coho
24961190
Total number of releases (no. smolts) with no location for Chinook
24857160
The problem locations for coho
['R-Majors Cr' 'R-Barnaby Swamp' 'R-Deadwood Cr' 'R-Napoleon Cr'
 'R-First Lk/GSVI' 'R-Hong Kong Cr' 'R-Thatcher Cr' 'R-McGuffie Cr'
 'R-Blackwater/GSMN' 'R-Maclean Bay' 'R-Junior Cr' 'R-Bear Cr/GSMN'
 'R-Eel Cr' 'R-Jamieson Cr' 'R-Homestead Pd/GSMN' 'R-Mid-Valley Pd'
 'R-Jitco Cr' 'R-Washer Cr' 'R-Cougar Cr' 'R-Coal Ch' 'R-Steele Cr'
 'R-Apple Cr' 'R-Cranberry Lk' 'R-Dudley Msh' 'R-Swayne Swamp'
 'R-Martindale Swamp' 'R-Balbanian Cr' 'R-Hennipen Cr' 'R-Laity Cr'
 'R-Millionaire Cr' 'R-Clayton Ch' 'R-Anderson Cr/Pender Hb'
 'R-Katherine Lk' 'R-Ackinclose Cr' 'R-Cherry Cr/GSVI' 'R-Poam Lk'
 'R-Relic Ch' 'R-Noble Cr' 'R-Beecher Cr' 'R-Oakalla Cr'
 'R-Robert Burnaby Cr' 'R-John Matthews Cr' 'R-Zirk B

In [17]:
chincoho_problems = joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']=='Chinook')|(joined_df_problems['SPECIES_NAME']=='Coho')]
print("The number of records with no locations for coho and Chinook:")
print(len(chincoho_problems))

The number of records with no locations for coho and Chinook:
2174


## 2. Use SSMSP Releases - Skip this Section <a class="anchor" id="section-2"></a>
- skip for now - no notes or history available from Terry at SOGDC on where the lats and lons he generated actually came from
- this section uses SOGDC to 'rmis_releases.csv' to fill in some missing lats / lons
- downloaded from the rmis_smolt_releases dataset as 'rmis_releases.csv' from http://sogdatacentre.ca/search-data/spatial-data/ from all_layers->rmis->rmis_smolt_releases

[BACK TO TOP](#top)

In [18]:
rmis_releases_df = pd.read_csv("C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/RMIS_HatcheryReleases_SSMSP_2020/ORIGINAL/rmis_releases.csv")



  interactivity=interactivity, compiler=compiler, result=result)


In [19]:
rmis_releases_df.columns

Index(['FID', 'record_code', 'reporting_agency', 'release_agency',
       'coordinator', 'tag_code_or_release_id', 'tag_type',
       'first_sequential_number', 'last_sequential_number',
       'related_group_type', 'related_group_id', 'species', 'run',
       'brood_year', 'first_release_date', 'datefirstflag',
       'last_release_date', 'datelastflag', 'name', 'release_location_code',
       'hatchery_location_code', 'stock_location_code', 'release_stage',
       'rearing_type', 'study_type', 'release_strategy', 'avg_weight',
       'avg_length', 'study_integrity', 'cwt_1st_mark', 'cwt_1st_mark_count',
       'cwt_2nd_mark', 'cwt_2nd_mark_count', 'non_cwt_1st_mark',
       'non_cwt_1st_mark_count', 'non_cwt_2nd_mark', 'non_cwt_2nd_mark_count',
       'counting_method', 'tag_loss_rate', 'tag_loss_days',
       'tag_loss_sample_size', 'tag_reused', 'comments',
       'release_location_name', 'hatchery_location_name',
       'stock_location_name', 'release_location_state',
       'rele

In [20]:
# NOT USED - keep only geo columns
rmis_r = rmis_releases_df[['name',
                           'long_comp',
                           'lat_comp',
                           'release_location_code',
                           'hatchery_location_code',
                           'stock_location_code',
                           'release_location_name',
                           'hatchery_location_name',
                           'stock_location_name',
                           'release_location_state',
                           'release_location_rmis_region',
                           'release_location_rmis_basin',
                           'longitude',
                           'latitude',
                           'rmis_longitude',
                           'rmis_latitude',
                           'difflong',
                           'difflat',
                           'location_code']]

#### Reduce and filter the release records down to unique locations

In [21]:
# 'lat_comp' 'long_comp' appears to be populated with either 'rmis_latitude' / 'rmis_longitude' or 'latitude' / 'longitude'
# when both were available the 'difflat' /'difflon' are populated to show the difference
# OPTING TO DROP EVERYTHING except 'name', 'lat_comp', 'long_comp'
rmis_r2 = rmis_releases_df[['name',
                           'long_comp',
                           'lat_comp']]
print("Number of release records:")
print(len(rmis_r2))


Number of release records:
170277


In [22]:
# filter the null values
rmis_r3 = rmis_r2[rmis_r2["long_comp"].notna()]
rmis_filtered = rmis_r3.loc[(rmis_r3["long_comp"]!=0)]
print("Number of non null and non zero release records:")
print(len(rmis_filtered))

Number of non null and non zero release records:
164869


In [23]:
# get only unique combos
rmis_unique = rmis_filtered.groupby(['name','long_comp','lat_comp']).size().reset_index()
rmis_unique

Unnamed: 0,name,long_comp,lat_comp,0
0,15 MILE CR 08.0207,-122.027298,47.485802,6
1,2200 RD PD (GRAY),-123.742897,46.980202,1
2,25 MILE CR 11.0095,-122.259399,46.923901,7
3,25 MILE CR 47.0195,-120.258698,48.002998,21
4,28 MILE CR 10.0129,-121.546501,47.122700,18
...,...,...,...,...
8063,YUBA RIVER,-121.596001,39.126999,84
8064,ZIEGLER CR 21.0469,-123.838402,47.478901,5
8065,ZIGZAG R (SANDY R),-123.487396,46.169102,1
8066,ZIGZAG R (SANDY R),-121.927101,45.641701,1


#### Join

In [24]:
# rename column to make join easier
rmis_unique["location_key"]=rmis_unique["name"]
# join
chincoho_problems2 = pd.merge(chincoho_problems, rmis_unique, on='location_key', how='left')

In [25]:
chincoho_fixed = chincoho_problems2[chincoho_problems2["long_comp"].notna()]
chincoho_problems2 = chincoho_problems2[chincoho_problems2["long_comp"].isna()]

print("Fixed this many unique release locations with no lat /lon for coho / Chinook:")
print(len(chincoho_fixed['RELEASE_SITE_NAME_G'].unique()))
print("This many remaining:")
print(len(chincoho_problems2['RELEASE_SITE_NAME_G'].unique()))

Fixed this many unique release locations with no lat /lon for coho / Chinook:
110
This many remaining:
97


In [26]:
chincoho_problems2['RELEASE_SITE_NAME_G'].unique()

array(['R-Cranberry Lk', 'R-Swayne Swamp', 'R-Lonespoon Cr',
       'R-Chickadee Lk', 'R-Hartl Cr', 'R-Charman Cr', 'R-Union Cr',
       'R-Airport Cr/LWFR', 'R-Three B Ch', 'R-Coal Cr', 'R-L Stawamus R',
       'R-232nd Street Pd', 'R-Adams Ch', 'R-Addington Msh',
       'R-Alder Run Ch', 'R-Alouette Tr 10-11', 'R-Anderson Pd',
       'R-Angel Wing Ch', 'R-Annacis Ch', 'R-Arden Ch', 'R-Atchelitz Pd',
       'R-Big Bend Ch', 'R-Big Silver Ch', 'R-Brandt Div',
       'R-Browne Road Ch', 'R-Camp Ch', 'R-Coho Ch', 'R-Coho Pd',
       'R-Colony Farm Ch', 'R-Delair Ch', "R-Donovan's Pd",
       'R-Douglas Is Ch', 'R-Elizabeth Joe Ch', 'R-Fell Ch',
       'R-Fish Hatchery Pd', 'R-Flood Ch', 'R-Foley Ch', 'R-Forestry Ch',
       'R-Foster Ch', 'R-Gouwenberg Pd', 'R-Griffin Pk Ch',
       'R-Hennipen Cr Lad', "R-Henry's Pd", 'R-Highfalls Ch',
       'R-Hixon Ch', 'R-Homestead Pd/LWFR', 'R-Hyde Ch', 'R-Jack Sl',
       'R-Jenny Wren Ch', 'R-Jitco Ch', 'R-John Charlie Sl',
       'R-Juliet Ch', 

## 3. Use PSF locations for unknown release locations <a class="anchor" id="section-3"></a>
- data provided by Eric Hertz of PSF in March, 2020
- data modified by Greig Oldford March 24-25, 2020 - see notes in the PSF Locations 'MODIFIED' folder

[BACK TO TOP](#top)

In [27]:
# the rmis_lats etc are empty
chincoho_problems.columns

Index(['PROGRAM_CODE', 'PROJ_NAME', 'SPECIES_NAME', 'RUN_NAME', 'BROOD_YEAR',
       'STOCK_NAME', 'STOCK_PROD_AREA_CODE', 'STOCK_CU_INDEX', 'STOCK_CU_NAME',
       'STOCK_TYPE_CODE', 'REARING_TYPE_CODE', 'FACILITY_NAME',
       'RELEASE_SITE_NAME', 'RELEASE_SITE_NAME_G', 'REL_CU_INDEX',
       'REL_CU_NAME', 'RELEASE_STAGE_NAME', 'MRP_TAGCODE', 'RELEASE_YEAR',
       'START_DATE', 'START_YR_REL', 'START_MO_REL', 'START_DAY_REL',
       'END_DATE', 'END_YR_REL', 'END_MO_REL', 'END_DAY_REL', 'AVE_WEIGHT',
       'PURPOSE_CODE', 'RELEASE_COMMENT', 'RowNum', 'TaggedClip', 'TaggedNum',
       'ShedTagNum', 'NoTagClip', 'NoTagNum', 'NoTagPartMarkNum',
       'UnmarkedNum', 'TotalRelease', 'NONZERO_MEAN_WEIGHT', 'BIOMASS_MT',
       'location_key', 'FID', 'objectid', 'lctype', 'location_code', 'name',
       'description', 'region', 'basin', 'rmis_latitude', 'rmis_longitude',
       'source', 'accuracy', 'notes', 'rpagency', 'submission'],
      dtype='object')

In [28]:
#PSFlocations_df = pd.read_csv("../../PSF_Locations_2020/MODIFIED/PSF_release_locations.csv")
PSFlocations_df = pd.read_csv("C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/PSF_Locations_2020/MODIFIED/PSF_RELEASE_LOCATIONS_FIXEDCOORDS.csv")


# rename location for easier join
PSFlocations_df["location_key2"]=PSFlocations_df["Release_Site"]
# use the original release site from the EPAD data (no 'R-' prefix added)
chincoho_problems["location_key2"]=chincoho_problems["RELEASE_SITE_NAME"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [29]:
chincoho_PSFjoin = pd.merge(chincoho_problems, PSFlocations_df, on='location_key2', how='left')
chincoho_fixed2 = chincoho_PSFjoin[chincoho_PSFjoin["FINAL_LAT"].notna()]
chincoho_problems3 = chincoho_PSFjoin[chincoho_PSFjoin["FINAL_LAT"].isna()]

print("Fixed this many unique release locations with no lat /lon for coho / Chinook:")
print(len(chincoho_fixed2['RELEASE_SITE_NAME'].unique()))
print("This many remaining:")
print(len(chincoho_problems3['RELEASE_SITE_NAME'].unique()))

Fixed this many unique release locations with no lat /lon for coho / Chinook:
105
This many remaining:
102


In [30]:
chincoho_problems3["RELEASE_SITE_NAME"].unique()

array(['Bear Cr/GSMN', 'Cranberry Lk', 'Swayne Swamp', 'Martindale Swamp',
       'Guthrie Cr', 'Valens Cr', 'Swordfern Cr', 'Hastings Cr/GSVI',
       'Cat Stream Cr', 'North Shore Strms', 'Parkside Cr', 'Cub Cr',
       'Rotary Pk Ch', 'Airport Cr/LWFR', 'Coal Cr', 'L Stawamus R',
       '232nd Street Pd', 'Adams Ch', 'Addington Msh', 'Alder Run Ch',
       'Alouette Tr 10-11', 'Anderson Pd', 'Angel Wing Ch', 'Annacis Ch',
       'Arden Ch', 'Atchelitz Pd', 'Big Bend Ch', 'Big Silver Ch',
       'Brandt Div', 'Browne Road Ch', 'Camp Ch', 'Coho Ch', 'Coho Pd',
       'Colony Farm Ch', 'Delair Ch', "Donovan's Pd", 'Douglas Is Ch',
       'Elizabeth Joe Ch', 'Fell Ch', 'Fish Hatchery Pd', 'Flood Ch',
       'Foley Ch', 'Forestry Ch', 'Foster Ch', 'Gouwenberg Pd',
       'Griffin Pk Ch', 'Hennipen Cr Lad', "Henry's Pd", 'Highfalls Ch',
       'Hixon Ch', 'Homestead Pd/LWFR', 'Hyde Ch', 'Jack Sl',
       'Jenny Wren Ch', 'Jitco Ch', 'John Charlie Sl', 'Juliet Ch',
       'Kawkawa Ch', 'La

In [31]:
missing_aggregated = chincoho_problems3.groupby(['BROOD_YEAR','SPECIES_NAME','RELEASE_SITE_NAME'])['TotalRelease'].sum()
missing_aggregated.to_csv(r'ChinookCoho_EPADLocationpProblems3.csv', index = True)

  


In [32]:
#releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')&(releases_df['STOCK_PROD_AREA_CODE']=='GSVI')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()
# number of coho releases by year
print("Total number coho released, all years:")
print(releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')]['TotalRelease'].sum())
print("Number coho releases with no georef: ")
print(chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Coho')]['TotalRelease'].sum())
coho_tot = releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')]['TotalRelease'].sum()
coho_nolocations = chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Coho')]['TotalRelease'].sum()
perc_missing = coho_nolocations / coho_tot * 100
print("As a percent: ")
print(perc_missing)

Total number coho released, all years:
403082244
Number coho releases with no georef: 
7023924
As a percent: 
1.7425535618482861


In [33]:
print("Total number Chinook released, all years:")
print(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum())
print("Number coho releases with no georef: ")
print(chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum())
chinook_tot = releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum()
chinook_nolocations = chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum()
perc_missing = chinook_nolocations / chinook_tot * 100
print("As a percent: ")
print(perc_missing)

Total number Chinook released, all years:
780376122
Number coho releases with no georef: 
12075750
As a percent: 
1.5474268957706525


In [34]:
tot_year_coho = releases_df.loc[(releases_df['SPECIES_NAME']=='Coho')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()
tot_year_chinook = releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()

missing_year_coho = chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Coho')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()
missing_year_chinook = chincoho_problems3.loc[(chincoho_problems3['SPECIES_NAME']=='Chinook')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()

coho_mis_v_tot = pd.merge(tot_year_coho, missing_year_coho, on='BROOD_YEAR', how='left')
chin_mis_v_tot = pd.merge(tot_year_chinook, missing_year_chinook, on='BROOD_YEAR', how='left')

coho_mis_v_tot['percent'] = coho_mis_v_tot['TotalRelease_y'] / coho_mis_v_tot['TotalRelease_x'] * 100
chin_mis_v_tot['percent'] = chin_mis_v_tot['TotalRelease_y'] / chin_mis_v_tot['TotalRelease_x'] * 100

print("percent missing records in terms of total released, by year, Chinook:")
print(chin_mis_v_tot)

percent missing records in terms of total released, by year, Chinook:
            TotalRelease_x  TotalRelease_y   percent
BROOD_YEAR                                          
1967                277630             NaN       NaN
1968                603964             NaN       NaN
1969                 67326             NaN       NaN
1970                575466             NaN       NaN
1971                993309             NaN       NaN
1972                920059             NaN       NaN
1973                820762             NaN       NaN
1974                270419             NaN       NaN
1975               1666042             NaN       NaN
1976               2131779             NaN       NaN
1977               4606310             NaN       NaN
1978               3832086             NaN       NaN
1979               6939012             NaN       NaN
1980               8781684             NaN       NaN
1981               7235718             NaN       NaN
1982              10784044   

In [35]:
print("percent missing records in terms of total released, by year, Coho:")
coho_mis_v_tot

percent missing records in terms of total released, by year, Coho:


Unnamed: 0_level_0,TotalRelease_x,TotalRelease_y,percent
BROOD_YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1967,6995,,
1968,69118,,
1969,409666,,
1970,766294,,
1971,1340584,,
1972,446236,,
1973,762572,,
1974,809207,,
1975,1513075,,
1976,2239821,,


## 4. Fix remaining release location coordinates <a class="anchor" id="section-4"></a>
- manually added by Greig

[BACK TO TOP](#top)

In [37]:
# new coordinates
coords_g = pd.read_csv("C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/EPADHatcherReleasesGST/MODIFIED/unique_release_sites_COORDS_ADDED.csv")
coords_g.columns

Index(['RELEASE_SITE_NAME', 'LAT_GLO', 'LON_GLO', 'WATERBODY_STREAM_LINE_ID',
       'NEW_WATERSHED_CODE', 'SOURCE', 'Note'],
      dtype='object')

In [38]:
coords_g["location_key2"]=coords_g["RELEASE_SITE_NAME"]
chincoho_join4 = pd.merge(chincoho_problems3, coords_g, on='location_key2', how='left')
chincoho_join4.columns

Index(['PROGRAM_CODE', 'PROJ_NAME', 'SPECIES_NAME', 'RUN_NAME', 'BROOD_YEAR',
       'STOCK_NAME', 'STOCK_PROD_AREA_CODE', 'STOCK_CU_INDEX', 'STOCK_CU_NAME',
       'STOCK_TYPE_CODE', 'REARING_TYPE_CODE', 'FACILITY_NAME',
       'RELEASE_SITE_NAME_x', 'RELEASE_SITE_NAME_G', 'REL_CU_INDEX',
       'REL_CU_NAME', 'RELEASE_STAGE_NAME', 'MRP_TAGCODE', 'RELEASE_YEAR',
       'START_DATE', 'START_YR_REL', 'START_MO_REL', 'START_DAY_REL',
       'END_DATE', 'END_YR_REL', 'END_MO_REL', 'END_DAY_REL', 'AVE_WEIGHT',
       'PURPOSE_CODE', 'RELEASE_COMMENT', 'RowNum', 'TaggedClip', 'TaggedNum',
       'ShedTagNum', 'NoTagClip', 'NoTagNum', 'NoTagPartMarkNum',
       'UnmarkedNum', 'TotalRelease', 'NONZERO_MEAN_WEIGHT', 'BIOMASS_MT',
       'location_key', 'FID', 'objectid', 'lctype', 'location_code', 'name',
       'description', 'region', 'basin', 'rmis_latitude', 'rmis_longitude',
       'source', 'accuracy', 'notes', 'rpagency', 'submission',
       'location_key2', 'Release_Site', 'Watershed_

In [39]:
chincoho_problems4 = chincoho_join4[chincoho_join4["LAT_GLO_y"].isna()]

print("Total number Chinook released, all years:")
print(releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum())
print("Number coho releases with no georef: ")
print(chincoho_problems4.loc[(chincoho_problems4['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum())
chinook_tot = releases_df.loc[(releases_df['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum()
chinook_nolocations = chincoho_problems4.loc[(chincoho_problems4['SPECIES_NAME']=='Chinook')]['TotalRelease'].sum()
perc_missing = chinook_nolocations / chinook_tot * 100
print("As a percent: ")
print(perc_missing)

Total number Chinook released, all years:
780376122
Number coho releases with no georef: 
10575750
As a percent: 
1.3552118910168296


In [40]:
missing_year_coho = chincoho_problems4.loc[(chincoho_problems4['SPECIES_NAME']=='Coho')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()
missing_year_chinook = chincoho_problems4.loc[(chincoho_problems4['SPECIES_NAME']=='Chinook')].groupby(['BROOD_YEAR'])['TotalRelease'].sum()

coho_mis_v_tot = pd.merge(tot_year_coho, missing_year_coho, on='BROOD_YEAR', how='left')
chin_mis_v_tot = pd.merge(tot_year_chinook, missing_year_chinook, on='BROOD_YEAR', how='left')

coho_mis_v_tot['percent'] = coho_mis_v_tot['TotalRelease_y'] / coho_mis_v_tot['TotalRelease_x'] * 100
chin_mis_v_tot['percent'] = chin_mis_v_tot['TotalRelease_y'] / chin_mis_v_tot['TotalRelease_x'] * 100

print("percent missing records in terms of total released, by year, Chinook:")
chin_mis_v_tot

percent missing records in terms of total released, by year, Chinook:


Unnamed: 0_level_0,TotalRelease_x,TotalRelease_y,percent
BROOD_YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1967,277630,,
1968,603964,,
1969,67326,,
1970,575466,,
1971,993309,,
1972,920059,,
1973,820762,,
1974,270419,,
1975,1666042,,
1976,2131779,,


In [535]:
print("percent missing records in terms of total released, by year, Coho:")
coho_mis_v_tot

percent missing records in terms of total released, by year, Coho:


Unnamed: 0_level_0,TotalRelease_x,TotalRelease_y,percent
BROOD_YEAR,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1967,6995,,
1968,69118,,
1969,409666,,
1970,766294,,
1971,1340584,,
1972,446236,,
1973,762572,,
1974,809207,,
1975,1513075,,
1976,2239821,,


## 5. Re-Merge and Write to CSV <a class="anchor" id="section-5"></a>

[BACK TO TOP](#top)

In [41]:
# fixed in 1st step
# also there are non-coho and non-chinook that are 'problems' - don't forget
otherspecies_problems = joined_df_problems.loc[(joined_df_problems['SPECIES_NAME']!='Chinook')&(joined_df_problems['SPECIES_NAME']!='Coho')]

# fixed in 3rd step:
#chincoho_fixed2
# fixed and remaining unfixed in 4th step:
#chincoho_join4

print("Check that data in = data out")
print("total records out:")
tot_out = len(chincoho_join4) + len(chincoho_fixed2) + len(otherspecies_problems) + len(joined_df_noproblems)
print(tot_out)
print("total records in:")
print(len(releases_df))

Check that data in = data out
total records out:
21451
total records in:
21451


### Python doesn't like this method of setting new columns to values but it works

[BACK TO TOP](#top)

In [42]:
# create a common final field for lats and lons
# chincoho_fixed2 has 'FINAL_LAT' and 'FINAL_LON' fields all populated
joined_df_noproblems["FINAL_LAT"] = joined_df_noproblems["rmis_latitude"]
joined_df_noproblems["FINAL_LON"] = joined_df_noproblems["rmis_longitude"]
#chincoho_fixed2.loc[(chincoho_fixed2["FINAL_LAT"].isna())]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [43]:
# checks
len(joined_df_noproblems.loc[joined_df_noproblems['FINAL_LAT'].isna()])

0

In [44]:
otherspecies_problems["FINAL_LAT"] = otherspecies_problems["rmis_latitude"]
otherspecies_problems["FINAL_LON"] = otherspecies_problems["rmis_longitude"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [45]:
chincoho_join4["FINAL_LAT"] = chincoho_join4["LAT_GLO_y"]
chincoho_join4["FINAL_LON"] = chincoho_join4["LAT_GLO_x"]

# checks code for ref
#print(len(chincoho_join4.loc[chincoho_join4['FINAL_LAT'].notna()]))
#print(len(chincoho_join4.loc[chincoho_join4['FINAL_LAT']==0]))

### Append all tables back together and clean up, write to file <a class="anchor" id="section-6"></a>

[BACK TO TOP](#top)

In [46]:
releases_df_COORDS = joined_df_noproblems.append(otherspecies_problems, ignore_index = True)
releases_df_COORDS = releases_df_COORDS.append(chincoho_fixed2, ignore_index = True)
releases_df_COORDS = releases_df_COORDS.append(chincoho_join4, ignore_index = True)
print("Num records in: ")
print(len(releases_df))
print("Num records out: ")
print(len(releases_df_COORDS))

Num records in: 
21451
Num records out: 
21451


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [47]:
releases_df_COORDS.columns

Index(['AVE_WEIGHT', 'BIOMASS_MT', 'BROOD_YEAR', 'COORD_SourceRound',
       'END_DATE', 'END_DAY_REL', 'END_MO_REL', 'END_YR_REL', 'FACILITY_NAME',
       'FID', 'FINAL_LAT', 'FINAL_LON', 'FeatureType', 'GAZETTED_NAME',
       'LATITUDE_PSF', 'LAT_GLO', 'LAT_GLO_x', 'LAT_GLO_y', 'LONGITUDE_PSF',
       'LON_GLO', 'LON_GLO_x', 'LON_GLO_y', 'MRP_TAGCODE',
       'NEW_WATERSHED_CODE', 'NONZERO_MEAN_WEIGHT', 'NoTagClip', 'NoTagNum',
       'NoTagPartMarkNum', 'Note', 'PROGRAM_CODE', 'PROJ_NAME', 'PURPOSE_CODE',
       'REARING_TYPE_CODE', 'RELEASE_COMMENT', 'RELEASE_SITE_NAME',
       'RELEASE_SITE_NAME_G', 'RELEASE_SITE_NAME_x', 'RELEASE_SITE_NAME_y',
       'RELEASE_STAGE_NAME', 'RELEASE_YEAR', 'REL_CU_INDEX', 'REL_CU_NAME',
       'RUN_NAME', 'Release_Site', 'RowNum', 'SOURCE', 'SPECIES_NAME',
       'START_DATE', 'START_DAY_REL', 'START_MO_REL', 'START_YR_REL',
       'STOCK_CU_INDEX', 'STOCK_CU_NAME', 'STOCK_NAME', 'STOCK_PROD_AREA_CODE',
       'STOCK_TYPE_CODE', 'ShedTagNum', 'Tagg

In [543]:
releases_df_COORDS.to_csv(r'actual_releases_COORDS.csv', index = True)

## To do (Maybe) Join a DFO SEP facility table to the EPAD Release table, use <a class="anchor" id="section-7"></a>
- not yet done
- not really a good idea because facilities are not locations of release

[BACK TO TOP](#top)

In [71]:
print("Unique locations of releases with no geo ref:")
print(len(chincoho_problems['RELEASE_SITE_NAME_G'].unique()))

Unique locations of releases with no geo ref:
207


In [184]:
print(len(chincoho_problems['FACILITY_NAME'].unique()))

179


In [185]:
chincoho_problems['FACILITY_NAME'].unique()

array(['Cowichan River H', 'Nanaimo River H', 'Lang Creek H',
       'MacLean Bay Seapen', 'Porpoise Bay H', 'Seymour River H',
       'Jitco Creek H', 'Rosewall Creek H', 'Cook Creek Inc',
       'Black Creek H', 'French Creek H', 'MacBlo/ Englishman Chan',
       'Alouette River, South H', 'Anderson Creek H', 'Chapman Creek H',
       'Englishman River H', 'Fanny Bay H', 'Goldstream River H',
       'Bell-Irving Kanaka Cr H', 'Little River H', 'Oyster River H',
       'Birkenhead River H', 'Serpentine River H', 'Big Qualicum River H',
       'Capilano River H', 'Chehalis River H', 'Chilliwack River H',
       'Clearwater River, Up, H', 'Little Qualicum Sp Chan',
       'Puntledge River H', 'Quinsam River H', 'Tenderfoot Creek H',
       'Brunette River H', 'Campbell River Schools', 'Cheakamus River H',
       'Chilliwack Schools', 'Coast Mountain Out Sch H',
       'Comox Valley Schools', 'Coquitlam Schools', 'Lake Cowichan H',
       'Cowichan Lake Schools', 'Cowichan Valley Schools

In [29]:
joined_df.loc()

Unnamed: 0,PROGRAM_CODE,PROJ_NAME,SPECIES_NAME,RUN_NAME,BROOD_YEAR,STOCK_NAME,STOCK_PROD_AREA_CODE,STOCK_CU_INDEX,STOCK_CU_NAME,STOCK_TYPE_CODE,...,description,region,basin,rmis_latitude,rmis_longitude,source,accuracy,notes,rpagency,submission
0,CDP,Chehalis Band,Chinook,Fall,1980,Harrison R,LWFR,CK-3,LOWER FRASER RIVER_FA_0.3,W,...,Billy Harris Sl LWFR,FRTH,LOFR,49.2324,-121.9379,O,5.0,,CDFO,2019-02-04
1,CDP,Chehalis Band,Chinook,Fall,1980,Harrison R,LWFR,CK-3,LOWER FRASER RIVER_FA_0.3,W,...,Billy Harris Sl LWFR,FRTH,LOFR,49.2324,-121.9379,O,5.0,,CDFO,2019-02-04
2,CDP,Chehalis Band,Chinook,Fall,1981,Harrison R,LWFR,CK-3,LOWER FRASER RIVER_FA_0.3,W,...,Harrison R LWFR,FRTH,LOFR,49.2189,-121.9451,O1,8.0,,CDFO,2019-02-04
3,CDP,Chehalis Band,Chinook,Fall,1982,Harrison R,LWFR,CK-3,LOWER FRASER RIVER_FA_0.3,M,...,Harrison R LWFR,FRTH,LOFR,49.2189,-121.9451,O1,8.0,,CDFO,2019-02-04
4,CDP,Chehalis Band,Chinook,Fall,1985,Harrison R,LWFR,CK-3,LOWER FRASER RIVER_FA_0.3,M,...,Harrison R LWFR,FRTH,LOFR,49.2189,-121.9451,O1,8.0,,CDFO,2019-02-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21446,RRD,Yukalup Ch,Pink,Fall,1997,Chilliwack R,LWFR,PKO-1,FRASER RIVER,W,...,,,,,,,,,,
21447,RRD,Yukalup Ch,Pink,Fall,1999,Chilliwack R,LWFR,PKO-1,FRASER RIVER,M,...,,,,,,,,,,
21448,RRD,Yukalup Ch,Pink,Fall,2001,Chilliwack R,LWFR,PKO-1,FRASER RIVER,M,...,,,,,,,,,,
21449,RRD,Yukalup Ch,Pink,Fall,2003,Chilliwack R,LWFR,PKO-1,FRASER RIVER,M,...,,,,,,,,,,
