In [None]:
# last edited May 20, 2021 by GO
# - modified to aggregate to most basic level (species and month) for Ecosim

### Export Custom Time Series file for Ecosim / Ecospace
Jan 2021
By: G Oldford
<br> Purpose: Export a hatchery forcing or time series file to .csv's or ASCII's for EWE

Data In: 
  - HatcheryRel_TS_ForNextStep.csv - EPAD data from Carl (DFO / SEP)- from 'step 3'

Data Out:
 - CSV TBD <br>
 - ASCII TBD <br>

Purpose:
   - export the monthly timestep forcing file that Ecosim expects
   - export the monthly timestep spatial forcing file that Ecospace expects

Notes:
- EPAD data from Carl Walters and RMIS locations data from SOGDC

## TOC: <a class="anchor" id="top"></a>
* [1. Read file](#section-1)
* [2. Fix dates / timesteps](#section-2)
* [3. Produce Ecosim TS file](#section-3)
* [4. Produce Ecospace TS file](#section-4)
* [5. Write to File](#section-5)

In [3]:
import pandas as pd
import datetime
from dateutil.parser import parse

# params
start_year = 1950
end_year = 2020
aggregate_all_areas = "yes" # yes means aspatial
aggregate_to_level = "species" # otherwise will use codes in EWE_GROUP_CODE
species_groupcodes = pd.DataFrame(data = {'EWE_GROUP_CODE': ['CHINOOK-H-1','COHO-H-1'],
                                          'SPECIES':['CHINOOK','COHO']})
study_area = 7000 # used to calculate biomass density (mt / km^2)

# locations table from the SSMSP SOGDC (may have more lats / lons added than source at RMIS)
localpath_in = "C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/Hatchery Releases/EPADHatcherReleasesGST"
releases_df = pd.read_csv(localpath_in + "/MODIFIED/HatcheryRel_TS_ForNextstep.csv")

# fix date
releases_df['RELEASE_DATE'] = releases_df['release_avg_date'].astype('datetime64[ns]')
releases_df['YEAR'] = pd.DatetimeIndex(releases_df['RELEASE_DATE']).year
releases_df['MONTH'] = pd.DatetimeIndex(releases_df['RELEASE_DATE']).month
releases_df['EWE_TIMESTEP'] = releases_df['MONTH'] + ((releases_df['YEAR'] - start_year) * 12)
print(releases_df['BIOMASS_MT'].sum().round())
releases_df

9678.0


Unnamed: 0.1,Unnamed: 0,EWE_GROUP_CODE,SPECIES_NAME,BIOMASS_MT,release_avg_date,FINAL_LAT,FINAL_LON,ROW_EWE,COL_EWE,TOTRELEASE_NO,SOURCE_ID,RELEASE_DATE,YEAR,MONTH,EWE_TIMESTEP
0,0,Chinook-H-LFR-2,Chinook,0.095770,1981-07-01,49.2324,-121.9379,136.0,48.0,19154,2,1981-07-01,1981,7,379
1,1,Chinook-H-LFR-2,Chinook,0.100720,1981-07-01,49.2324,-121.9379,136.0,48.0,20144,2,1981-07-01,1981,7,379
2,2,Chinook-H-LFR-2,Chinook,0.127346,1982-05-15,49.2189,-121.9451,136.0,48.0,79591,2,1982-05-15,1982,5,389
3,3,Chinook-H-LFR-2,Chinook,0.150289,1983-04-20,49.2189,-121.9451,136.0,48.0,70138,2,1983-04-20,1983,4,400
4,4,Chinook-H-LFR-2,Chinook,0.172651,1986-06-10,49.2189,-121.9451,136.0,48.0,61661,2,1986-06-10,1986,6,438
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14318,21446,Coho-H-LFR-2,Coho,0.022000,2003-05-16,,,132.0,46.0,2200,2,2003-05-16,2003,5,641
14319,21447,Coho-H-LFR-2,Coho,0.022000,2004-05-16,,,132.0,46.0,2200,2,2004-05-16,2004,5,653
14320,21448,Coho-H-LFR-2,Coho,0.022000,2004-05-16,,,132.0,46.0,2200,2,2004-05-16,2004,5,653
14321,21449,Coho-H-LFR-2,Coho,0.022000,2005-05-16,,,5.0,22.0,2200,2,2005-05-16,2005,5,665


In [4]:
if aggregate_all_areas == "yes":
    releases_df2 = releases_df.drop(['release_avg_date','FINAL_LAT','FINAL_LON','ROW_EWE','COL_EWE',
                                    'TOTRELEASE_NO','SOURCE_ID','RELEASE_DATE'], axis=1)
    releases_df2 = releases_df2.groupby(['EWE_GROUP_CODE','SPECIES_NAME','EWE_TIMESTEP','YEAR',
                                         'MONTH']).agg('sum').reset_index()
    releases_df = releases_df2

if aggregate_to_level == "species":
    releases_df2 = releases_df.drop(['EWE_GROUP_CODE'], axis=1)
    releases_df2 = releases_df2.groupby(['SPECIES_NAME','EWE_TIMESTEP','YEAR',
                                         'MONTH']).agg('sum').reset_index()
    releases_df2['EWE_GROUP_CODE'] = releases_df2['SPECIES_NAME']
    releases_df2 = pd.merge(releases_df2, species_groupcodes, on=['EWE_GROUP_CODE'], how='left')
    
    releases_df = releases_df2.drop(['SPECIES_NAME'], axis=1)
    
releases_df['BIOMASS_MT'] = releases_df['BIOMASS_MT'] / study_area

releases_df

Unnamed: 0.1,EWE_TIMESTEP,YEAR,MONTH,Unnamed: 0,BIOMASS_MT,EWE_GROUP_CODE,SPECIES
0,222,1968,6,18484,0.000092,Chinook,
1,223,1968,7,9245,0.000044,Chinook,
2,236,1969,8,18519,0.000282,Chinook,
3,243,1970,3,4630,0.000012,Chinook,
4,244,1970,4,4628,0.000006,Chinook,
...,...,...,...,...,...,...,...
661,784,2015,4,402848,0.001021,Coho,
662,785,2015,5,1156629,0.008584,Coho,
663,786,2015,6,448070,0.000824,Coho,
664,797,2016,5,134029,0.002373,Coho,


In [5]:

# round to 5 decimal places
# silly issues with floating point data storage causing rounding to not work so 
# using decimal library https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
from decimal import *
releases_df['BIOMASS_MT']=releases_df['BIOMASS_MT'].apply(lambda x: Decimal(str(x)).quantize(Decimal('.00001'), rounding=ROUND_UP))
                                              
# add dummy variable containing all timesteps
dummy = pd.Series(range(1,((end_year - start_year)*12)))
dummy_df = (dummy.to_frame())
dummy_df['EWE_TIMESTEP'] = dummy_df[0]
dummy_df['EWE_GROUP_CODE'] = "DUMMY"
dummy_df = dummy_df[['EWE_GROUP_CODE','EWE_TIMESTEP']]
releases_df = releases_df.append(dummy_df, ignore_index = True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


In [81]:

# ###############################################################################
# For Ecosim
#################################################################################
releasesEcosim = releases_df[['EWE_TIMESTEP','BIOMASS_MT','EWE_GROUP_CODE']]

# sum by EWE_GROUP_CODE and timestep
releasesEcosim = releasesEcosim.rename(columns={'EWE_TIMESTEP': 'TIMESTEP','EWE_GROUP_CODE': 'TITLE'})
releasesEcosim_gp = releasesEcosim.groupby(['TIMESTEP','TITLE']).sum().reset_index()

# pivot wide
releasesEcosim_wide = releasesEcosim_gp[['TITLE','BIOMASS_MT','TIMESTEP']].pivot(index='TIMESTEP', columns='TITLE', values='BIOMASS_MT')
releasesEcosim_wide = releasesEcosim_wide.drop(columns=['DUMMY'])

# fill NaNs with zeros (required by ecosim)
releasesEcosim_wide = releasesEcosim_wide.fillna(0)

# write to temp file
releasesEcosim_wide.to_csv(localpath_in + '/MODIFIED/temp.csv', index=True)

# open temp file and insert header

#Title	Combined_GST_FR_Escape_RelB_NuSEDS	Chin_Hatch_RelB_CW	Chin_1stYrM_1_CW	Chin_1stYrM_2_CW	Chin_C_Rel_CW
#Weight	1	1	1	1	1
#Pool Code	14	18	16	15	14
#Type	0	0	5	5	61
#1979	11.26655002	3.84	3.449022245	3.449022245	0.35
#1980	11.07767237	6.93	3.021428984	3.021428984	0.371
#1981	11.23108247	8.75	3.354206073	3.354206073	0.2533

# codes for 'type'
# relative biomass = 0 
# absolute biomass = 1
# biomass forcing = -1
# fishing mortality = 4
# relative fishing mortality = 104
# total mortality = 5
# constant total mortality = -5 (forcing?)
# catches = 6
# catches forcing = -6
# relative catches = 61
# average weight = 7

import copy 

f = open(localpath_in + '/MODIFIED/temp.csv', "r")
contents = f.readlines()
f.close()

line1 = contents[0].split(',')
line1[0] = 'Title'


line2 = copy.deepcopy(line1) 
line2[0] = 'Weight'
i = 0
for line in line2:
    if i > 0:
        if i == (len(line2) - 1):
            line2[i] = '1\n'
        else:
            line2[i] = 1
    i += 1

line3 = copy.deepcopy(line1) 
line3[0] = 'Type'
i = 0
for line in line3:
    if i > 0:
        if i == (len(line3) - 1):
            line3[i] = '-1\n'
        else:
            line3[i] = -1
    i += 1

line4 = copy.deepcopy(line1) 
line4[0] = 'Timestep'
i = 0
for line in line4:
    if i > 0:
        if i == (len(line4) - 1):
            line4[i] = 'Interval\n'
        else:
            line4[i] = 'Interval'
    i += 1
    
s=""
contents.insert(1,','.join(str(line) for line in line1))
contents.insert(2,','.join(str(line) for line in line2))
contents.insert(3, ','.join(str(line) for line in line3))
contents.insert(4, ','.join(str(line) for line in line4))

i = 0
with open(localpath_in + '/MODIFIED/HatcheryRel_Ecosim_TS_1.csv', 'w') as a_writer:
    for line in contents:
        if i > 0:
            a_writer.writelines(line)
        i += 1

In [7]:
# check - by year to get annual est... 
releases_df3 = releases_df[['YEAR','EWE_GROUP_CODE','BIOMASS_MT']]
releases_df3 = releases_df3.groupby(['EWE_GROUP_CODE','YEAR']).agg('sum').reset_index()
releases_df3.loc[releases_df3['EWE_GROUP_CODE']=='Chinook']

Unnamed: 0,EWE_GROUP_CODE,YEAR,BIOMASS_MT
0,Chinook,1968.0,0.00015
1,Chinook,1969.0,0.00029
2,Chinook,1970.0,0.0001
3,Chinook,1971.0,0.00021
4,Chinook,1972.0,0.00065
5,Chinook,1973.0,0.00068
6,Chinook,1974.0,0.00061
7,Chinook,1975.0,0.00051
8,Chinook,1976.0,0.00146
9,Chinook,1977.0,0.0017


In [None]:
# to do: ecospace forcing ASCII's

In [80]:
releasesEcosim_wide

TITLE,Chinook,Coho
TIMESTEP,Unnamed: 1_level_1,Unnamed: 2_level_1
1,,
2,,
3,,
4,,
5,,
...,...,...
835,,
836,,
837,,
838,,


In [199]:
#