In [None]:
# last edited Apr 12

# to do: something off with some timesteps being missed

### Export Custom Time Series file for Ecosim / Ecospace
 G Oldford
<br> Purpose: Export a hatchery forcing or time series file to .csv's or ASCII's for EWE

   - export the monthly timestep forcing file that Ecosim expects
   - export the monthly timestep spatial forcing file that Ecospace expects

Data In: 
  - HatcheryRel_TS_ForNextStep.csv - EPAD data from Carl (DFO / SEP)- from 'step 3'

Data Out:
 - CSV TBD <br>
 - ASCII TBD <br>

Notes:
- EPAD data from Carl Walters and RMIS locations data from SOGDC
- Apr 12 - the average weight or the weight field is off! not sure why, more so with coho so I just went to EPAD data and got avg non-zero weight for 1980 - 1990 from Puntledge
- Apr 12 - the annual out should be average of monthly b_mt released! Not sum over yr

## TOC: <a class="anchor" id="top"></a>
* [1. Read file](#section-1)
* [2. Fix dates / timesteps](#section-2)
* [3. Produce Ecosim TS file](#section-3)
* [4. Produce Ecospace TS file](#section-4)
* [5. Write to File](#section-5)

In [132]:
import pandas as pd
import numpy as np
import datetime
from dateutil.parser import parse

# params
start_year = 1950
end_year = 2020
aggregate_time = "year" # month or year
aggregate_all_areas = "yes" # yes means aspatial
aggregate_to_level = "species" # otherwise will use codes in EWE_GROUP_CODE
species_groupcodes = pd.DataFrame(data = {'EWE_GROUP_CODE': ['CHINOOK-H-1','COHO-H-1'],
                                          'SPECIES':['CHINOOK','COHO']})
study_area = 7000 # used to calculate biomass density (mt / km^2)

# locations table from the SSMSP SOGDC (may have more lats / lons added than source at RMIS)
localpath_in = "C:/Users/Greig/Sync/6. SSMSP Model/Model Greig/Data/1. Salmon/All Species Hatchery Releases/EPADHatcherReleasesGST"
releases_df = pd.read_csv(localpath_in + "/MODIFIED/HatcheryRel_TS_ForNextstep.csv")

# fix date
releases_df['RELEASE_DATE'] = releases_df['release_avg_date'].astype('datetime64[ns]')
releases_df['YEAR'] = pd.DatetimeIndex(releases_df['RELEASE_DATE']).year
releases_df['MONTH'] = pd.DatetimeIndex(releases_df['RELEASE_DATE']).month
releases_df['EWE_TIMESTEP'] = releases_df['MONTH'] + ((releases_df['YEAR'] - start_year) * 12)
print(releases_df['BIOMASS_MT'].sum().round())

# Apr 12 2022 - fix mean weight of releases 
# something is wrong with avg_weight field. Coho are consistently released at 
# 20 g but cross-checks indicate weight from spreadsheet far too low. 
# (from EPAD Puntledge river releases, hatchery-reared, tossing avg_weight = 0)
coho_weight = 0.020 # kg
chin_weight = 0.0062 # kg
releases_df.loc[releases_df['SPECIES_NAME']=='Chinook', 'BIOMASS_MT2'] = releases_df['TOTRELEASE_NO'] * chin_weight * 0.001
releases_df.loc[releases_df['SPECIES_NAME']=='Coho', 'BIOMASS_MT2'] = releases_df['TOTRELEASE_NO'] * coho_weight * 0.001
print(releases_df['BIOMASS_MT2'].sum().round())
releases_df

9678.0
12887.0


Unnamed: 0.1,Unnamed: 0,EWE_GROUP_CODE,SPECIES_NAME,BIOMASS_MT,release_avg_date,FINAL_LAT,FINAL_LON,ROW_EWE,COL_EWE,TOTRELEASE_NO,SOURCE_ID,RELEASE_DATE,YEAR,MONTH,EWE_TIMESTEP,BIOMASS_MT2
0,0,Chinook-H-LFR-2,Chinook,0.095770,1981-07-01,49.2324,-121.9379,136.0,48.0,19154,2,1981-07-01,1981,7,379,0.118755
1,1,Chinook-H-LFR-2,Chinook,0.100720,1981-07-01,49.2324,-121.9379,136.0,48.0,20144,2,1981-07-01,1981,7,379,0.124893
2,2,Chinook-H-LFR-2,Chinook,0.127346,1982-05-15,49.2189,-121.9451,136.0,48.0,79591,2,1982-05-15,1982,5,389,0.493464
3,3,Chinook-H-LFR-2,Chinook,0.150289,1983-04-20,49.2189,-121.9451,136.0,48.0,70138,2,1983-04-20,1983,4,400,0.434856
4,4,Chinook-H-LFR-2,Chinook,0.172651,1986-06-10,49.2189,-121.9451,136.0,48.0,61661,2,1986-06-10,1986,6,438,0.382298
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14318,21446,Coho-H-LFR-2,Coho,0.022000,2003-05-16,,,132.0,46.0,2200,2,2003-05-16,2003,5,641,0.044000
14319,21447,Coho-H-LFR-2,Coho,0.022000,2004-05-16,,,132.0,46.0,2200,2,2004-05-16,2004,5,653,0.044000
14320,21448,Coho-H-LFR-2,Coho,0.022000,2004-05-16,,,132.0,46.0,2200,2,2004-05-16,2004,5,653,0.044000
14321,21449,Coho-H-LFR-2,Coho,0.022000,2005-05-16,,,5.0,22.0,2200,2,2005-05-16,2005,5,665,0.044000


In [133]:
if aggregate_all_areas == "yes":
    releases_df2 = releases_df.drop(['release_avg_date','FINAL_LAT','FINAL_LON','ROW_EWE','COL_EWE','SOURCE_ID','RELEASE_DATE'], axis=1)
    releases_df2 = releases_df2.groupby(['EWE_GROUP_CODE','SPECIES_NAME','EWE_TIMESTEP','YEAR',
                                         'MONTH']).agg('sum').reset_index()
    releases_df = releases_df2

if aggregate_to_level == "species":
    releases_df2 = releases_df.drop(['EWE_GROUP_CODE'], axis=1)
    releases_df2 = releases_df2.groupby(['SPECIES_NAME','EWE_TIMESTEP','YEAR',
                                         'MONTH']).agg('sum').reset_index()
    releases_df2['EWE_GROUP_CODE'] = releases_df2['SPECIES_NAME']
    releases_df2 = pd.merge(releases_df2, species_groupcodes, on=['EWE_GROUP_CODE'], how='left')
    
    releases_df = releases_df2.drop(['SPECIES_NAME'], axis=1)
    
releases_df['BIOMASS_MT2'] = releases_df['BIOMASS_MT2'] / study_area
releases_df['BIOMASS_MT'] = releases_df['BIOMASS_MT'] / study_area

releases_df

Unnamed: 0.1,EWE_TIMESTEP,YEAR,MONTH,Unnamed: 0,BIOMASS_MT,TOTRELEASE_NO,BIOMASS_MT2,EWE_GROUP_CODE,SPECIES
0,222,1968,6,18484,0.000092,152047,1.346702e-04,Chinook,
1,223,1968,7,9245,0.000044,103824,9.195840e-05,Chinook,
2,236,1969,8,18519,0.000282,408503,3.618169e-04,Chinook,
3,243,1970,3,4630,0.000012,1646,1.457886e-06,Chinook,
4,244,1970,4,4628,0.000006,816,7.227429e-07,Chinook,
...,...,...,...,...,...,...,...,...,...
661,784,2015,4,402848,0.001021,619663,1.770466e-03,Coho,
662,785,2015,5,1156629,0.008584,4370040,1.248583e-02,Coho,
663,786,2015,6,448070,0.000824,612843,1.750980e-03,Coho,
664,797,2016,5,134029,0.002373,1236718,3.533480e-03,Coho,


In [134]:
# cross check for one year
releases_df.loc[releases_df['YEAR']==1980].groupby(['EWE_GROUP_CODE','YEAR']).sum().reset_index()

Unnamed: 0.1,EWE_GROUP_CODE,YEAR,EWE_TIMESTEP,MONTH,Unnamed: 0,BIOMASS_MT,TOTRELEASE_NO,BIOMASS_MT2
0,Chinook,1980,1825,25,118322,0.00615,6930550,0.006138
1,Coho,1980,2199,39,412727,0.005006,4552663,0.013008


In [135]:
# round to 5 decimal places due to issues with floating point data 
# storage causing rounding to not work so 
# using decimal library https://stackoverflow.com/questions/56820/round-doesnt-seem-to-be-rounding-properly
from decimal import *
releases_df['BIOMASS_MT2']=releases_df['BIOMASS_MT2'].apply(lambda x: Decimal(str(x)).quantize(Decimal('.00001'), rounding=ROUND_UP))
releases_df['BIOMASS_MT']=releases_df['BIOMASS_MT'].apply(lambda x: Decimal(str(x)).quantize(Decimal('.00001'), rounding=ROUND_UP))


# add dummy variable containing all timesteps
dummy = pd.Series(range(1,((end_year - start_year)*12)))
dummy_df = (dummy.to_frame())
dummy_df['EWE_TIMESTEP'] = dummy_df[0]
dummy_df['EWE_GROUP_CODE'] = "DUMMY"
dummy_df['YEAR'] = (dummy_df['EWE_TIMESTEP'] // 12)+start_year
dummy_df

Unnamed: 0,0,EWE_TIMESTEP,EWE_GROUP_CODE,YEAR
0,1,1,DUMMY,1950
1,2,2,DUMMY,1950
2,3,3,DUMMY,1950
3,4,4,DUMMY,1950
4,5,5,DUMMY,1950
...,...,...,...,...
834,835,835,DUMMY,2019
835,836,836,DUMMY,2019
836,837,837,DUMMY,2019
837,838,838,DUMMY,2019


In [136]:
dummy_df = dummy_df[['EWE_GROUP_CODE','EWE_TIMESTEP','YEAR']]
releases_df = releases_df.append(dummy_df, ignore_index = True)
releases_df

Unnamed: 0.1,BIOMASS_MT,BIOMASS_MT2,EWE_GROUP_CODE,EWE_TIMESTEP,MONTH,SPECIES,TOTRELEASE_NO,Unnamed: 0,YEAR
0,0.00010,0.00014,Chinook,222,6.0,,152047.0,18484.0,1968
1,0.00005,0.00010,Chinook,223,7.0,,103824.0,9245.0,1968
2,0.00029,0.00037,Chinook,236,8.0,,408503.0,18519.0,1969
3,0.00002,0.00001,Chinook,243,3.0,,1646.0,4630.0,1970
4,0.00001,0.00001,Chinook,244,4.0,,816.0,4628.0,1970
...,...,...,...,...,...,...,...,...,...
1500,,,DUMMY,835,,,,,2019
1501,,,DUMMY,836,,,,,2019
1502,,,DUMMY,837,,,,,2019
1503,,,DUMMY,838,,,,,2019


In [138]:
# ###############################################################################
# For Ecosim
#################################################################################
releasesEcosim = releases_df[['EWE_TIMESTEP','BIOMASS_MT2','EWE_GROUP_CODE','YEAR']]
releasesEcosim = releasesEcosim.fillna(0)

# sum by EWE_GROUP_CODE and timestep
releasesEcosim = releasesEcosim.rename(columns={'EWE_TIMESTEP': 'TIMESTEP','EWE_GROUP_CODE': 'TITLE'})

# for timestep = monthly
releasesEcosim_gp_mo = releasesEcosim.groupby(['TIMESTEP','TITLE','YEAR']).sum().reset_index()

# pivot wide
releasesEcosim_wide_mo = releasesEcosim_gp_mo.pivot_table(
        values=['BIOMASS_MT2'],
        index=['TIMESTEP', 'YEAR'], 
        columns='TITLE', 
        aggfunc=np.sum).reset_index()

# reset the multilevel index via hack
releasesEcosim_wide_mo['CHIN_H_MT'] = releasesEcosim_wide_mo[('BIOMASS_MT2', 'Chinook')].astype(float)
releasesEcosim_wide_mo['COHO_H_MT'] = releasesEcosim_wide_mo[('BIOMASS_MT2', 'Coho')].astype(float)
releasesEcosim_wide_mo['TIMESTEP'] = releasesEcosim_wide_mo[('TIMESTEP', '')].astype(float)
releasesEcosim_wide_mo['YEAR'] = releasesEcosim_wide_mo[('YEAR', '')].astype(float)
releasesEcosim_wide_mo = releasesEcosim_wide_mo[['YEAR','TIMESTEP','CHIN_H_MT','COHO_H_MT']]
releasesEcosim_wide_mo.columns = [f"{x}_{y}" for x, y in releasesEcosim_wide_mo.columns.to_flat_index()]

# releasesEcosim_wide = releasesEcosim_wide.drop(columns=[('BIOMASS_MT2',   'DUMMY')])

# fill NaNs with zeros (required by ecosim)
releasesEcosim_wide_mo = releasesEcosim_wide_mo.fillna(0)

#releasesEcosim_wide_mo = pd.DataFrame(releasesEcosim_wide_mo.to_records())
releasesEcosim_wide_mo
#print(releasesEcosim_wide_mo.columns)


Unnamed: 0,YEAR_,TIMESTEP_,CHIN_H_MT_,COHO_H_MT_
0,1950.0,1.0,0.0,0.0
1,1950.0,2.0,0.0,0.0
2,1950.0,3.0,0.0,0.0
3,1950.0,4.0,0.0,0.0
4,1950.0,5.0,0.0,0.0
...,...,...,...,...
855,2019.0,835.0,0.0,0.0
856,2019.0,836.0,0.0,0.0
857,2019.0,837.0,0.0,0.0
858,2019.0,838.0,0.0,0.0


In [153]:
# use average monthly for annual time series
releasesEcosim_wide_yr = releasesEcosim_wide_mo.groupby(['YEAR_']).mean().reset_index()
releasesEcosim_wide_yr = releasesEcosim_wide_yr[['YEAR_','CHIN_H_MT_','COHO_H_MT_']]

In [162]:
# if aggregate_time == "year":
#     releasesEcosim_wide = releasesEcosim_wide.drop(columns="('TIMESTEP', '')", axis=1)
#     releasesEcosim_wide['Chinook'] = releasesEcosim_wide["('BIOMASS_MT2', 'Chinook')"].astype(float)
#     releasesEcosim_wide['Coho'] = releasesEcosim_wide["('BIOMASS_MT2', 'Coho')"].astype(float)
#     releasesEcosim_wide = releasesEcosim_wide.groupby("('YEAR', '')").mean().reset_index()

    
# write to temp file
releasesEcosim_wide_yr.to_csv(localpath_in + '/MODIFIED/temp_yr.csv', index=True)
releasesEcosim_wide_mo.to_csv(localpath_in + '/MODIFIED/temp_mo.csv', index=True)

# this repeats same avg value each month, for silly workaround
repeated_yr_avg = pd.merge(releasesEcosim_wide_mo, releasesEcosim_wide_yr, on=['YEAR_'], how='left')
repeated_yr_avg = repeated_yr_avg[['YEAR_','TIMESTEP_','CHIN_H_MT__y','COHO_H_MT__y']]
repeated_yr_avg = repeated_yr_avg.rename(columns={'TIMESTEP_': 'TIMESTEP','YEAR_': 'YEAR',
                                'CHIN_H_MT__y': 'CHIN_H_MT', 'COHO_H_MT__y': 'COHO_H_MT'})
repeated_yr_avg.to_csv(localpath_in + '/MODIFIED/temp_yr_rep.csv', index=True)


# ===================================
# open temp file and insert header
# ===================================

#Title	Combined_GST_FR_Escape_RelB_NuSEDS	Chin_Hatch_RelB_CW	Chin_1stYrM_1_CW	Chin_1stYrM_2_CW	Chin_C_Rel_CW
#Weight	1	1	1	1	1
#Pool Code	14	18	16	15	14
#Type	0	0	5	5	61
#1979	11.26655002	3.84	3.449022245	3.449022245	0.35
#1980	11.07767237	6.93	3.021428984	3.021428984	0.371
#1981	11.23108247	8.75	3.354206073	3.354206073	0.2533

# codes for 'type'
# relative biomass = 0 
# absolute biomass = 1
# biomass forcing = -1
# fishing mortality = 4
# relative fishing mortality = 104
# total mortality = 5
# constant total mortality = -5 (forcing?)
# catches = 6
# catches forcing = -6
# relative catches = 61
# average weight = 7

import copy 

f = open(localpath_in + '/MODIFIED/temp_yr.csv', "r")
contents = f.readlines()
f.close()

line1 = contents[0].split(',')
line1[0] = 'Title'


line2 = copy.deepcopy(line1) 
line2[0] = 'Weight'
i = 0
for line in line2:
    if i > 0:
        if i == (len(line2) - 1):
            line2[i] = '1\n'
        else:
            line2[i] = 1
    i += 1

line3 = copy.deepcopy(line1) 
line3[0] = 'Type'
i = 0
for line in line3:
    if i > 0:
        if i == (len(line3) - 1):
            line3[i] = '-1\n'
        else:
            line3[i] = -1
    i += 1

line4 = copy.deepcopy(line1) 
line4[0] = 'Timestep'
i = 0
for line in line4:
    if i > 0:
        if i == (len(line4) - 1):
            line4[i] = 'Interval\n'
        else:
            line4[i] = 'Interval'
    i += 1
    
s=""
contents.insert(1,','.join(str(line) for line in line1))
contents.insert(2,','.join(str(line) for line in line2))
contents.insert(3, ','.join(str(line) for line in line3))
contents.insert(4, ','.join(str(line) for line in line4))

i = 0
with open(localpath_in + '/MODIFIED/HatcheryRel_Ecosim_TS_apr22_1.csv', 'w') as a_writer:
    for line in contents:
        if i > 0:
            a_writer.writelines(line)
        i += 1
        
f = open(localpath_in + '/MODIFIED/temp_yr_rep.csv', "r")
contents = f.readlines()
f.close()

line1 = contents[0].split(',')
line1[0] = 'Title'


line2 = copy.deepcopy(line1) 
line2[0] = 'Weight'
i = 0
for line in line2:
    if i > 0:
        if i == (len(line2) - 1):
            line2[i] = '1\n'
        else:
            line2[i] = 1
    i += 1

line3 = copy.deepcopy(line1) 
line3[0] = 'Type'
i = 0
for line in line3:
    if i > 0:
        if i == (len(line3) - 1):
            line3[i] = '-1\n'
        else:
            line3[i] = -1
    i += 1

line4 = copy.deepcopy(line1) 
line4[0] = 'Timestep'
i = 0
for line in line4:
    if i > 0:
        if i == (len(line4) - 1):
            line4[i] = 'Interval\n'
        else:
            line4[i] = 'Interval'
    i += 1
    
s=""
contents.insert(1,','.join(str(line) for line in line1))
contents.insert(2,','.join(str(line) for line in line2))
contents.insert(3, ','.join(str(line) for line in line3))
contents.insert(4, ','.join(str(line) for line in line4))

i = 0
with open(localpath_in + '/MODIFIED/HatcheryRel_Ecosim_TS_apr22_3.csv', 'w') as a_writer:
    for line in contents:
        if i > 0:
            a_writer.writelines(line)
        i += 1
        
line1 = contents[0].split(',')
line1[0] = 'Title'


line2 = copy.deepcopy(line1) 
line2[0] = 'Weight'
i = 0
for line in line2:
    if i > 0:
        if i == (len(line2) - 1):
            line2[i] = '1\n'
        else:
            line2[i] = 1
    i += 1

line3 = copy.deepcopy(line1) 
line3[0] = 'Type'
i = 0
for line in line3:
    if i > 0:
        if i == (len(line3) - 1):
            line3[i] = '-1\n'
        else:
            line3[i] = -1
    i += 1

line4 = copy.deepcopy(line1) 
line4[0] = 'Timestep'
i = 0
for line in line4:
    if i > 0:
        if i == (len(line4) - 1):
            line4[i] = 'Interval\n'
        else:
            line4[i] = 'Interval'
    i += 1
    
s=""
contents.insert(1,','.join(str(line) for line in line1))
contents.insert(2,','.join(str(line) for line in line2))
contents.insert(3, ','.join(str(line) for line in line3))
contents.insert(4, ','.join(str(line) for line in line4))

i = 0
with open(localpath_in + '/MODIFIED/HatcheryRel_Ecosim_TS_apr22_2.csv', 'w') as a_writer:
    for line in contents:
        if i > 0:
            a_writer.writelines(line)
        i += 1

### Just junk below

In [159]:
repeated_yr_avg = pd.merge(releasesEcosim_wide_mo, releasesEcosim_wide_yr, on=['YEAR_'], how='left')
repeated_yr_avg[360:400]

Unnamed: 0,YEAR_,TIMESTEP_,CHIN_H_MT__x,COHO_H_MT__x,CHIN_H_MT__y,COHO_H_MT__y
360,1980.0,361.0,0.0,0.0,0.000514,0.001086
361,1980.0,362.0,0.0,0.0,0.000514,0.001086
362,1980.0,363.0,0.00028,0.0,0.000514,0.001086
363,1980.0,364.0,4e-05,0.00093,0.000514,0.001086
364,1980.0,365.0,8e-05,0.01004,0.000514,0.001086
365,1980.0,366.0,0.00576,0.0017,0.000514,0.001086
366,1980.0,367.0,1e-05,0.00015,0.000514,0.001086
367,1980.0,368.0,0.0,0.0001,0.000514,0.001086
368,1980.0,369.0,0.0,0.00011,0.000514,0.001086
369,1980.0,370.0,0.0,0.0,0.000514,0.001086


In [10]:
# check - by year to get annual est... 
releases_df3 = releases_df[['YEAR','EWE_GROUP_CODE','BIOMASS_MT']]
# sum
releases_df3 = releases_df3.groupby(['EWE_GROUP_CODE','YEAR']).agg('sum').reset_index()
# mean
#releases_df3 = releases_df3.groupby(['EWE_GROUP_CODE','YEAR']).agg('mean').reset_index()

releases_df3.loc[releases_df3['EWE_GROUP_CODE']=='Chinook']

Unnamed: 0,EWE_GROUP_CODE,YEAR,BIOMASS_MT
0,Chinook,1968.0,0.00015
1,Chinook,1969.0,0.00029
2,Chinook,1970.0,0.0001
3,Chinook,1971.0,0.00021
4,Chinook,1972.0,0.00065
5,Chinook,1973.0,0.00068
6,Chinook,1974.0,0.00061
7,Chinook,1975.0,0.00051
8,Chinook,1976.0,0.00146
9,Chinook,1977.0,0.0017


In [14]:
releases_df

Unnamed: 0.1,BIOMASS_MT,EWE_GROUP_CODE,EWE_TIMESTEP,MONTH,SPECIES,Unnamed: 0,YEAR
0,0.00010,Chinook,222,6.0,,18484.0,1968.0
1,0.00005,Chinook,223,7.0,,9245.0,1968.0
2,0.00029,Chinook,236,8.0,,18519.0,1969.0
3,0.00002,Chinook,243,3.0,,4630.0,1970.0
4,0.00001,Chinook,244,4.0,,4628.0,1970.0
...,...,...,...,...,...,...,...
2339,,DUMMY,835,,,,
2340,,DUMMY,836,,,,
2341,,DUMMY,837,,,,
2342,,DUMMY,838,,,,


In [80]:
releasesEcosim_wide

TITLE,Chinook,Coho
TIMESTEP,Unnamed: 1_level_1,Unnamed: 2_level_1
1,,
2,,
3,,
4,,
5,,
...,...,...
835,,
836,,
837,,
838,,


In [199]:
#