In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path

#### 20221222 Smolt performance Phase 3_copy.ipynb


In [2]:
#mortality_target_to_be_grouped - data on first 90 days after transfer
df1=pd.read_csv('data\\mortality_target_to_be_grouped.csv')
df1.drop(columns=['event_date.1', 'locus_id.1', 'fish_group_id.1'],inplace=True)

df2=pd.read_csv('data\\eb_stocking_edited2.csv',encoding='cp1251')
df2.from_date=pd.to_datetime(df2.from_date,format='%Y-%m-%d')
df2.to_date=pd.to_datetime(df2.to_date,format='%Y-%m-%d')
df2.transfer_date=pd.to_datetime(df2.transfer_date,format='%Y-%m-%d')
df2['days_btw_to_from']=(df2.to_date-df2.from_date).dt.days
df2['days_btw_to_transfer']=(df2.to_date-df2.transfer_date).dt.days
weight_bins = np.linspace(100, 225, num=6)
df2['to_avg_weight_binned'] = pd.cut(df2['to_avg_weight'], weight_bins)
freshwater_names=pd.read_csv('data\\from_locus_name_lookup.csv')
freshwater_names.from_date=pd.to_datetime(freshwater_names.from_date,format='%Y-%m-%d')

#below are mortality targets
df1['total_mortality_perc_90']=df1['total_mortality']/df1['total_count']
df1['transport_mortality_perc_90']=df1['transport_mortality']/df1['total_count']
df1['nontransport_mortality_perc_90']=df1['nontransport_mortality']/df1['total_count']

df1.transfer_date=pd.to_datetime(df1.transfer_date,format='%Y-%m-%d')
df1['transfer_year']=df1.transfer_date.dt.year
df1['transfer_month']=df1.transfer_date.dt.month
df1['transfer_month_year']=df1['transfer_month'].astype(str)+'_'+df1['transfer_year'].astype(str)
season_dic = {1: 'winter',2: 'spring',3: 'summer',4: 'autumn'}
df1['transfer_season']=(df1['transfer_date'].dt.month%12 // 3 + 1).apply(lambda x: season_dic[x])
season_dic2 = {1: 'Dec-Feb',2: 'Mar-May',3: 'Jun-Aug',4: 'Sep-Nov'}
df1['transfer_season2']=(df1['transfer_date'].dt.month%12 // 3 + 1).apply(lambda x: season_dic2[x])
reverse_season_dic = {v: k for k, v in season_dic.items()}
df1.event_date = pd.to_datetime(df1.event_date,format='%Y-%m-%d')

#do we need following filter?
#df1=df1[df1.transfer_date<='2023-02-28']
mortality=df1.dropna().groupby(['locus_id','fish_group_id']).agg({'transfer_year':'min'
                                                                 ,'transfer_month':'min'
                                                                 ,'transfer_month_year':'min' 
                                                                 ,'transfer_season':'min'
                                                                 ,'transfer_season2':'min'
                                                                 ,'total_count':'mean'
                                                                 ,'total_mortality_perc_90':'sum'
                                                                 ,'transport_mortality_perc_90':'sum'
                                                                 ,'nontransport_mortality_perc_90':'sum'
                                                                 }).reset_index()

#removing outliers
mortality = mortality[mortality.total_count <= mortality.total_count.quantile(.975)]
mortality = mortality[mortality.total_count > 10000]
mortality = mortality[mortality.transport_mortality_perc_90 < mortality.transport_mortality_perc_90.quantile(.995)]
mortality = mortality[mortality.total_mortality_perc_90 < mortality.total_mortality_perc_90.quantile(.99)]
mortality = mortality[mortality.nontransport_mortality_perc_90 < mortality.nontransport_mortality_perc_90.quantile(.99)]

df3 = mortality.merge(df2, how='left', left_on=['locus_id', 'fish_group_id'], right_on=['to_locus_id', 'to_fish_group_id'])
# df3 = mortality_grand.merge(df2, how='left', left_on=['locus_id', 'fish_group_id'], right_on=['to_locus_id', 'to_fish_group_id'])

df3.drop(columns=['to_avg_weight_binned']).to_csv('data\\smolt_dataset_transfers.csv',index=False)

#### 20230712 Target comparison_only_mortality_nSFR.ipynb


In [3]:
#This is James' approximation. w = weight, t=temperature
def eSFR (row):
    w = row['open_weight']
    t = row['degree_days']
    yf = (.2735797591)+(-.0720137809*t)+(.0187408253*t**2)+(-.0008145337*t**3)
    y0 = (-.79303459)+(.43059382*t)+(-.01471246*t**2)
    log_alpha = (-7.8284505676)+(.3748824960*t)+(-.0301640851*t**2)+(.0006516355*t**3)
    return (yf - (yf-y0)*np.exp(-np.exp(log_alpha)*w))

# Function to generate event dates
def generate_event_dates(row):
    event_dates = pd.date_range(row['transfer_date'], row['sw90_date'], freq='D')
    return pd.DataFrame({'locus_id': row['locus_id'],
                         'fish_group_id': row['fish_group_id'],
                         'transfer_year': row['transfer_year'],
                         'event_date': event_dates})

In [4]:
mortality=pd.read_csv(os.path.join(os.getcwd(), 'data/smolt_dataset_transfers_short.csv'))

inv=pd.read_csv('data/evt_inventory_only_SW_cages_only_since_2017.csv')
inv['event_date']=pd.to_datetime(inv['event_date'])

key_columns = ['locus_id','fish_group_id','transfer_year']
df=mortality[key_columns+['to_avg_weight','total_mortality_perc_90','transport_mortality_perc_90','nontransport_mortality_perc_90']]
df = df.rename({'to_avg_weight':'stocking_weight'}, axis=1)

inv['open_biomass_kg']=inv['open_count']*inv['open_weight']/1000
# inv=inv[inv['open_biomass_kg']>0]
inv['oSFR'] = np.where(inv['open_biomass_kg'] == 0, np.nan, inv['feed_amount'] / inv['open_biomass_kg'] * 100)
inv['eSFR'] = inv.apply(eSFR,axis=1)
inv['nSFR'] = np.where(inv['eSFR'] == 0, np.nan, inv['oSFR'] / inv['eSFR'])

#creating new dataframe with 90 dates for each transfer
tmpp=mortality[key_columns+['transfer_date']]
tmpp['transfer_date']=pd.to_datetime(tmpp['transfer_date'])
tmpp['sw90_date'] = tmpp['transfer_date'] + pd.Timedelta(90,'d')

# Apply the function to each row and concatenate the results
new_df = pd.concat(tmpp.apply(generate_event_dates, axis=1).tolist(), ignore_index=True)

#check negative nSFR values here
inv_grouped=inv.groupby(['event_date','locus_id'])[['oSFR','eSFR','nSFR']].max().reset_index()
# inv_grouped[inv_grouped.locus_id==7205269]
df_daily = new_df.merge(mortality[key_columns+['transfer_date']]).merge(inv_grouped, how='left')
df_daily['transfer_date']=pd.to_datetime(df_daily['transfer_date'])
#filter out first date of seawater (transfer_date), because of non-typical values of SFR
df_daily=df_daily[df_daily.transfer_date < df_daily.event_date]
df_daily['nSFR'] = np.where(df_daily['eSFR'] < 0, np.nan, df_daily['nSFR'])
df_daily['oSFR'] = df_daily['oSFR'].fillna(0)

df=df.merge(df_daily.groupby(key_columns)[['oSFR','nSFR']].mean().reset_index())
growth_targets=['oSFR', 'nSFR']
df['log_mortality']=np.log(df['total_mortality_perc_90'])

df.to_csv('data/targets.csv',index=False)

  return (yf - (yf-y0)*np.exp(-np.exp(log_alpha)*w))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmpp['transfer_date']=pd.to_datetime(tmpp['transfer_date'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tmpp['sw90_date'] = tmpp['transfer_date'] + pd.Timedelta(90,'d')


#### factors_ranking_log_mortality.ipynb

In [5]:
def smape(y_true, y_pred):
    return np.mean(np.abs(y_pred - y_true) / (np.abs(y_pred) + np.abs(y_true))) * 100

def mape(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

TIME_FORMAT = '%Y-%m-%d'
DROPNA = True
key_columns = ['locus_id', 'fish_group_id', 'transfer_year']

root_path = Path('./data/')
targets = pd.read_csv(root_path / 'targets.csv')

mortality = pd.read_csv('data/smolt_dataset_transfers_short.csv')
mortality['transfer_date'] = pd.to_datetime(mortality['transfer_date'], format=TIME_FORMAT)
mortality['transport_year']= mortality['transfer_date'].dt.year

#tgc = pd.read_csv('../data/transfers_until2023Feb28_with_sw_growth_targets.csv')
img_path = Path('./img')
result_data_path = Path('./result_data')
img_path.mkdir(exist_ok=True)
result_data_path.mkdir(exist_ok=True)

PREDICTIVE_SAVE_PATH = result_data_path / 'predictive' 
PREDICTIVE_SAVE_PATH.mkdir(exist_ok=True)

MODEL_SAVE_PATH = result_data_path / 'predictive' / 'models'
MODEL_SAVE_PATH.mkdir(exist_ok=True)

METRICS_SAVE_PATH = result_data_path / 'predictive' / 'metrics'
METRICS_SAVE_PATH.mkdir(exist_ok=True)

IMPORTANCE_SAVE_PATH = result_data_path / 'predictive' / 'importance'
IMPORTANCE_SAVE_PATH.mkdir(exist_ok=True)

IMPORTANT_IMG_SAVE_PATH = img_path / 'predictive' / 'importance'
IMPORTANT_IMG_SAVE_PATH.mkdir(exist_ok=True)

#### Temperature 

In [8]:
locus_weights=pd.read_csv('data\\evt_movement_ratio_with_dates.csv')
locus_weights.starttime = pd.to_datetime(locus_weights.starttime,format='%Y-%m-%d')
locus_weights.endtime = pd.to_datetime(locus_weights.endtime,format='%Y-%m-%d')

lw_dates=locus_weights.groupby('final_locus_population_id').agg({'starttime':'min','endtime':'max'})
lw_dates.starttime = pd.to_datetime(lw_dates.starttime,format='%Y-%m-%d')
lw_dates.endtime = pd.to_datetime(lw_dates.endtime,format='%Y-%m-%d')
#to be checked
lw_dates['FW_cycle_length'] = (lw_dates.endtime - lw_dates.starttime).dt.days+1
lw_dates['starttime_year']=lw_dates['starttime'].dt.year
#we limit FW cycles to those started in 2017 because there are issues with temperature readings for 2015-2016
lw_dates_2017=lw_dates[lw_dates.starttime_year>=2017]

df_dates=pd.read_csv('data\\FW_cycle_dates.csv')
for d in ['first_movement_date', 'first_feeding_date', 'shipout_date']:
    df_dates[d] = pd.to_datetime(df_dates[d],format='%Y-%m-%d')
df_dates_2017=df_dates.merge(lw_dates_2017.reset_index()[['final_locus_population_id']],left_on='pretransfer_fw_locus_population_id',right_on='final_locus_population_id',how='inner')
df_dates_2017.drop(columns=['final_locus_population_id'],inplace=True)

dft=pd.read_csv('data\\lw_alldates_final_grouped.csv')
dft.event_date = pd.to_datetime(dft.event_date,format='%Y-%m-%d')

tmp_list=[]
for ind,row in df_dates_2017.iterrows():
    lp = row.pretransfer_fw_locus_population_id
    start = row.first_movement_date
    end = row.shipout_date
    for d in pd.date_range(start,end):
        tmp_list.append([lp,d])
tmp_df=pd.DataFrame(tmp_list,columns=['final_locus_population_id','event_date'])
dft_=tmp_df.merge(dft, how='left')

#interpolation method #1 without handling outliers
output_df_temp = pd.DataFrame()
for ind,curr_df in dft_.groupby('final_locus_population_id'):
    tmp_df=curr_df.copy()
    tmp_df.temperature=curr_df.temperature.interpolate()
    output_df_temp=pd.concat([output_df_temp,tmp_df])
dft_filled = output_df_temp.copy()
dft_filled.to_csv('data\\FW_temperature_filled.csv',index=False)


In [12]:
df_date_temperature = pd.read_csv('data/FW_temperature_filled.csv')
df_date_temperature = df_date_temperature.dropna()
part_number_list = df_date_temperature['final_locus_population_id'].tolist()
unic_part_number_list = []
df_temperature_cleared = pd.DataFrame(columns = ['final_locus_population_id','event_date', 'temperature_cleared'])
for item in part_number_list:
    if item not in unic_part_number_list:
        unic_part_number_list.append(item)
n = 0
while n < len(unic_part_number_list):
    part_number = unic_part_number_list[n]
    df_temperature_filtered = df_date_temperature[(df_date_temperature['final_locus_population_id'] == part_number)]
    df_temperature_filtered.reset_index(drop = True, inplace = True)
    #df_temperanure_filtered = df_temperature_filtered.reindex(columns = df_temperature_filtered.columns.tolist()+['rolling_tempr','tempr_cleared'])
    df_temperature_filtered['rolling_tempr'] = df_temperature_filtered['temperature'].rolling(30).mean()
    df_temporary = df_temperature_filtered.head(30)
    average_tempr_30 = df_temporary['temperature'].mean()
    half_average_tempr_30 = average_tempr_30 / 2 
    k = 0
    while k < len(df_temporary):
        t_1 = df_temperature_filtered.at[k,'temperature']
        t_dif = t_1 - average_tempr_30
        if abs(t_dif) < half_average_tempr_30:
                df_temperature_filtered.at[k,'rolling_tempr'] = t_1
        else:
                df_temperature_filtered.at[k,'rolling_tempr'] = average_tempr_30
        k = k + 1        
    df_temperature_cleared = pd.concat([df_temperature_cleared, df_temperature_filtered],axis=0)
    n = n + 1
    #print ('n', n)
#datatypes_1 = df_temperature_cleared.dtypes
m = 0
cc = len(df_temperature_cleared)
for m in range (cc):
    t_2 = df_temperature_cleared.iloc[m]['temperature']
    t_rolling_aver = df_temperature_cleared.iloc[m]['rolling_tempr']
    half_t_rolling_aver = t_rolling_aver / 2
    t_dif_clear = t_2 - t_rolling_aver
    if abs(t_dif_clear) < half_t_rolling_aver:
        df_temperature_cleared.at[m,'temperature_cleared'] = t_2
    else:
        df_temperature_cleared.at[m,'temperature_cleared'] = t_rolling_aver
    m = m + 1
    #print ('m', m)    
    
df_temperature_cleared.to_csv('data/FW_temperature_cleared.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temperature_filtered['rolling_tempr'] = df_temperature_filtered['temperature'].rolling(30).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temperature_filtered['rolling_tempr'] = df_temperature_filtered['temperature'].rolling(30).mean()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_

#### Treatments