In [1]:
import pystan as stan
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Compile the Stan program stored in src/model2.stan into a PyStan model object
# (this is the step that triggers the C++ compilation logged below).
model = stan.StanModel(file='src/model2.stan')

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_9e94d5dc5b854e810aeeb6700d9bb512 NOW.


In [3]:
def load_data(surv, obs, temp_reg=0.05, mat_reg=0.01, alpha_egg=0.00056, beta_egg=0.001044):
    """Read the survival and observation CSVs and assemble the Stan data dict.

    Parameters
    ----------
    surv : str or path-like
        CSV whose column names (compared case-insensitively) include ``year``,
        ``adult_females``, ``fecundity`` and ``fry_at_redbluff``. Rows with a
        missing value in any of these columns are dropped.
    obs : str or path-like
        CSV with columns ``loc``, ``year``, ``day``, ``redds`` and ``temp``.
        Only rows with ``temp > 1.0`` are kept.
    temp_reg, mat_reg, alpha_egg, beta_egg : float
        Copied unchanged into the returned dict under the same keys.

    Returns
    -------
    dict
        ``N``/``S``: number of retained observation / survival rows.
        ``Y``: number of years spanned by both tables; ``L``: number of
        distinct locations. ``obs_length`` and ``obs_offset`` are ``(L, Y)``
        integer arrays holding, per (location, year), the number of
        observation rows and the 1-based position of the first such row in the
        ``obs_*`` vectors (0 where there are no rows). ``surv_year``,
        ``surv_eggs`` (females * fecundity) and ``surv_fry`` describe the
        survival rows. ``obs_location``, ``obs_year``, ``obs_day``,
        ``obs_redds`` and ``obs_temp`` are the observation rows sorted by
        (location, year, day). Year and location indices are 1-based.

    Raises
    ------
    ValueError
        If either table has no usable rows after filtering.
    """
    # Named stan_data (not ``stan``) so the pystan module alias is not shadowed.
    stan_data = {'temp_reg': temp_reg, 'mat_reg': mat_reg, 'alpha_egg': alpha_egg, 'beta_egg': beta_egg}
    obs_df = pd.read_csv(obs)
    surv_df = pd.read_csv(surv)
    # Explicit copy: the frame is modified below and must not be a view of the
    # unfiltered data (avoids pandas' chained-assignment warning).
    obs_df = obs_df[obs_df['temp'] > 1.0].copy()

    # process population data
    surv_df.columns = [c.lower() for c in surv_df.columns]
    surv_df = surv_df.rename(columns={'fry_at_redbluff': 'fry', 'adult_females': 'females'})
    surv_df = surv_df[['year', 'females', 'fecundity', 'fry']].dropna()
    surv_df = surv_df.assign(eggs=surv_df['females'] * surv_df['fecundity'])[['year', 'eggs', 'fry']]

    if obs_df.empty or surv_df.empty:
        raise ValueError('load_data: no usable rows left in the observation or survival table')

    # number of observations
    N = len(obs_df)
    S = len(surv_df)
    stan_data['N'] = N
    stan_data['S'] = S

    # compute relative year and total number of years
    # A column that contained blanks is read as float even after dropna(), so
    # cast to int: the years are used as array sizes and indices below.
    y_min = int(min(surv_df['year'].min(), obs_df['year'].min()))
    y_max = int(max(surv_df['year'].max(), obs_df['year'].max()))
    Y = y_max - y_min + 1
    stan_data['Y'] = Y
    obs_df['year'] = (obs_df['year'] - y_min).astype(int)
    surv_df = surv_df.assign(year=(surv_df['year'] - y_min).astype(int))

    # replace location ids with numbers (numbered in order of first appearance)
    loc_idx = {label: n for n, label in enumerate(obs_df['loc'].unique())}
    L = len(loc_idx)
    stan_data['L'] = L
    obs_df['loc'] = obs_df['loc'].map(loc_idx)

    # observation indexing matrix
    # Groups come back ordered by (loc, year), the same order the observation
    # rows are sorted into below, so cumulative sizes give each group's start.
    obs_len = obs_df.groupby(['loc', 'year']).size().reset_index(name='length')
    loc_pos = obs_len['loc'].values
    year_pos = obs_len['year'].values
    obs_length = np.zeros((L, Y), dtype=int)
    obs_length[loc_pos, year_pos] = obs_len['length'].astype(int).values
    stan_data['obs_length'] = obs_length
    # 1-based offset of the first row of every (loc, year) group
    obs_len['offset'] = obs_len['length'].cumsum() - obs_len['length'] + 1
    obs_offset = np.zeros((L, Y), dtype=int)
    obs_offset[loc_pos, year_pos] = obs_len['offset'].astype(int).values
    stan_data['obs_offset'] = obs_offset

    # survival data (years shifted to 1-based)
    stan_data['surv_year'] = 1 + surv_df['year'].astype(int).values
    stan_data['surv_eggs'] = surv_df['eggs'].astype(int).values
    stan_data['surv_fry'] = surv_df['fry'].astype(int).values

    # redd and temp data (location and year shifted to 1-based)
    obs_df = obs_df.sort_values(by=['loc', 'year', 'day'])
    stan_data['obs_location'] = obs_df['loc'].astype(int).values + 1
    stan_data['obs_year'] = obs_df['year'].astype(int).values + 1
    stan_data['obs_day'] = obs_df['day'].astype(int).values
    stan_data['obs_redds'] = obs_df['redds'].astype(int).values
    stan_data['obs_temp'] = obs_df['temp'].values

    return stan_data

In [4]:
# Build the Stan input dict. load_data takes the survival CSV first and the
# redd/temperature observation CSV second; the remaining parameters keep their defaults.
stan_data = load_data('../../data/Martin_reanalysis/Data_Martin_reanalysis.csv', '../../data/Martin_reanalysis/redd_temp_data.csv')

INFO:numexpr.utils:NumExpr defaulting to 4 threads.


In [5]:
# Display the assembled dict to check its scalar sizes and index arrays.
stan_data

{'temp_reg': 0.05,
 'mat_reg': 0.01,
 'alpha_egg': 0.00056,
 'beta_egg': 0.001044,
 'N': 507,
 'S': 23,
 'Y': 25,
 'L': 5,
 'obs_length': array([[ 6,  4,  8, 14,  0,  0,  8, 11, 10,  9,  4,  6,  5,  0,  4,  1,
          1,  4,  5,  2,  3,  1,  6,  8,  8],
        [11, 10,  9, 13,  0,  0,  9, 12, 10,  9,  6,  9, 10,  4, 10,  4,
         11, 11,  8, 10,  9,  5, 10,  7, 10],
        [ 1,  0,  1,  1,  0,  0,  9, 11,  8,  9,  4,  9,  7,  1,  6,  1,
         12, 10, 10, 10,  0,  0, 10,  4, 11],
        [ 0,  0,  1,  6,  0,  0,  1,  0,  0,  0,  0,  2,  0,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  0,  0],
        [ 0,  0,  1,  7,  0,  0,  3,  2,  2,  2,  0,  4,  1,  0,  0,  0,
          0,  0,  0,  0,  0,  0,  0,  5,  0]]),
 'obs_offset': array([[  1,   7,  11,  19,   0,   0,  33,  41,  52,  62,  71,  75,  81,
           0,  86,  90,  91,  92,  96, 101, 103, 106, 107, 113, 121],
        [129, 140, 150, 159,   0,   0, 172, 181, 193, 203, 212, 218, 227,
         237, 241, 251, 255, 266, 

In [None]:
# Draw from the posterior with 4 chains of 2000 iterations each. The seed is
# fixed so that a Restart & Run All gives repeatable draws.
fit = model.sampling(data=stan_data, iter=2000, chains=4, seed=42)

In [None]:
# Print the text summary returned by the fit's stansummary() method.
print(fit.stansummary())

In [None]:
# Draw the fit's trace plot for a visual check of the sampled chains.
fit.traceplot()

In [None]:
# NOTE(review): x and y are not assigned in any visible cell of this notebook,
# so this fails on a fresh kernel unless they are defined elsewhere -- confirm or remove.
plt.plot(x,y)

In [None]:
# NOTE(review): y is not assigned in any visible cell -- leftover scratch state; confirm or remove.
min(y)

In [None]:
# Summary statistics of the 'day' column.
# NOTE(review): at notebook level obs_df is first assigned in the following cell
# (inside load_data it is only a local), so on Restart & Run All this cell runs
# before obs_df exists -- it should be moved below the read_csv cell.
obs_df['day'].describe()

In [None]:
# Read the raw redd/temperature observations into a notebook-level frame
# (no temperature filter is applied here, unlike in load_data).
obs_df = pd.read_csv('../../data/Martin_reanalysis/redd_temp_data.csv')

In [None]:
# Order rows by location, year and day, then add a 'time' column: a running day
# count that treats every year as 365 days, shifted so the earliest row is 0.
obs_df = (
    obs_df
    .sort_values(by=['loc', 'year', 'day'])
    .assign(time=lambda frame: (frame['year'] - 1) * 365 + frame['day'])
    .assign(time=lambda frame: frame['time'] - frame['time'].min())
)

In [None]:
# Scatter plot of temperature against the derived 'time' column for the rows whose loc is 'RKM479'.
obs_df[obs_df['loc'] == 'RKM479'].plot('time', 'temp', kind='scatter')

In [None]:
# NOTE(review): a is not assigned in any visible cell. Presumably a NumPy array used to
# try out paired-index assignment (for an ndarray this sets a[0, 0] and a[2, 1]), the same
# pattern load_data uses to fill obs_length/obs_offset -- confirm or remove this scratch cell.
a[[0,2], [0,1]] = 100

In [None]:
# Display a after the indexed assignment in the previous cell.
a