In [21]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import re

# Daily temperature data

In [87]:
# Directory handed to load_temp_data below; every entry in it is read as a '<digits>.csv' file.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
daily_temp_path = Path('/home/alex/data/daily_temp/')

In [102]:
def load_one_file(fpath):
    """Read one temperature CSV and convert its day-of-year column to dates.

    The file name must be ``<digits>.csv``; the digits are used as the year.
    Column names are lower-cased, and the ``day`` column (1-based day of the
    year, so day 1 is 1 January) is replaced by the matching calendar date.

    Parameters
    ----------
    fpath : pathlib.Path
        Path to the CSV file; only ``fpath.name`` is used to find the year.

    Returns
    -------
    pandas.DataFrame
        The file contents with lower-cased column names and a datetime
        ``day`` column.

    Raises
    ------
    ValueError
        If the file name is not of the form ``<digits>.csv``.
    """
    # Raw string: '\d' in a plain string literal is an invalid escape sequence.
    name_match = re.fullmatch(r'(\d+)\.csv', fpath.name)
    if name_match is None:
        raise ValueError(
            f"expected a file named '<year>.csv', got {fpath.name!r}"
        )
    year_start = pd.Timestamp(year=int(name_match.group(1)), month=1, day=1)

    df = pd.read_csv(fpath)
    df.columns = [c.lower() for c in df.columns]
    # to_timedelta(unit='D') states the unit explicitly instead of relying on a
    # day-resolution astype cast, whose handling differs between pandas versions.
    df['day'] = pd.to_timedelta(df['day'] - 1, unit='D') + year_start
    return df

def load_temp_data(fpath):
    """Load every temperature CSV in a directory into one long-format table.

    Each ``*.csv`` file in ``fpath`` is read with ``load_one_file``. Rows that
    share a ``day`` are averaged, and the remaining columns are melted so that
    each row holds one (day, location, temperature) observation.

    Parameters
    ----------
    fpath : pathlib.Path
        Directory containing the ``<year>.csv`` files.

    Returns
    -------
    pandas.DataFrame
        Columns ``day``, ``location`` and ``temperature``, sorted by
        location and then day.

    Raises
    ------
    ValueError
        If the directory contains no ``*.csv`` files.
    """
    # Sorted glob: ignores non-CSV entries (hidden files, checkpoint folders)
    # and reads the files in the same order on every run.
    csv_files = sorted(fpath.glob('*.csv'))
    if not csv_files:
        raise ValueError(f'no CSV files found in {fpath}')

    daily = pd.concat([load_one_file(f) for f in csv_files])
    daily = daily.groupby(['day'], as_index=False).mean()
    # With value_vars omitted, melt uses every non-id column in frame order.
    # A set here would make the column order vary between sessions.
    long_form = daily.melt(id_vars='day', var_name='location', value_name='temperature')
    return long_form.sort_values(by=['location', 'day'])

In [103]:
# Build the long-format temperature table, then re-sort it by day only.
# NOTE(review): this replaces the (location, day) order produced inside load_temp_data;
# in the displayed output, rows sharing a day appear in no particular location order.
df = load_temp_data(daily_temp_path).sort_values(by=['day'])

In [104]:
# Show the temperature table (pandas truncates the middle rows of a long frame).
df

Unnamed: 0,day,location,temperature
37984,1996-01-01,rkm440,
0,1996-01-01,rkm483,11.343
9496,1996-01-01,rkm450,
56976,1996-01-01,rkm444,11.099
47480,1996-01-01,rkm479,11.318
...,...,...,...
18991,2021-12-30,rkm450,10.171
56975,2021-12-30,rkm479,10.798
66471,2021-12-30,rkm444,
28487,2021-12-30,rkm470,10.604


In [105]:
# Write the temperature table to CSV, leaving out the index column.
# NOTE(review): absolute, machine-specific output path.
df.to_csv('/home/alex/src/martin/data/temp_data.csv', index=False)

# Redd count data

In [155]:
# CSV read by load_redd_data below.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
redd_count_path = Path('/home/alex/src/martin/data/redd_temp_data.csv')

In [156]:
def load_redd_data(fpath):
    """Load the redd-count CSV and return it as a (day, location, redds) table.

    The input must have ``year``, ``day``, ``loc`` and ``redds`` columns.
    ``day`` is treated as a 1-based day of the year and combined with ``year``
    into a calendar date; ``loc`` is lower-cased and returned as ``location``.

    Parameters
    ----------
    fpath : str or pathlib.Path
        Path of the CSV file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``day`` (datetime), ``location`` and ``redds``, sorted by
        location and then day. The index labels are those of the rows as read.
    """
    raw = pd.read_csv(fpath)

    # to_datetime / to_timedelta state the format and unit explicitly instead
    # of relying on day-resolution astype casts, whose handling differs
    # between pandas versions.
    year_start = pd.to_datetime(raw['year'].astype(str), format='%Y')
    day_offset = pd.to_timedelta(raw['day'] - 1, unit='D')

    redds = pd.DataFrame({
        'day': year_start + day_offset,
        # .str.lower() passes missing values through instead of raising.
        'location': raw['loc'].str.lower(),
        'redds': raw['redds'],
    })
    return redds.sort_values(by=['location', 'day'])

In [157]:
# Load the redd counts.
# NOTE(review): this rebinds `df`, which earlier cells use for the temperature table.
df = load_redd_data(redd_count_path)

In [158]:
# Write the redd-count table to CSV, leaving out the index column.
# NOTE(review): absolute, machine-specific output path.
df.to_csv('/home/alex/src/martin/data/redd_data.csv', index=False)

# Survival data

In [121]:
# CSV read by load_survival_data below.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
survival_data_path = Path('/home/alex/src/martin/data/Data_Martin_reanalysis.csv')

In [150]:
def load_survival_data(fpath):
    """Read the survival CSV and return its complete (year, survival) rows.

    Column names are lower-cased and stripped of surrounding whitespace.
    Only ``year`` and ``egg_to_fry`` are kept, rows with a missing value in
    either are dropped, and ``egg_to_fry`` is returned under the name
    ``survival``. The surviving rows keep their original index labels.
    """
    return (
        pd.read_csv(fpath)
        .rename(columns=lambda name: name.lower().strip())
        .loc[:, ['year', 'egg_to_fry']]
        .dropna()
        .rename(columns={'egg_to_fry': 'survival'})
    )

In [151]:
# Load the per-year survival values.
# NOTE(review): this rebinds `df`, which earlier cells use for the temperature and redd tables.
df = load_survival_data(survival_data_path)

In [152]:
# Show the survival table; gaps in the index labels are rows removed by dropna().
df

Unnamed: 0,year,survival
0,1996,0.22227
1,1997,0.414082
2,1998,0.281141
3,1999,0.224489
6,2002,0.273541
7,2003,0.229983
8,2004,0.20939
9,2005,0.18463
10,2006,0.154293
11,2007,0.211196


In [153]:
# Write the survival table to CSV, leaving out the index column.
# NOTE(review): absolute, machine-specific output path.
df.to_csv('/home/alex/src/martin/data/survival_data.csv', index=False)