In [None]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Base directory, three levels above the working directory; every data path
# in this notebook is built relative to it.
proj_dir = Path("../../../")

In [3]:
# Read the four processed fish-count CSVs (one per dam) in a single pass.
mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df = (
    pd.read_csv(proj_dir / csv_path)
    for csv_path in (
        "Data/insitu/fish/processed/DART_MCN.csv",
        "Data/insitu/fish/processed/DART_PRD.csv",
        "Data/insitu/fish/processed/DART_PRO.csv",
        "Data/insitu/fish/processed/DART_IHR.csv",
    )
)

# Parse the date strings so the frames can be filtered and resampled by time.
for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df):
    fish_df["date"] = pd.to_datetime(fish_df["date"])

In [4]:
# Split each frame's date into integer year / month / day columns.
for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df):
    for part in ('year', 'month', 'day'):
        fish_df[part] = getattr(fish_df['date'].dt, part)

In [5]:
# deviation data
# Build one wide table of deviations: a 'Date' column plus one column per
# river kilometre, where the column label is the integer rkm itself.
deviation_df = pd.DataFrame(columns=['Date'])

# range(450, 650, 10) covers rkm 450, 460, ..., 640 (one CSV per value).
for rkm in range(450, 650, 10):
    deviation = pd.read_csv(proj_dir / f"Data/database/deviations/{rkm}.csv")
    deviation['Date'] = pd.to_datetime(deviation['Date'])
    # Copy the 'Deviation' values into a column named after this rkm so the
    # columns stay distinguishable after merging.
    deviation[rkm] = deviation['Deviation']
    # Outer merge keeps every date that appears in any of the files.
    deviation_df = pd.merge(deviation_df, deviation[['Date', rkm]], on='Date', how='outer')


## 2000

In [6]:
closeup_year = 2000
# Close-up window: 1 Aug to 15 Nov of the selected year (both ends inclusive).
window_start, window_end = f'{closeup_year}-08-01', f'{closeup_year}-11-15'

# Weekly means of every column inside the window, one frame per dam.
mcnary_df_closeup, priest_rapids_df_closeup, prosser_df_closeup, ice_harbor_df_closeup = [
    fish_df[(fish_df['date'] >= window_start) & (fish_df['date'] <= window_end)]
    .resample('W', on='date')
    .mean()
    .reset_index()
    for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df)
]

In [7]:
# Weekly McNary and Priest Rapids tables side by side; clashing column names
# get the _mcn / _prd suffix respectively.
combined_closeup = mcnary_df_closeup.merge(
    priest_rapids_df_closeup,
    how='outer',
    on='date',
    suffixes=('_mcn', '_prd'),
)

In [None]:
# Top panel: weekly mean chinook counts at both dams.
# Bottom panel: Priest Rapids count as a percentage of the McNary count.
# Tick labels show month-day only.
fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
counts_ax, ratio_ax = ax
week_dates = combined_closeup['date']
week_labels = week_dates.dt.strftime('%m-%d')

counts_ax.bar(week_dates, combined_closeup['chinook_mcn'], label='McNary', width=5)
counts_ax.bar(week_dates, combined_closeup['chinook_prd'], label='Priest Rapids', width=3)
counts_ax.set_ylabel('Fish count')
counts_ax.legend(loc='upper right')

prp_mcn_ratio = combined_closeup['chinook_prd'] / combined_closeup['chinook_mcn'] *100
ratio_ax.bar(week_dates, prp_mcn_ratio, width=4)
ratio_ax.set_ylabel('Ratio (%)')

# Same x-axis decoration on both panels.
for panel in (counts_ax, ratio_ax):
    panel.set_xlabel('Date')
    panel.set_xticks(week_dates)
    panel.set_xticklabels(week_labels)

fig.tight_layout()

In [9]:
# Load thorr_data.csv, add a parsed 'date' column, and keep RiverID 9 only.
thorr_df = (
    pd.read_csv(proj_dir / "Data/database/thorr_data.csv")
    .assign(date=lambda table: pd.to_datetime(table['Date']))
    .loc[lambda table: table['RiverID'] == 9]
    .copy()
)
# Rows inside this year's 1 Aug - 15 Nov close-up window.
in_closeup = (thorr_df['date'] >= f'{closeup_year}-08-01') & (thorr_df['date'] <= f'{closeup_year}-11-15')
thorr_df_closeup = thorr_df[in_closeup]

In [10]:
# Build a wide table of weekly mean temperature: one row per week and one
# column per river kilometre (RKm).
grouped_closeup = thorr_df_closeup.groupby('RKm')

# Start from a frame holding only the join key; each RKm column is
# outer-merged onto it in turn.
merged_thorr = pd.DataFrame(columns=['date'])
for group in grouped_closeup:
    # `group` is a (RKm value, sub-frame) tuple; group[1] is the sub-frame.
    resampled = group[1].resample('W', on='date').mean(numeric_only=True).reset_index()
    # Rename the temperature column to this group's RKm. Every row in the
    # group shares one RKm, so the first weekly mean of 'RKm' is that value.
    resampled.rename(columns={'EstTempC': resampled['RKm'].iloc[0]}, inplace=True)
    merged_thorr = pd.merge(merged_thorr, resampled[['date', resampled['RKm'].iloc[0]]], on='date', how='outer')

# Ensure the merged key is datetime before it becomes the index.
merged_thorr['date'] = pd.to_datetime(merged_thorr['date'])

#make date the index
merged_thorr.set_index('date', inplace=True)
# sort by date
merged_thorr.sort_index(inplace=True)

In [11]:
# Flip the table: rows become the former columns (RKm labels) and columns
# become the former index (dates).
transposed_merge = merged_thorr.T

In [None]:
# Restrict the profile to river kilometres 450 through 640 (label slice, inclusive).
rkm_450_640 = transposed_merge.loc[450:640]

fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# Draw one line per week that falls between 1 Sep and 20 Oct of the close-up year.
profile_start = pd.to_datetime(f'{closeup_year}-09-01')
profile_end = pd.to_datetime(f'{closeup_year}-10-20')
for week in rkm_450_640.columns:
    if not (profile_start <= week <= profile_end):
        continue
    ax.plot(rkm_450_640.index, rkm_450_640[week], label=f'{week.strftime("%m-%d")}', marker='o')

# Horizontal reference line at 20 on the temperature axis.
ax.axhline(y=20, color='r', linestyle='--')
# Legend sits to the right of the axes, outside the plotting area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('Temperature (C)')
ax.set_xlabel('River Kilometer')

In [13]:
# Deviations inside the 1 Aug - 15 Nov window, reshaped so that rows are the
# rkm columns of deviation_df and columns are the dates in ascending order.
deviation_450_640 = (
    deviation_df[(deviation_df['Date'] >= f'{closeup_year}-08-01') & (deviation_df['Date'] <= f'{closeup_year}-11-15')]
    .copy()
    .set_index('Date')
    .sort_index()
    .transpose()
)


In [None]:
# Along-river profile of the deviation values: one line per date column that
# falls between 1 Sep and 15 Oct of the close-up year.
fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# The window bounds do not change inside the loop, so parse them once.
plot_start = pd.to_datetime(f'{closeup_year}-09-01')
plot_end = pd.to_datetime(f'{closeup_year}-10-15')
for date in deviation_450_640.columns:
    if plot_start <= date <= plot_end:
        ax.plot(deviation_450_640.index, deviation_450_640[date], label=f'{date.strftime("%m-%d")}', marker='o')


# put the legend outside the plot
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Axis label spelling corrected ("anormaly" -> "anomaly").
ax.set_ylabel('Temperature anomaly (C)')
ax.set_xlabel('River Kilometer')

## 2013

In [15]:
closeup_year = 2013
# Close-up window: 1 Aug to 15 Nov of the selected year (both ends inclusive).
window_start, window_end = f'{closeup_year}-08-01', f'{closeup_year}-11-15'

# Weekly means of every column inside the window, one frame per dam.
mcnary_df_closeup, priest_rapids_df_closeup, prosser_df_closeup, ice_harbor_df_closeup = [
    fish_df[(fish_df['date'] >= window_start) & (fish_df['date'] <= window_end)]
    .resample('W', on='date')
    .mean()
    .reset_index()
    for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df)
]

In [16]:
# Weekly McNary and Priest Rapids tables side by side; clashing column names
# get the _mcn / _prd suffix respectively.
combined_closeup = mcnary_df_closeup.merge(
    priest_rapids_df_closeup,
    how='outer',
    on='date',
    suffixes=('_mcn', '_prd'),
)

In [None]:
# Top panel: weekly mean chinook counts at both dams.
# Bottom panel: Priest Rapids count as a percentage of the McNary count.
# Tick labels show month-day only.
fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
counts_ax, ratio_ax = ax
week_dates = combined_closeup['date']
week_labels = week_dates.dt.strftime('%m-%d')

counts_ax.bar(week_dates, combined_closeup['chinook_mcn'], label='McNary', width=5)
counts_ax.bar(week_dates, combined_closeup['chinook_prd'], label='Priest Rapids', width=3)
counts_ax.set_ylabel('Fish count')
counts_ax.legend(loc='upper right')

prp_mcn_ratio = combined_closeup['chinook_prd'] / combined_closeup['chinook_mcn'] *100
ratio_ax.bar(week_dates, prp_mcn_ratio, width=4)
ratio_ax.set_ylabel('Ratio (%)')

# Same x-axis decoration on both panels.
for panel in (counts_ax, ratio_ax):
    panel.set_xlabel('Date')
    panel.set_xticks(week_dates)
    panel.set_xticklabels(week_labels)

fig.tight_layout()

In [18]:
# Load thorr_data.csv, add a parsed 'date' column, and keep RiverID 9 only.
thorr_df = (
    pd.read_csv(proj_dir / "Data/database/thorr_data.csv")
    .assign(date=lambda table: pd.to_datetime(table['Date']))
    .loc[lambda table: table['RiverID'] == 9]
    .copy()
)
# Rows inside this year's 1 Aug - 15 Nov close-up window.
in_closeup = (thorr_df['date'] >= f'{closeup_year}-08-01') & (thorr_df['date'] <= f'{closeup_year}-11-15')
thorr_df_closeup = thorr_df[in_closeup]

In [19]:
# Build a wide table of weekly mean temperature: one row per week and one
# column per river kilometre (RKm).
grouped_closeup = thorr_df_closeup.groupby('RKm')

# Start from a frame holding only the join key; each RKm column is
# outer-merged onto it in turn.
merged_thorr = pd.DataFrame(columns=['date'])
for group in grouped_closeup:
    # `group` is a (RKm value, sub-frame) tuple; group[1] is the sub-frame.
    resampled = group[1].resample('W', on='date').mean(numeric_only=True).reset_index()
    # Rename the temperature column to this group's RKm. Every row in the
    # group shares one RKm, so the first weekly mean of 'RKm' is that value.
    resampled.rename(columns={'EstTempC': resampled['RKm'].iloc[0]}, inplace=True)
    merged_thorr = pd.merge(merged_thorr, resampled[['date', resampled['RKm'].iloc[0]]], on='date', how='outer')

# Ensure the merged key is datetime before it becomes the index.
merged_thorr['date'] = pd.to_datetime(merged_thorr['date'])

#make date the index
merged_thorr.set_index('date', inplace=True)
# sort by date
merged_thorr.sort_index(inplace=True)

In [20]:
# Flip the table: rows become the former columns (RKm labels) and columns
# become the former index (dates).
transposed_merge = merged_thorr.T

In [None]:
# Restrict the profile to river kilometres 450 through 640 (label slice, inclusive).
rkm_450_640 = transposed_merge.loc[450:640]

fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# Draw one line per week that falls between 1 Sep and 20 Oct of the close-up year.
profile_start = pd.to_datetime(f'{closeup_year}-09-01')
profile_end = pd.to_datetime(f'{closeup_year}-10-20')
for week in rkm_450_640.columns:
    if not (profile_start <= week <= profile_end):
        continue
    ax.plot(rkm_450_640.index, rkm_450_640[week], label=f'{week.strftime("%m-%d")}', marker='o')

# Horizontal reference line at 20 on the temperature axis.
ax.axhline(y=20, color='r', linestyle='--')
# Legend sits to the right of the axes, outside the plotting area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('Temperature (C)')
ax.set_xlabel('River Kilometer')

In [22]:
# Deviations inside the 1 Aug - 15 Nov window, reshaped so that rows are the
# rkm columns of deviation_df and columns are the dates in ascending order.
deviation_450_640 = (
    deviation_df[(deviation_df['Date'] >= f'{closeup_year}-08-01') & (deviation_df['Date'] <= f'{closeup_year}-11-15')]
    .copy()
    .set_index('Date')
    .sort_index()
    .transpose()
)


In [None]:
# Along-river profile of the deviation values: one line per date column that
# falls between 1 Sep and 15 Oct of the close-up year.
fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# The window bounds do not change inside the loop, so parse them once.
plot_start = pd.to_datetime(f'{closeup_year}-09-01')
plot_end = pd.to_datetime(f'{closeup_year}-10-15')
for date in deviation_450_640.columns:
    if plot_start <= date <= plot_end:
        ax.plot(deviation_450_640.index, deviation_450_640[date], label=f'{date.strftime("%m-%d")}', marker='o')


# put the legend outside the plot
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Axis label spelling corrected ("anormaly" -> "anomaly").
ax.set_ylabel('Temperature anomaly (C)')
ax.set_xlabel('River Kilometer')

## 2002

In [24]:
closeup_year = 2002
# Close-up window: 1 Aug to 15 Nov of the selected year (both ends inclusive).
window_start, window_end = f'{closeup_year}-08-01', f'{closeup_year}-11-15'

# Weekly means of every column inside the window, one frame per dam.
mcnary_df_closeup, priest_rapids_df_closeup, prosser_df_closeup, ice_harbor_df_closeup = [
    fish_df[(fish_df['date'] >= window_start) & (fish_df['date'] <= window_end)]
    .resample('W', on='date')
    .mean()
    .reset_index()
    for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df)
]

In [25]:
# Weekly McNary and Priest Rapids tables side by side; clashing column names
# get the _mcn / _prd suffix respectively.
combined_closeup = mcnary_df_closeup.merge(
    priest_rapids_df_closeup,
    how='outer',
    on='date',
    suffixes=('_mcn', '_prd'),
)

In [None]:
# Top panel: weekly mean chinook counts at both dams.
# Bottom panel: Priest Rapids count as a percentage of the McNary count.
# Tick labels show month-day only.
fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
counts_ax, ratio_ax = ax
week_dates = combined_closeup['date']
week_labels = week_dates.dt.strftime('%m-%d')

counts_ax.bar(week_dates, combined_closeup['chinook_mcn'], label='McNary', width=5)
counts_ax.bar(week_dates, combined_closeup['chinook_prd'], label='Priest Rapids', width=3)
counts_ax.set_ylabel('Fish count')
counts_ax.legend(loc='upper right')

prp_mcn_ratio = combined_closeup['chinook_prd'] / combined_closeup['chinook_mcn'] *100
ratio_ax.bar(week_dates, prp_mcn_ratio, width=4)
ratio_ax.set_ylabel('Ratio (%)')

# Same x-axis decoration on both panels.
for panel in (counts_ax, ratio_ax):
    panel.set_xlabel('Date')
    panel.set_xticks(week_dates)
    panel.set_xticklabels(week_labels)

fig.tight_layout()

In [27]:
# Load thorr_data.csv, add a parsed 'date' column, and keep RiverID 9 only.
thorr_df = (
    pd.read_csv(proj_dir / "Data/database/thorr_data.csv")
    .assign(date=lambda table: pd.to_datetime(table['Date']))
    .loc[lambda table: table['RiverID'] == 9]
    .copy()
)
# Rows inside this year's 1 Aug - 15 Nov close-up window.
in_closeup = (thorr_df['date'] >= f'{closeup_year}-08-01') & (thorr_df['date'] <= f'{closeup_year}-11-15')
thorr_df_closeup = thorr_df[in_closeup]

In [28]:
# Build a wide table of weekly mean temperature: one row per week and one
# column per river kilometre (RKm).
grouped_closeup = thorr_df_closeup.groupby('RKm')

# Start from a frame holding only the join key; each RKm column is
# outer-merged onto it in turn.
merged_thorr = pd.DataFrame(columns=['date'])
for group in grouped_closeup:
    # `group` is a (RKm value, sub-frame) tuple; group[1] is the sub-frame.
    resampled = group[1].resample('W', on='date').mean(numeric_only=True).reset_index()
    # Rename the temperature column to this group's RKm. Every row in the
    # group shares one RKm, so the first weekly mean of 'RKm' is that value.
    resampled.rename(columns={'EstTempC': resampled['RKm'].iloc[0]}, inplace=True)
    merged_thorr = pd.merge(merged_thorr, resampled[['date', resampled['RKm'].iloc[0]]], on='date', how='outer')

# Ensure the merged key is datetime before it becomes the index.
merged_thorr['date'] = pd.to_datetime(merged_thorr['date'])

#make date the index
merged_thorr.set_index('date', inplace=True)
# sort by date
merged_thorr.sort_index(inplace=True)

In [29]:
# Flip the table: rows become the former columns (RKm labels) and columns
# become the former index (dates).
transposed_merge = merged_thorr.T

In [None]:
# Restrict the profile to river kilometres 450 through 640 (label slice, inclusive).
rkm_450_640 = transposed_merge.loc[450:640]

fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# Draw one line per week that falls between 1 Sep and 20 Oct of the close-up year.
profile_start = pd.to_datetime(f'{closeup_year}-09-01')
profile_end = pd.to_datetime(f'{closeup_year}-10-20')
for week in rkm_450_640.columns:
    if not (profile_start <= week <= profile_end):
        continue
    ax.plot(rkm_450_640.index, rkm_450_640[week], label=f'{week.strftime("%m-%d")}', marker='o')

# Horizontal reference line at 20 on the temperature axis.
ax.axhline(y=20, color='r', linestyle='--')
# Legend sits to the right of the axes, outside the plotting area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('Temperature (C)')
ax.set_xlabel('River Kilometer')

In [31]:
# Deviations inside the 1 Aug - 15 Nov window, reshaped so that rows are the
# rkm columns of deviation_df and columns are the dates in ascending order.
deviation_450_640 = (
    deviation_df[(deviation_df['Date'] >= f'{closeup_year}-08-01') & (deviation_df['Date'] <= f'{closeup_year}-11-15')]
    .copy()
    .set_index('Date')
    .sort_index()
    .transpose()
)


In [None]:
# Along-river profile of the deviation values: one line per date column that
# falls between 1 Sep and 15 Oct of the close-up year.
fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# The window bounds do not change inside the loop, so parse them once.
plot_start = pd.to_datetime(f'{closeup_year}-09-01')
plot_end = pd.to_datetime(f'{closeup_year}-10-15')
for date in deviation_450_640.columns:
    if plot_start <= date <= plot_end:
        ax.plot(deviation_450_640.index, deviation_450_640[date], label=f'{date.strftime("%m-%d")}', marker='o')


# put the legend outside the plot
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Axis label spelling corrected ("anormaly" -> "anomaly").
ax.set_ylabel('Temperature anomaly (C)')
ax.set_xlabel('River Kilometer')

## 2015

In [33]:
closeup_year = 2015
# Close-up window: 1 Aug to 15 Nov of the selected year (both ends inclusive).
window_start, window_end = f'{closeup_year}-08-01', f'{closeup_year}-11-15'

# Weekly means of every column inside the window, one frame per dam.
mcnary_df_closeup, priest_rapids_df_closeup, prosser_df_closeup, ice_harbor_df_closeup = [
    fish_df[(fish_df['date'] >= window_start) & (fish_df['date'] <= window_end)]
    .resample('W', on='date')
    .mean()
    .reset_index()
    for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df)
]

In [34]:
# Weekly McNary and Priest Rapids tables side by side; clashing column names
# get the _mcn / _prd suffix respectively.
combined_closeup = mcnary_df_closeup.merge(
    priest_rapids_df_closeup,
    how='outer',
    on='date',
    suffixes=('_mcn', '_prd'),
)

In [None]:
# Top panel: weekly mean chinook counts at both dams.
# Bottom panel: Priest Rapids count as a percentage of the McNary count.
# Tick labels show month-day only.
fig, ax = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
counts_ax, ratio_ax = ax
week_dates = combined_closeup['date']
week_labels = week_dates.dt.strftime('%m-%d')

counts_ax.bar(week_dates, combined_closeup['chinook_mcn'], label='McNary', width=5)
counts_ax.bar(week_dates, combined_closeup['chinook_prd'], label='Priest Rapids', width=3)
counts_ax.set_ylabel('Fish count')
counts_ax.legend(loc='upper right')

prp_mcn_ratio = combined_closeup['chinook_prd'] / combined_closeup['chinook_mcn'] *100
ratio_ax.bar(week_dates, prp_mcn_ratio, width=4)
ratio_ax.set_ylabel('Ratio (%)')

# Same x-axis decoration on both panels.
for panel in (counts_ax, ratio_ax):
    panel.set_xlabel('Date')
    panel.set_xticks(week_dates)
    panel.set_xticklabels(week_labels)

fig.tight_layout()

In [36]:
# Load thorr_data.csv, add a parsed 'date' column, and keep RiverID 9 only.
thorr_df = (
    pd.read_csv(proj_dir / "Data/database/thorr_data.csv")
    .assign(date=lambda table: pd.to_datetime(table['Date']))
    .loc[lambda table: table['RiverID'] == 9]
    .copy()
)
# Rows inside this year's 1 Aug - 15 Nov close-up window.
in_closeup = (thorr_df['date'] >= f'{closeup_year}-08-01') & (thorr_df['date'] <= f'{closeup_year}-11-15')
thorr_df_closeup = thorr_df[in_closeup]

In [37]:
# Build a wide table of weekly mean temperature: one row per week and one
# column per river kilometre (RKm).
grouped_closeup = thorr_df_closeup.groupby('RKm')

# Start from a frame holding only the join key; each RKm column is
# outer-merged onto it in turn.
merged_thorr = pd.DataFrame(columns=['date'])
for group in grouped_closeup:
    # `group` is a (RKm value, sub-frame) tuple; group[1] is the sub-frame.
    resampled = group[1].resample('W', on='date').mean(numeric_only=True).reset_index()
    # Rename the temperature column to this group's RKm. Every row in the
    # group shares one RKm, so the first weekly mean of 'RKm' is that value.
    resampled.rename(columns={'EstTempC': resampled['RKm'].iloc[0]}, inplace=True)
    merged_thorr = pd.merge(merged_thorr, resampled[['date', resampled['RKm'].iloc[0]]], on='date', how='outer')

# Ensure the merged key is datetime before it becomes the index.
merged_thorr['date'] = pd.to_datetime(merged_thorr['date'])

#make date the index
merged_thorr.set_index('date', inplace=True)
# sort by date
merged_thorr.sort_index(inplace=True)

In [38]:
# Flip the table: rows become the former columns (RKm labels) and columns
# become the former index (dates).
transposed_merge = merged_thorr.T

In [None]:
# Restrict the profile to river kilometres 450 through 640 (label slice, inclusive).
rkm_450_640 = transposed_merge.loc[450:640]

fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# Draw one line per week that falls between 1 Sep and 20 Oct of the close-up year.
profile_start = pd.to_datetime(f'{closeup_year}-09-01')
profile_end = pd.to_datetime(f'{closeup_year}-10-20')
for week in rkm_450_640.columns:
    if not (profile_start <= week <= profile_end):
        continue
    ax.plot(rkm_450_640.index, rkm_450_640[week], label=f'{week.strftime("%m-%d")}', marker='o')

# Horizontal reference line at 20 on the temperature axis.
ax.axhline(y=20, color='r', linestyle='--')
# Legend sits to the right of the axes, outside the plotting area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.set_ylabel('Temperature (C)')
ax.set_xlabel('River Kilometer')

In [40]:
# Deviations inside the 1 Aug - 15 Nov window, reshaped so that rows are the
# rkm columns of deviation_df and columns are the dates in ascending order.
deviation_450_640 = (
    deviation_df[(deviation_df['Date'] >= f'{closeup_year}-08-01') & (deviation_df['Date'] <= f'{closeup_year}-11-15')]
    .copy()
    .set_index('Date')
    .sort_index()
    .transpose()
)

In [None]:
# Along-river profile of the deviation values: one line per date column that
# falls between 1 Sep and 15 Oct of the close-up year.
fig, ax = plt.subplots(1, 1, figsize=(10, 4), sharex=True)
# The window bounds do not change inside the loop, so parse them once.
plot_start = pd.to_datetime(f'{closeup_year}-09-01')
plot_end = pd.to_datetime(f'{closeup_year}-10-15')
for date in deviation_450_640.columns:
    if plot_start <= date <= plot_end:
        ax.plot(deviation_450_640.index, deviation_450_640[date], label=f'{date.strftime("%m-%d")}', marker='o')


# put the legend outside the plot
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
# Axis label spelling corrected ("anormaly" -> "anomaly").
ax.set_ylabel('Temperature anomaly (C)')
ax.set_xlabel('River Kilometer')

In [5]:
def migration_stats(df, date_col='date', fish_col='chinook', start_month=8, start_day=1, end_month=11, end_day=15):
    """Summarise each year's fish run inside a fixed calendar window.

    For every year present in ``df``, the rows dated between
    ``start_month``/``start_day`` and ``end_month``/``end_day`` (both ends
    inclusive) are selected, sorted by date and summarised.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a date column and a count column. It is not modified.
    date_col : str
        Name of the date column (anything ``pd.to_datetime`` accepts).
    fish_col : str
        Name of the count column; rows where it is missing are ignored.
    start_month, start_day, end_month, end_day : int
        Calendar bounds of the run window, applied to every year.

    Returns
    -------
    pandas.DataFrame
        One row per year (index 0..n-1) with the columns ``year``,
        ``start_date``, ``end_date``, ``total_fish``, ``daily_mean``,
        ``max_count`` and, for each of the 5/25/50/75/95 % levels,
        ``<p>th_percentile`` and ``<p>th_date``: the running total and the date
        on the first day the running total reaches that share of
        ``total_fish``. Levels that are never reached (for example when the
        window holds no rows) are left missing.
    """
    levels = (('5th', 0.05), ('25th', 0.25), ('50th', 0.50), ('75th', 0.75), ('95th', 0.95))
    columns = ['year', 'start_date', 'end_date', 'total_fish', 'daily_mean', 'max_count']
    for label, _ in levels:
        columns.extend([f'{label}_percentile', f'{label}_date'])

    # Work on a new frame so the caller's DataFrame is left untouched.
    dates = pd.to_datetime(df[date_col])
    work = df.assign(**{date_col: dates})
    work['year'] = dates.dt.year
    work = work.dropna(subset=[fish_col])

    # Collect one dict per year and build the frame once at the end; this keeps
    # proper dtypes (e.g. datetime64 for the date columns) and a unique index.
    records = []
    for name, group in work.groupby('year'):
        run_start = pd.Timestamp(year=int(name), month=start_month, day=start_day)
        run_end = pd.Timestamp(year=int(name), month=end_month, day=end_day)

        in_window = (group[date_col] >= run_start) & (group[date_col] <= run_end)
        run_df = group[in_window].sort_values(by=date_col)
        counts = run_df[fish_col]

        total_fish = counts.sum()
        record = {
            'year': name,
            'start_date': run_start,
            'end_date': run_end,
            'total_fish': total_fish,
            # Total divided by the day difference between window end and start.
            # NOTE(review): the window is inclusive on both ends, so it spans
            # one more calendar day than this divisor - confirm this is intended.
            'daily_mean': total_fish / (run_end - run_start).days,
            'max_count': counts.max(),
        }

        # One cumulative sum serves every level: find the first day on which
        # the running total reaches the requested fraction of the run total.
        running_total = counts.cumsum().to_numpy()
        for label, fraction in levels:
            reached = running_total >= total_fish * fraction
            if reached.any():
                first = int(reached.argmax())
                record[f'{label}_percentile'] = running_total[first]
                record[f'{label}_date'] = run_df[date_col].iloc[first]

        records.append(record)

    return pd.DataFrame(records, columns=columns)
                



In [6]:
# Per-year run statistics for each dam; every dam uses its own run window
# (start month/day to end month/day).
mcnary_stats_df = migration_stats(
    mcnary_df, start_month=8, start_day=9, end_month=10, end_day=31
)
priest_rapids_stats_df = migration_stats(
    priest_rapids_df, start_month=8, start_day=14, end_month=11, end_day=15
)
prosser_stats_df = migration_stats(
    prosser_df, start_month=8, start_day=16, end_month=12, end_day=28
)
ice_harbor_stats_df = migration_stats(
    ice_harbor_df, start_month=8, start_day=12, end_month=12, end_day=15
)

In [7]:
# Keep only the records dated 2013 through 2018 (both years inclusive) at every dam.
mcnary_df_, priest_rapids_df_, prosser_df_, ice_harbor_df_ = (
    fish_df[fish_df['date'].dt.year.between(2013, 2018)]
    for fish_df in (mcnary_df, priest_rapids_df, prosser_df, ice_harbor_df)
)

In [None]:
# Four stacked panels sharing the date axis: the chinook series at each dam.
fig, axs = plt.subplots(4, 1, figsize=(15, 5), sharex=True)
panels = (
    (mcnary_df_, 'McNary Dam'),
    (ice_harbor_df_, 'Ice Harbor Dam'),
    (priest_rapids_df_, 'Priest Rapids Dam'),
    (prosser_df_, 'Prosser Dam'),
)
for panel_ax, (dam_df, dam_title) in zip(axs, panels):
    dam_df.plot(x='date', y=['chinook'], style='-', ax=panel_ax, title=dam_title)

fig.tight_layout()

In [None]:
# One panel per dam; each horizontal box summarises a year's run timing.
# All positions are days elapsed since that year's window start date:
# whiskers at the 5 % / 95 % dates, box at 25 % / 75 %, median line at 50 %.
fig, axs = plt.subplots(1, 4, figsize=(10, 10), sharey=True)
for i, (stats_df, loc) in enumerate(zip([mcnary_stats_df, priest_rapids_stats_df, prosser_stats_df, ice_harbor_stats_df], ['McNary', 'Priest Rapids', 'Prosser', 'Ice Harbor'])):
    boxes = []
    for _, row in stats_df.iterrows():

        boxes.append(
            {
                "label": f"{row['year']}",
                "whislo": (row['5th_date'] -row["start_date"]).days,  # Bottom whisker position
                "q1": (row['25th_date'] -row["start_date"]).days,  # First quartile (25th percentile)
                "med": (row['50th_date'] -row["start_date"]).days,  # Median         (50th percentile)
                "q3": (row['75th_date'] -row["start_date"]).days,  # Third quartile (75th percentile)
                "whishi": (row['95th_date'] -row["start_date"]).days,  # Top whisker position
                "fliers": [],  # Outliers
            }
        )

    axs[i].bxp(boxes, showfliers=False, vert=False)
    axs[i].set_title(loc)
    axs[i].set_xlabel('Days since run start')

In [None]:
# stack bar plot of the total fish count except for McNary
# Per-year run totals for each site, taken from the migration_stats tables.
data_dict = {
    "McNary": mcnary_stats_df[["year", "total_fish"]].copy(),
    "Priest Rapids": priest_rapids_stats_df[["year", "total_fish"]].copy(),
    "Prosser": prosser_stats_df[["year", "total_fish"]].copy(),
    "Ice Harbor": ice_harbor_stats_df[["year", "total_fish"]].copy(),
}

# Three chained outer merges on year. The suffixes end up as:
#   total_fish_1 = McNary, total_fish_2 = Priest Rapids,
#   total_fish_3 = Prosser, total_fish_4 = Ice Harbor.
merged_data = pd.merge(
    data_dict["McNary"],
    data_dict["Priest Rapids"],
    on="year",
    how="outer",
    suffixes=("_1", "_2"),
)
# No name clash in this merge, so Prosser's column stays "total_fish" here;
# the next merge renames it to "total_fish_3".
merged_data = pd.merge(merged_data, data_dict["Prosser"], on="year", how="outer")
merged_data = pd.merge(
    merged_data, data_dict["Ice Harbor"], on="year", how="outer", suffixes=("_3", "_4")
)

# fig, axs = plt.subplots(3, 1, figsize=(15, 10), sharex=True)
fig, axs = plt.subplots(2, 1, figsize=(10, 7), sharex=True)
# Reverse the axes array so axs[0] is the lower panel and axs[1] the upper one.
axs = axs[::-1]

# Stacking order of the upstream sites (first entry is drawn at the bottom).
locations = ["Priest Rapids", "Ice Harbor", "Prosser"]

# Each upstream total expressed as a percentage of the McNary total for the same year.
percentages = {
    "Priest Rapids": merged_data["total_fish_2"] / merged_data["total_fish_1"] * 100,
    "Ice Harbor": merged_data["total_fish_4"] / merged_data["total_fish_1"] * 100,
    "Prosser": merged_data["total_fish_3"] / merged_data["total_fish_1"] * 100,
}

totals = {
    "Priest Rapids": merged_data["total_fish_2"],
    "Ice Harbor": merged_data["total_fish_4"],
    "Prosser": merged_data["total_fish_3"],
}

# Legend text for each site.
labels = {
    "Priest Rapids": "[5] Priest Rapids",
    "Ice Harbor": "[12] Ice Harbor",
    "Prosser": "[16] Prosser",
}

# Every [:-1] below drops the last row of merged_data from the stacked bars.
# NOTE(review): presumably the final year is incomplete - confirm; the McNary
# panel further down still plots every row.
bottom = np.zeros(len(merged_data[:-1]))
bottom_totals = np.zeros(len(merged_data[:-1]))

for loc in locations:
    # Bar heights are truncated to whole percent by astype(int).
    # NOTE(review): `bottom` accumulates the untruncated values, so stacked
    # segments may not line up exactly; astype(int) is also undefined for NaN.
    p = axs[0].bar(
        merged_data["year"][:-1].values.astype(int),
        percentages[loc][:-1].values.astype(int),
        label=labels[loc],
        bottom=bottom,
    )
    # axs[1].bar(merged_data['year'][:-1], totals[loc][:-1], label=loc, bottom=bottom_totals)
    bottom += percentages[loc][:-1]
    # bottom_totals is accumulated but never plotted (its bar call is commented out above).
    bottom_totals += totals[loc][:-1]

    # Write each segment's value at its centre.
    axs[0].bar_label(p, label_type="center")

axs[0].set_title("Percentage of fish count at upstream locations")
# axs[1].set_title("Total fish count at each location")
axs[0].legend(loc="upper right")
axs[0].set_ylabel("Percentage of total fish count")

# axs[2].bar(merged_data['year'], merged_data['total_fish_1'], label='McNary')
# Upper panel: McNary total per year, all rows included.
axs[1].bar(merged_data["year"], merged_data["total_fish_1"], label="McNary")
axs[1].set_title("Total Fall Adult Chinook count at [4] McNary Dam")
axs[1].set_ylabel("Total fish count")

# axs[1].set_xlim(2013, 2018)


# species = (
#     "Adelie\n $\\mu=$3700.66g",
#     "Chinstrap\n $\\mu=$3733.09g",
#     "Gentoo\n $\\mu=5076.02g$",
# )
# weight_counts = {
#     "Below": np.array([70, 31, 58]),
#     "Above": np.array([82, 37, 66]),
# }
# width = 0.5

# fig, ax = plt.subplots()
# bottom = np.zeros(3)

# for boolean, weight_count in weight_counts.items():
#     p = axs[1].bar(species, weight_count, width, label=boolean, bottom=bottom)
#     bottom += weight_count

# axs[1].set_title("Number of penguins with above average body mass")
# axs[1].legend(loc="upper right")

In [172]:
# Water-temperature CSVs, one per station.
col_510 = pd.read_csv(proj_dir / "Code/notebooks/EDA/columbia_510.csv")
col_520 = pd.read_csv(proj_dir / "Code/notebooks/EDA/columbia_520.csv")
col_470 = pd.read_csv(proj_dir / "Code/notebooks/EDA/columbia_470bw.csv")
col_630 = pd.read_csv(proj_dir / "Code/notebooks/EDA/columbia_630bw.csv")
sna_0 = pd.read_csv(proj_dir / "Code/notebooks/EDA/snake_0.csv")

# Same clean-up for every station: rename the temperature column after the
# station's variable name, parse the date, and add year / month / day columns.
stations = (
    (col_510, "col_510"),
    (col_520, "col_520"),
    (col_470, "col_470"),
    (col_630, "col_630"),
    (sna_0, "sna_0"),
)
for station_df, temp_col in stations:
    station_df.rename(columns={"Date": "date", "WaterTemperature(C)": temp_col}, inplace=True)
    station_df['date'] = pd.to_datetime(station_df['date'])
    for part in ('year', 'month', 'day'):
        station_df[part] = getattr(station_df['date'].dt, part)


In [None]:
# Display the col_470 table as this cell's output (inspection only).
col_470

In [175]:
# Join the five station tables on the shared calendar keys; the outer merge
# keeps dates that are missing at some stations.
merge_keys = ['date', 'year', 'month', 'day']
temp_df = col_510
for station_df in (col_520, col_470, col_630, sna_0):
    temp_df = pd.merge(temp_df, station_df, on=merge_keys, how='outer')
temp_df['week'] = temp_df['date'].dt.isocalendar().week

# Keep the selected years and the months June through October (inclusive).
selected_years = [2000, 2002, 2008, 2009, 2014, 2013, 2015]
in_selection = temp_df['date'].dt.year.isin(selected_years) & temp_df['month'].between(6, 10)
# .copy() makes the filtered table independent, so the column assignment
# below is not a chained assignment on a slice (SettingWithCopyWarning).
temp_df = temp_df[in_selection].copy()

# Temperature difference between the col_470 and col_630 series.
temp_df['diff'] = temp_df['col_470'] - temp_df['col_630']

In [159]:
# fig, ax = plt.subplots(figsize=(15, 5))
# temp_df.plot(x='date', y=['col_510', 'sna_0'], style='-', title='Water temperature at different locations', ax=ax)

In [None]:
# One line per selected year: the 'diff' column against ISO week number.
fall_grouped = temp_df.groupby('year')

fig, ax = plt.subplots(1, 1, figsize=(10, 5))
for year, year_df in fall_grouped:
    year_df.plot(x='week', y='diff', ax=ax, label=year)

In [None]:
# One line per selected year: the sna_0 temperature against ISO week number.
fig, ax = plt.subplots(1, 1, figsize=(10, 5))
for year, year_df in fall_grouped:
    year_df.plot(x='week', y='sna_0', ax=ax, label=year)

In [47]:
def time_analysis(stats_df1, stats_df2,):
    """Compare two per-year stats tables and return their differences.

    The tables are inner-joined on ``year``, so only years present in both
    appear in the result. Every difference is ``stats_df2 - stats_df1``.

    Parameters
    ----------
    stats_df1, stats_df2 : pandas.DataFrame
        Each must provide ``year``, ``total_fish`` and the datetime columns
        ``start_date``, ``end_date``, ``5th_date``, ``25th_date``,
        ``50th_date``, ``75th_date`` and ``95th_date``.

    Returns
    -------
    pandas.DataFrame
        ``year``, the date differences in whole days (``*_date_diff``), and
        ``total_fish_diff``.
    """
    paired = stats_df1.merge(stats_df2, on='year', how='inner', suffixes=('_1', '_2'))

    def _gap(column):
        # Second table minus first table for one shared column.
        return paired[f'{column}_2'] - paired[f'{column}_1']

    # Insertion order below fixes the column order of the returned frame.
    result_columns = {'year': paired['year']}
    for boundary in ('start_date', 'end_date'):
        result_columns[f'{boundary}_diff'] = _gap(boundary).dt.days
    result_columns['total_fish_diff'] = _gap('total_fish')
    for percentile in (5, 25, 50, 75, 95):
        result_columns[f'{percentile}th_date_diff'] = _gap(f'{percentile}th_date').dt.days

    return pd.DataFrame(result_columns)

In [None]:
# Show the whole priest_rapids_stats_df table as this cell's output.
priest_rapids_stats_df

In [None]:
# Show the whole mcnary_stats_df table as this cell's output.
mcnary_stats_df

In [52]:
# McNary is always the first argument, so every *_diff column in the results
# is (other site - McNary).
priest_rapids_ta, prosser_ta, ice_harbor_ta = (
    time_analysis(mcnary_stats_df, site_stats_df)
    for site_stats_df in (priest_rapids_stats_df, prosser_stats_df, ice_harbor_stats_df)
)

In [None]:
# Per-year timing offsets of each site relative to McNary. The *_ta tables
# come from time_analysis(mcnary_stats_df, <site>), so values are in days and
# signed as (site - McNary).
fig, ax = plt.subplots()

# Explicit label: without it the legend shows the bare column name next to the
# site-named entries, which hides that this line is a different metric.
priest_rapids_ta.plot(x='year', y='start_date_diff', ax=ax, label='Priest Rapids (start_date_diff)')
priest_rapids_ta.plot(x='year', y='5th_date_diff', ax=ax, label='Priest Rapids')
prosser_ta.plot(x='year', y='5th_date_diff', ax=ax, label='Prosser')
ice_harbor_ta.plot(x='year', y='5th_date_diff', ax=ax, label='Ice Harbor')

ax.set_xlabel('Year')
ax.set_ylabel('5th_date_diff vs. McNary (days)')


In [None]:
def _migration_boxes(site_stats):
    """Build the list of per-year box specs consumed by ``Axes.bxp``.

    Each row of ``site_stats`` becomes one box. Whisker, quartile and median
    positions are the number of days between that row's ``start_date`` and its
    ``5th_date`` / ``25th_date`` / ``50th_date`` / ``75th_date`` / ``95th_date``.
    """
    boxes = []
    for _, row in site_stats.iterrows():
        season_start = row["start_date"]
        boxes.append(
            {
                "label": f"{row['year']}",
                "whislo": (row['5th_date'] - season_start).days,   # bottom whisker
                "q1": (row['25th_date'] - season_start).days,      # first quartile
                "med": (row['50th_date'] - season_start).days,     # median
                "q3": (row['75th_date'] - season_start).days,      # third quartile
                "whishi": (row['95th_date'] - season_start).days,  # top whisker
                "fliers": [],                                      # no outliers drawn
            }
        )
    return boxes


# Year range shown on the bar-chart panels.
YEAR_LIMITS = (1998.5, 2024.5)

fig, axs = plt.subplots(2, 3, figsize=(15, 15))
axs = axs.flatten()

# One row of three panels per stats table: timing box plot, total count, max count.
# The site code goes into every title so the two rows can be told apart.
for panel_row, (site, site_stats) in enumerate((("BON", bon_stats_df), ("TDA", tda_stats_df))):
    timing_ax, total_ax, max_ax = axs[3 * panel_row: 3 * panel_row + 3]

    timing_ax.bxp(_migration_boxes(site_stats), showfliers=False, vert=False)
    timing_ax.set_title(f"{site}: Chinook Fall Migration Timing")
    timing_ax.set_xlabel("Days since start_date")

    # plot total fish count
    total_ax.barh(site_stats['year'], site_stats['total_fish'])
    total_ax.set_title(f"{site}: Total Fish Count")
    total_ax.set_ylim(*YEAR_LIMITS)

    # plot max count
    max_ax.barh(site_stats['year'], site_stats['max_count'])
    max_ax.set_title(f"{site}: Max Fish Count")
    max_ax.set_ylim(*YEAR_LIMITS)

fig.tight_layout()


In [18]:
# Load the BON-TDA temperature table and keep rows dated 1999 or later.
bon_tda_temp = pd.read_csv(proj_dir / "Code/notebooks/EDA/bon-tda.csv")
bon_tda_temp['date'] = pd.to_datetime(bon_tda_temp['Date'])
bon_tda_temp = bon_tda_temp.loc[bon_tda_temp['date'].dt.year >= 1999].copy()

# Split the timestamp into calendar parts for later grouping.
for date_part in ('year', 'month', 'day'):
    bon_tda_temp[date_part] = getattr(bon_tda_temp['date'].dt, date_part)

# Absolute and relative (percent of BON) difference in total fish, BON minus TDA.
# NOTE(review): these Series operations align on the row index, not on 'year';
# confirm the *_stats_df tables list the same years in the same order.
fish_dif = bon_stats_df['total_fish'] - tda_stats_df['total_fish']
fish_dif_percent = fish_dif / bon_stats_df['total_fish'] * 100

# lyl total as a percentage of the BON total.
lyl_percent = lyl_stats_df['total_fish'] / bon_stats_df['total_fish'] * 100

In [None]:
# lyl_stats_df.plot(x='year', y='total_fish', style='-')
# Show the whole lyl_stats_df table as this cell's output.
lyl_stats_df

In [None]:
# lyl_stats_df.plot(x='year', y='total_fish', style='-')
# NOTE(review): this cell repeats the preceding cell verbatim; one of the two
# can probably be removed.
lyl_stats_df

In [61]:
# Group the temperature rows by calendar year; later cells iterate over this
# to work on one year at a time.
grouped_temp = bon_tda_temp.groupby('year')

In [None]:
# List the distinct 'year' values inside each group (one entry per group).
grouped_temp['year'].unique()

In [None]:
# One stacked panel per year showing AvgTemp over the full calendar year, with
# dashed red lines marking that year's fall-window start and end dates.
n_years = grouped_temp.ngroups

# squeeze=False keeps the return value an array even when there is only one
# year, so axs[i] works for any number of groups.
fig, axs = plt.subplots(n_years, 1, figsize=(10, 1 * n_years), squeeze=False)
axs = axs.ravel()

for i, (name, group) in enumerate(grouped_temp):
    year_ax = axs[i]
    year_ax.plot(group['date'], group['AvgTemp'])
    year_ax.set_title(name)

    for month, day in ((fall_start_month, fall_start_day), (fall_end_month, fall_end_day)):
        year_ax.axvline(pd.Timestamp(f"{name}-{month}-{day}"), color='r', linestyle='--')

    year_ax.set_xlim(pd.Timestamp(f"{name}-01-01"), pd.Timestamp(f"{name}-12-31"))

    # Hide the x tick labels; the panels are too short to show dates legibly.
    year_ax.tick_params(labelbottom=False)

fig.tight_layout()

In [None]:
# For each year, average AvgTemp over the fall window and plot one point per
# year. `years` and `mean_fall_temp` are also reused by a later summary plot.
fig, ax = plt.subplots(figsize=(10, 5))

years = []
mean_fall_temp = []

for name, group in grouped_temp:
    window_start = pd.Timestamp(f"{name}-{fall_start_month}-{fall_start_day}")
    window_end = pd.Timestamp(f"{name}-{fall_end_month}-{fall_end_day}")

    # Both bounds are exclusive: the start and end dates themselves are left out.
    # Selecting with .loc (instead of writing new columns onto the groupby slice)
    # avoids pandas' SettingWithCopyWarning.
    in_window = group.loc[(group['date'] > window_start) & (group['date'] < window_end), 'AvgTemp']

    # Years with no rows inside the window contribute no point.
    if in_window.empty:
        continue

    fall_mean = in_window.mean()
    ax.scatter(name, fall_mean, label=name)
    years.append(name)
    mean_fall_temp.append(fall_mean)

ax.set_xlabel("Year")
ax.set_ylabel("Mean AvgTemp in fall window")

fig.tight_layout()

In [None]:
# Three stacked panels on a shared x axis: BON-minus-TDA fish difference as a
# percentage, the same difference as a raw count, and the mean fall temperature.
fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(10, 7.5), sharex=True)
percent_ax, count_ax, temperature_ax = axs

percent_ax.bar(bon_stats_df['year'], fish_dif_percent)
percent_ax.set(title="Fish Count Difference (%)", ylabel="Percentage difference")

count_ax.bar(bon_stats_df['year'], fish_dif)
count_ax.set(title="Fish Count Difference", ylabel=" Count")

temperature_ax.plot(years, mean_fall_temp)
temperature_ax.set(title="Mean Fall Temperature")

fig.tight_layout()

In [None]:
# Scatter of 'duration' against 'total_fish', one point per row of stats_df,
# with each point annotated by that row's 'year'.
fig, ax = plt.subplots()
ax.scatter(x=stats_df['duration'], y=stats_df['total_fish'])
for _, record in stats_df.iterrows():
    point = (record['duration'], record['total_fish'])
    ax.annotate(record['year'], point)

# ax.scatter(stats_df['duration'], stats_df['max_count'], color='r')
# ax.scatter((stats_df['5th_date'] -stats_df["start_date"]).dt.days, stats_df['total_fish'], color='b')
# ax.set_ylim(0, .8e6)


In [None]:
# Scatter of two derived offsets, both measured in days from start_date:
# x = days until 95th_date, y = days until 5th_date. Points are coloured by
# total_fish.
fig, ax = plt.subplots()

days_to_95th = (stats_df['95th_date'] - stats_df["start_date"]).dt.days
days_to_5th = (stats_df['5th_date'] - stats_df["start_date"]).dt.days

m = ax.scatter(days_to_95th, days_to_5th, c=stats_df['total_fish'])

# Both axes are computed quantities, so label them explicitly.
ax.set_xlabel("Days from start_date to 95th_date")
ax.set_ylabel("Days from start_date to 5th_date")

# add colorbar
cbar = fig.colorbar(m, ax=ax)
cbar.set_label("total_fish")
