# DTW and Zero Flow Analysis Code
Written by Danielle Tadych

 The purpose of this script is to determine if there is a statistical relationship between DTW and number of zero flow days for reaches

## Loading packages and files

In [5]:
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gp
import scipy.stats as sp
import sys

# Add the path to the Utils folder
utils_path = os.path.abspath(os.path.join('..', 'Utils'))
if utils_path not in sys.path:
    sys.path.append(utils_path)

import CustomFunctions as cf


# Data paths
datapath = '../../Data'
inputpath = '../../Data/Input'
outputpath = '../../Data/Output/Local'
shapepath = inputpath+'/Shapefiles'
figurepath = '../../Figures/Local/'

In [46]:
# %%  ==== Reading in the data ====
# Define some things about your file

StudyArea = "HUC12s"
Naming_col = "huc12"
distance = 2000

# Now to read in the streamflow data
filename_ts = f'/{Naming_col}_average_flow_onlyoutlets.csv'
annual_db = pd.read_csv(outputpath+filename_ts, header=1, index_col=0)

#Block to do some formatting
annual_db = annual_db[0:168102]
annual_db.index = annual_db.index.astype('int64')
zero_flow = annual_db.reset_index(inplace=True)
zero_flow = annual_db.rename(columns = {'Year':'index'})
zero_flow.set_index('index', inplace=True)
print(zero_flow.head())

# Read in the annual time series database
filepath = outputpath + f'{StudyArea}bufferedflowlines{distance}_MedianYearlyDTW.csv'
dtw = pd.read_csv(filepath, index_col=0)
dtw = dtw.transpose()
dtw.index = dtw.index.astype('int64')
dtw.reset_index(inplace=True)
dtw = dtw.rename(columns = {'year':'index'})
dtw.set_index('index', inplace=True)
# print("Water Levels for "+StudyArea+" Analysis loaded.")
# dtw


              1980  1985  1986  1987  1988  1989  1990  1991  1992  1993  ...  \
index                                                                     ...   
150100010508   NaN   0.0   NaN   NaN   NaN   0.0   0.0   0.0   0.0   0.0  ...   
150100010607   NaN   NaN   NaN   NaN   NaN   NaN   0.0   0.0   0.0   0.0  ...   
150100010608   NaN   NaN   NaN   NaN   NaN   NaN   0.0   0.0   0.0   0.0  ...   
150100020101   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  ...   
150100020405   NaN   NaN   NaN   NaN   NaN   0.0   0.0   0.0   0.0   0.0  ...   

              2016  2017  2018  2019  2020  2021  2022  2023  2024  2025  
index                                                                     
150100010508   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  
150100010607   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  
150100010608   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  
150100020101   NaN   0.0   0.0   0.0   0.0   0.0   0.0   

In [47]:
dtw

year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
150100070107,139.925,143.80,143.94,144.30,144.60,144.550,145.000,145.20,145.000,144.600,...,146.50,146.90,146.50,147.40,147.40,161.400,168.400,148.40,147.70,
150100070108,139.925,143.80,143.94,144.30,144.60,144.550,145.000,145.20,145.000,144.600,...,146.50,146.90,146.50,147.40,147.40,161.400,168.400,148.40,147.70,
150100070110,133.150,143.80,143.94,79.65,144.60,124.000,145.000,134.70,145.000,144.600,...,146.50,146.90,146.50,147.40,147.40,161.400,168.400,148.40,147.70,
150100100208,10.000,55.00,54.90,26.00,40.15,60.000,55.000,58.00,50.000,42.000,...,,,16.00,,,,,28.00,,
150100100312,63.100,55.27,54.90,26.00,40.15,60.000,55.000,52.00,32.100,34.500,...,,,,,,,,28.00,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150801020310,79.420,78.60,79.55,81.10,79.90,80.615,81.575,82.62,83.385,84.490,...,,,,,,,,,,
150803010305,,,,55.75,10.60,9.000,15.160,,,14.175,...,7.19,10.87,13.06,6.13,10.64,7.335,9.305,4.63,5.29,10.13
150803010307,,,85.00,11.50,10.60,9.000,15.160,,40.000,16.850,...,7.19,10.87,13.06,6.13,10.64,7.335,9.305,4.63,5.29,10.13
150803010310,51.000,90.00,55.00,57.00,65.50,70.000,60.000,59.50,59.000,57.400,...,61.20,,87.00,92.80,,,67.200,,58.00,


In [48]:
# %% Creating colors
c_1 = '#8d5a99' # Reservation
c_2 = "#d7191c" # Regulated with CAP (Water Category Color)
c_3 = '#e77a47' # Regulated without CAP (Water Category Color)
c_4 = '#2cbe21' # Lower CO River - SW (Water Category Color)
c_5 = '#2f8c73' # Upper CO River - Mixed (Water Category Color)
c_6 = '#6db7e8' # SE - GW
c_7 = '#165782' # NW - GW (Water Category color)
c_8 = '#229ce8' # SC - GW
c_9 = '#1f78b4' # NE - GW
c_10 = '#41bf9e' # N - Mixed
c_11 = '#7adec4' # C - Mixed
drought_color = '#ffa6b8'
wet_color = '#b8d3f2'

reg_colors = [c_2,c_7]
georeg_colors = [c_1,c_2,c_3,c_4,c_5,c_6,c_7,c_8,c_9,c_10,c_11]
SW_colors = [c_2,c_3,c_4,c_5,c_7]

bar_watercatc = [c_2,c_3,c_4,c_5,c_7]


# Color blind palette
# https://jacksonlab.agronomy.wisc.edu/2016/05/23/15-level-colorblind-friendly-palette/
cblind =["#000000","#004949","#009292","#ff6db6","#ffb6db",
 "#490092","#006ddb","#b66dff","#6db6ff","#b6dbff",
 "#920000","#924900","#db6d00","#24ff24","#ffff6d"]

# Matching new map

grace_color = '#A92B2A'
az_wells_color = '#1D4E77'

cap = '#77A8A5' #'#C6652B'
cap_secondary = '#004D40'
# noCAP = '#EDE461' # This is one from the map
noCAP = '#CCC339' # This color but darker for lines
GWdom = '#3B76AF'
mixed = '#6EB2E4'
swdom = '#469B76'

In [25]:
dtw.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, 150100070107 to 150803010407
Data columns (total 45 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   1980    173 non-null    float64
 1   1981    202 non-null    float64
 2   1982    202 non-null    float64
 3   1983    203 non-null    float64
 4   1984    204 non-null    float64
 5   1985    205 non-null    float64
 6   1986    210 non-null    float64
 7   1987    223 non-null    float64
 8   1988    202 non-null    float64
 9   1989    209 non-null    float64
 10  1990    212 non-null    float64
 11  1991    194 non-null    float64
 12  1992    200 non-null    float64
 13  1993    211 non-null    float64
 14  1994    210 non-null    float64
 15  1995    227 non-null    float64
 16  1996    220 non-null    float64
 17  1997    218 non-null    float64
 18  1998    216 non-null    float64
 19  1999    224 non-null    float64
 20  2000    218 non-null    float64
 21  2001    221 non-null    

In [55]:
zero_flow.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54 entries, 150100010508 to 150803010412
Data columns (total 42 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   1980    2 non-null      float64
 1   1985    1 non-null      float64
 2   1986    3 non-null      float64
 3   1987    19 non-null     float64
 4   1988    23 non-null     float64
 5   1989    25 non-null     float64
 6   1990    31 non-null     float64
 7   1991    31 non-null     float64
 8   1992    30 non-null     float64
 9   1993    34 non-null     float64
 10  1994    31 non-null     float64
 11  1995    33 non-null     float64
 12  1996    32 non-null     float64
 13  1997    32 non-null     float64
 14  1998    32 non-null     float64
 15  1999    30 non-null     float64
 16  2000    32 non-null     float64
 17  2001    34 non-null     float64
 18  2002    37 non-null     float64
 19  2003    37 non-null     float64
 20  2004    38 non-null     float64
 21  2005    40 non-null 

In [50]:
# Find common HUC12s in both datasets
common_hucs = dtw.index.intersection(zero_flow.index)
common_hucs

Int64Index([150100100208, 150100100602, 150200020103, 150200080206,
            150301070105, 150502020507, 150503010908, 150503040405,
            150601060306, 150602020108, 150701020206, 150701020607,
            150701030311, 150701030507, 150702011502],
           dtype='int64', name='index')

In [82]:
# Subset both datasets to only include common HUC12s
dtw_df = dtw.loc[common_hucs]
zero_flow_df = zero_flow.loc[common_hucs]


In [83]:
zero_flow_df = zero_flow_df.iloc[:, :-1]  # Drops the last column


In [77]:
# Compute row-wise correlation
# correlations = dtw_df.corrwith(zero_flow_df,method='kendall', axis=1)

# Convert to numeric (just in case)
dtw_df = dtw_df.apply(pd.to_numeric, errors="coerce")
zero_flow_df = zero_flow_df.apply(pd.to_numeric, errors="coerce")

# Compute row-wise correlation while ignoring NaNs
correlations = dtw_df.corrwith(zero_flow_df, axis=1, drop=True)
correlations

index
150100100208   NaN
150100100602   NaN
150200020103   NaN
150200080206   NaN
150301070105   NaN
150502020507   NaN
150503010908   NaN
150503040405   NaN
150601060306   NaN
150602020108   NaN
150701020206   NaN
150701020607   NaN
150701030311   NaN
150701030507   NaN
150702011502   NaN
dtype: float64

In [78]:
dtw_df

year,1980,1981,1982,1983,1984,1985,1986,1987,1988,1989,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
150100100208,10.0,55.0,54.9,26.0,40.15,60.0,55.0,58.0,50.0,42.0,...,,,16.0,,,,,28.0,,
150100100602,10.0,,,10.0,25.0,10.0,,25.0,7.0,5.0,...,,18.2,,,,,,28.0,,
150200020103,,15.0,,,45.0,54.0,33.5,20.0,43.5,130.0,...,22.5,,,35.0,15.0,,25.0,,,
150200080206,19.5,107.0,24.0,13.0,,33.0,,,,,...,,,,,,,,,,
150301070105,11.0,,11.5,14.0,10.5,11.5,8.0,15.5,15.0,16.0,...,79.0,,13.0,16.0,,,,,,
150502020507,40.0,46.0,33.0,51.0,90.0,47.5,44.0,59.9,44.0,62.5,...,,45.0,,,63.0,,325.0,80.0,,
150503010908,110.0,78.85,84.4,,30.0,66.8,76.6,101.55,99.0,0.0,...,159.345,119.8,119.0,119.1,114.795,,,,,
150503040405,,604.9,603.0,,385.0,605.15,604.3,385.0,603.85,150.0,...,382.17,269.86,381.37,381.1,162.12,601.65,601.3,601.15,601.5,600.9
150601060306,0.0,24.35,13.5,43.0,7.0,22.65,38.0,42.9,52.0,53.5,...,55.0,,79.7,98.9,34.5,30.5,63.5,83.5,29.38,
150602020108,74.25,74.25,80.0,109.5,100.5,94.85,80.6,43.0,99.4,90.5,...,107.87,91.01,108.87,109.31,97.24,109.7,110.01,110.0,108.89,108.71


In [79]:
zero_flow_df

Unnamed: 0_level_0,1980,1985,1986,1987,1988,1989,1990,1991,1992,1993,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
150100100208,,,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150100100602,,,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150200020103,,,,0.0,0.0,0.0,0.0,0.0,,0.0,...,313.0,319.0,260.0,291.0,228.0,239.0,310.0,312.0,220.0,250.0
150200080206,,,,,,,,,,,...,4.0,44.0,56.0,96.0,87.0,0.0,0.0,0.0,0.0,0.0
150301070105,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
150502020507,,,,,,,78.0,63.0,,,...,0.0,141.0,196.0,268.0,189.0,228.0,298.0,195.0,243.0,327.0
150503010908,,,,,,3.0,41.0,0.0,122.0,2.0,...,191.0,96.0,65.0,10.0,88.0,0.0,0.0,0.0,2.0,0.0
150503040405,,,,,,,,,137.0,214.0,...,338.0,347.0,341.0,341.0,330.0,353.0,334.0,351.0,354.0,341.0
150601060306,,,,,,,,,,,...,,,,,,,,,,
150602020108,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [84]:
from scipy.stats import spearmanr, pearsonr

# Convert to numeric
dtw_df = dtw_df.apply(pd.to_numeric, errors="coerce")
zero_flow_df = zero_flow_df.apply(pd.to_numeric, errors="coerce")

# Compute correlation for each HUC12 using SciPy's spearmanr or pearsonr
correlation_results = {}

for huc in common_hucs:
    dtw_values = dtw_df.loc[huc].dropna()  # Remove NaNs
    zero_flow_values = zero_flow_df.loc[huc].dropna()  # Remove NaNs
    
    # Find years that exist in both datasets after dropping NaNs
    common_years = dtw_values.index.intersection(zero_flow_values.index)
    
    if len(common_years) > 1:  # Need at least 2 years for correlation
        corr, _ = spearmanr(dtw_values[common_years], zero_flow_values[common_years], nan_policy='omit')
        correlation_results[huc] = corr
    else:
        correlation_results[huc] = np.nan  # Not enough data

# Convert to DataFrame and save results
correlations = pd.Series(correlation_results)

In [85]:
correlations

150100100208   NaN
150100100602   NaN
150200020103   NaN
150200080206   NaN
150502020507   NaN
150503010908   NaN
150503040405   NaN
150601060306   NaN
150701020206   NaN
150701020607   NaN
150701030311   NaN
150701030507   NaN
150702011502   NaN
dtype: float64