# First, read in seasonal count data

In [1]:
import pandas as pd

# Load the CTRL daily count table and index it by its parsed 'date' column.
ctrl_counts = (
    pd.read_csv("../table_data/combined_CTRL_daily_count.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Same treatment for the PGW daily count table.
pgw_counts = (
    pd.read_csv("../table_data/combined_PGW_daily_count.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Summing along axis 0 collapses the date index, leaving one total per column
# (presumably one column per season -- confirm against the CSV header).
print('CTRL counts', ctrl_counts.sum(axis=0).to_numpy())
print('PGW Counts', pgw_counts.sum(axis=0).to_numpy())

CTRL counts [ 95. 114. 116. 110.  90. 109. 105.  97.  98. 105. 101. 113.]
PGW Counts [80. 81. 83. 89. 66. 63. 80. 72. 62. 83. 64. 83.]


Significance test for seasonal counts

In [2]:
from scipy.stats import ks_2samp

# Two-sample Kolmogorov-Smirnov test comparing the per-column totals of the
# CTRL and PGW count tables.
ks_2samp(
    data1=ctrl_counts.sum(axis=0).to_numpy(),
    data2=pgw_counts.sum(axis=0).to_numpy(),
)

KstestResult(statistic=np.float64(1.0), pvalue=np.float64(7.396023010506791e-07), statistic_location=np.float64(89.0), statistic_sign=np.int8(-1))

Seasonal SWE

In [3]:
import pandas as pd

# Load the CTRL daily SWE table and index it by its parsed 'date' column.
ctrl_swe = (
    pd.read_csv("../table_data/combined_CTRL_daily_swe.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Same treatment for the PGW daily SWE table.
pgw_swe = (
    pd.read_csv("../table_data/combined_PGW_daily_swe.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .set_index('date')
)

# Column-wise totals over all dates (axis 0), one value per column.
print('CTRL swe', ctrl_swe.sum(axis=0).to_numpy())
print('PGW swe', pgw_swe.sum(axis=0).to_numpy())

CTRL swe [ 821.64770855  839.0103679   833.64716186  967.10251815  896.08847683
  932.25457426 1043.83580724  976.73582836  763.96922754 1126.72004531
  754.67064982 1093.27790239]
PGW swe [580.95776791 571.05052909 589.43063742 697.35459293 557.39370814
 605.17288133 714.28026215 733.84215384 434.35119407 768.99828366
 421.56475585 723.06020675]


In [4]:
# Two-sample Kolmogorov-Smirnov test comparing the per-column SWE totals of
# the CTRL and PGW tables.
ks_2samp(
    data1=ctrl_swe.sum(axis=0).to_numpy(),
    data2=pgw_swe.sum(axis=0).to_numpy(),
)

KstestResult(statistic=np.float64(0.9166666666666666), pvalue=np.float64(1.77504552252163e-05), statistic_location=np.float64(733.8421538400088), statistic_sign=np.int8(-1))

In [5]:
# Read the CTRL and PGW extent tables as-is (no index column or date parsing
# is requested, so every CSV column is kept as a regular column).
ctrl_extent, pgw_extent = (
    pd.read_csv("../table_data/CTRL_extent.csv"),
    pd.read_csv("../table_data/PGW_extent.csv"),
)

# Preview the first rows of the CTRL table to show the available columns.
ctrl_extent.head()

Unnamed: 0.1,Unnamed: 0,storm_num,season,start_date_compare,end_date_compare,start_date,end_date,duration,swe,swath_area,swath_area_50p,swath_area_90p
0,0,CTRL_2000_2001_10133,2000_2001,2003-11-18 06:00:00,2003-11-19 15:00:00,2000-11-18 06:00:00,2000-11-19 15:00:00,33.0,1.742072,1505712.0,508080.0,85056.0
1,1,CTRL_2000_2001_10626,2000_2001,2003-11-18 03:00:00,2003-11-23 09:00:00,2000-11-18 03:00:00,2000-11-23 09:00:00,126.0,15.65519,4506304.0,3301936.0,612848.0
2,2,CTRL_2000_2001_12248,2000_2001,2003-11-21 06:00:00,2003-11-22 09:00:00,2000-11-21 06:00:00,2000-11-22 09:00:00,27.0,1.09367,1078928.0,417856.0,38976.0
3,3,CTRL_2000_2001_13818,2000_2001,2003-11-24 12:00:00,2003-11-25 12:00:00,2000-11-24 12:00:00,2000-11-25 12:00:00,24.0,1.165468,958720.0,531536.0,6032.0
4,4,CTRL_2000_2001_14284,2000_2001,2003-11-26 21:00:00,2003-11-30 15:00:00,2000-11-26 21:00:00,2000-11-30 15:00:00,90.0,6.564101,1376528.0,932704.0,262400.0


Durations significance test

(The seasonal totals printed below are shown only as an example; the significance test itself is run on the values for all individual swaths.)

In [6]:
# Per-season duration totals, printed for context only; they are not the
# input to the test below.
print('CTRL duration', ctrl_extent.groupby('season')['duration'].sum())
# Label corrected: this line prints PGW *duration* sums (it was previously
# mislabelled 'PGW swe', a copy-paste leftover).
print('PGW duration', pgw_extent.groupby('season')['duration'].sum())

# Two-sample Kolmogorov-Smirnov test on the full 'duration' column of each
# extent table (every row, not the seasonal sums printed above).
ks_2samp(ctrl_extent['duration'].values, pgw_extent['duration'].values)

CTRL duration season
2000_2001    5565.0
2001_2002    5994.0
2002_2003    5979.0
2003_2004    6159.0
2005_2006    5007.0
2006_2007    5739.0
2007_2008    5694.0
2008_2009    5526.0
2009_2010    5457.0
2010_2011    6525.0
2011_2012    4923.0
2012_2013    6771.0
Name: duration, dtype: float64
PGW swe season
2000_2001    4335.0
2001_2002    4281.0
2002_2003    4134.0
2003_2004    4515.0
2005_2006    3609.0
2006_2007    3732.0
2007_2008    4443.0
2008_2009    4326.0
2009_2010    3342.0
2010_2011    4599.0
2011_2012    3120.0
2012_2013    4869.0
Name: duration, dtype: float64


KstestResult(statistic=np.float64(0.04567228497081618), pvalue=np.float64(0.2141664039812162), statistic_location=np.float64(57.0), statistic_sign=np.int8(-1))

SWE significance test

(The seasonal totals printed below are shown only as an example; the significance test itself is run on the values for all individual swaths.)

In [7]:
# Per-season SWE totals, printed for context only.
print('CTRL swe', ctrl_extent.groupby('season')['swe'].agg('sum'))
print('PGW swe', pgw_extent.groupby('season')['swe'].agg('sum'))

# Two-sample Kolmogorov-Smirnov test on the full 'swe' column of each extent
# table (every row, not the seasonal sums printed above).
ks_2samp(
    data1=ctrl_extent['swe'].to_numpy(),
    data2=pgw_extent['swe'].to_numpy(),
)

CTRL swe season
2000_2001     821.647709
2001_2002     839.010368
2002_2003     833.647162
2003_2004     967.102518
2005_2006     896.088477
2006_2007     932.254574
2007_2008    1043.883109
2008_2009     976.735828
2009_2010     763.969228
2010_2011    1127.141805
2011_2012     754.670650
2012_2013    1093.851352
Name: swe, dtype: float64
PGW swe season
2000_2001    580.957768
2001_2002    571.050529
2002_2003    589.430637
2003_2004    697.354593
2005_2006    557.393708
2006_2007    605.172881
2007_2008    714.338331
2008_2009    733.842154
2009_2010    434.351194
2010_2011    768.998284
2011_2012    421.564756
2012_2013    723.060207
Name: swe, dtype: float64


KstestResult(statistic=np.float64(0.04485746350040257), pvalue=np.float64(0.23153107793979597), statistic_location=np.float64(8.364897044691443), statistic_sign=np.int8(-1))

Swath area significance test

(The seasonal totals printed below are shown only as an example; the significance test itself is run on the values for all individual swaths.)

In [8]:
# Per-season swath-area totals, scaled by 10**-6 to keep the printed numbers
# short (units are not stated in this notebook). Printed for context only.
print('CTRL area', ctrl_extent.groupby('season')['swath_area'].sum().mul(10**-6))
print('PGW area', pgw_extent.groupby('season')['swath_area'].sum().mul(10**-6))

# Two-sample Kolmogorov-Smirnov test on the full, unscaled 'swath_area'
# column of each extent table.
ks_2samp(
    data1=ctrl_extent['swath_area'].to_numpy(),
    data2=pgw_extent['swath_area'].to_numpy(),
)

CTRL area season
2000_2001    183.935344
2001_2002    195.118000
2002_2003    195.471520
2003_2004    210.799008
2005_2006    171.329472
2006_2007    186.769584
2007_2008    217.683104
2008_2009    195.167472
2009_2010    144.273568
2010_2011    221.989888
2011_2012    176.304000
2012_2013    214.452848
Name: swath_area, dtype: float64
PGW area season
2000_2001    128.800432
2001_2002    122.540688
2002_2003    130.512992
2003_2004    135.793120
2005_2006    101.210928
2006_2007    108.515136
2007_2008    134.225808
2008_2009    133.107296
2009_2010     83.105920
2010_2011    144.078880
2011_2012     95.413760
2012_2013    134.343824
Name: swath_area, dtype: float64


KstestResult(statistic=np.float64(0.09200964043910509), pvalue=np.float64(0.000249220278462126), statistic_location=np.float64(1373024.0), statistic_sign=np.int8(-1))

50th percentile swath area sig. test

(The seasonal totals printed below are shown only as an example; the significance test itself is run on the values for all individual swaths.)

In [9]:
# Per-season totals of the 'swath_area_50p' column, scaled by 10**-6 for
# display. Printed for context only.
print('CTRL area', ctrl_extent.groupby('season')['swath_area_50p'].sum().mul(10**-6))
print('PGW area', pgw_extent.groupby('season')['swath_area_50p'].sum().mul(10**-6))

# Two-sample Kolmogorov-Smirnov test on the full, unscaled 'swath_area_50p'
# column of each extent table.
ks_2samp(
    data1=ctrl_extent['swath_area_50p'].to_numpy(),
    data2=pgw_extent['swath_area_50p'].to_numpy(),
)

CTRL area season
2000_2001    135.141136
2001_2002    141.409680
2002_2003    142.408336
2003_2004    158.604336
2005_2006    131.290096
2006_2007    140.526288
2007_2008    164.893360
2008_2009    150.608544
2009_2010    106.578096
2010_2011    166.952528
2011_2012    135.325792
2012_2013    159.574944
Name: swath_area_50p, dtype: float64
PGW area season
2000_2001     97.408288
2001_2002     91.392528
2002_2003     97.540512
2003_2004    105.564080
2005_2006     77.351808
2006_2007     83.988992
2007_2008    105.491792
2008_2009    106.342352
2009_2010     61.100784
2010_2011    110.191504
2011_2012     73.996176
2012_2013    104.673888
Name: swath_area_50p, dtype: float64


KstestResult(statistic=np.float64(0.0693435093523887), pvalue=np.float64(0.011967624430912422), statistic_location=np.float64(1514672.0), statistic_sign=np.int8(-1))

90th percentile swath area sig. test

(The seasonal totals printed below are shown only as an example; the significance test itself is run on the values for all individual swaths.)

In [10]:
# Per-season totals of the 'swath_area_90p' column, scaled by 10**-6 for
# display. Printed for context only.
print('CTRL area', ctrl_extent.groupby('season')['swath_area_90p'].sum().mul(10**-6))
print('PGW area', pgw_extent.groupby('season')['swath_area_90p'].sum().mul(10**-6))

# Two-sample Kolmogorov-Smirnov test on the full, unscaled 'swath_area_90p'
# column of each extent table.
ks_2samp(
    data1=ctrl_extent['swath_area_90p'].to_numpy(),
    data2=pgw_extent['swath_area_90p'].to_numpy(),
)

CTRL area season
2000_2001    40.729920
2001_2002    48.824816
2002_2003    46.007664
2003_2004    55.292368
2005_2006    49.361712
2006_2007    53.965520
2007_2008    60.220496
2008_2009    58.547408
2009_2010    36.650096
2010_2011    62.639360
2011_2012    44.018000
2012_2013    60.417744
Name: swath_area_90p, dtype: float64
PGW area season
2000_2001    30.872496
2001_2002    32.303632
2002_2003    33.252736
2003_2004    40.522320
2005_2006    30.370960
2006_2007    33.487232
2007_2008    42.048560
2008_2009    44.875584
2009_2010    21.596544
2010_2011    43.275728
2011_2012    25.768688
2012_2013    43.532944
Name: swath_area_90p, dtype: float64


KstestResult(statistic=np.float64(0.08430539332533488), pvalue=np.float64(0.001048981649510773), statistic_location=np.float64(73424.0), statistic_sign=np.int8(1))