In [1]:
import numpy as np
import pandas as pd
import tools
import pickle

In [32]:
class splitsville():
    
    def __init__(self):
        self.split_years={}
        self.n_years_list = {'test':[], 'val':[], 'train':[]}
        self.dates = {'train':{},'val':{}, 'test':{}}
        with open(tools.get_main_dir()+'ri-calc.csv') as f:
            self.df = pd.read_csv(f)
    
    #___________________________________________________________________________
    def prepare_peakflows(self, pf, peakflows):

        # Don't include years before 1980
        if int(str(np.array(peakflows.iloc[pf,:])[0]).split('-')[0]) < 1980:
            return pd.to_datetime('1900-01-01')
        
         #  some of the early months/days are not known, so are input as zero!!!
         #  Just put a month/day so the the water year is the provided calendar year.
        try:
            date_of_peak = pd.to_datetime(np.array(peakflows.iloc[pf,:])[0])
        except:
            datestr=str(np.array(peakflows.iloc[pf,:])[0]).split('-')
            if datestr[1] == '00':
                if datestr[2] == '00':
                    date_of_peak = pd.to_datetime(datestr[0]+'-12-31')
                else:
                    date_of_peak = pd.to_datetime('1900-01-01')
            else:
                date_of_peak = pd.to_datetime('1900-01-01')
                
        return date_of_peak
    #___________________________________________________________________________    
    def split_year_to_test_val_train(self, annual_peak, ri, water_year_of_peak, basin_str):
        # Now split up the years into train (25-75)
        try:   ### Annual Peak might be NaN or some other non-numeric. So skip...
            if isinstance(annual_peak, str):
                annual_peak = float(annual_peak.strip())
            if float(annual_peak) < ri[5.0]:
                if int(water_year_of_peak) > 1990 and int(water_year_of_peak) < 2015:
                    self.split_years[basin_str]['train'].append(int(water_year_of_peak))
        except:
            print('basin_str', basin_str)
            print('annual_peak{}annual_peak'.format(annual_peak))
            print('type(annual_peak)', type(annual_peak))
            print('float(annual_peak)', float(annual_peak))
            print('water_year_of_peak', water_year_of_peak)
            print('type(water_year_of_peak)', type(water_year_of_peak))
            print('int(water_year_of_peak)', int(water_year_of_peak))
            print('self.split_years[basin_str][train]', self.split_years[basin_str]['train'])
            return 0

        try:
            if isinstance(annual_peak, str):
                annual_peak = float(annual_peak.strip())
            if float(annual_peak) >= ri.loc[10.0] and annual_peak < ri[25.0]:
                self.split_years[basin_str]['val'].append(int(water_year_of_peak))
        except:
            print('basin_str', basin_str)
            print('annual_peak{}annual_peak'.format(annual_peak))
            print('type(annual_peak)', type(annual_peak))
            print('float(annual_peak)', float(annual_peak))
            print('water_year_of_peak', water_year_of_peak)
            print('type(water_year_of_peak)', type(water_year_of_peak))
            print('int(water_year_of_peak)', int(water_year_of_peak))
            print('self.split_years[basin_str][val]', self.split_years[basin_str]['val'])
            return 0

        try:
            if isinstance(annual_peak, str):
                annual_peak = float(annual_peak.strip())
            if float(annual_peak) >= ri[25.0]:
                self.split_years[basin_str]['test'].append(int(water_year_of_peak))
        except:
            print('basin_str', basin_str)
            print('annual_peak{}annual_peak'.format(annual_peak))
            print('type(annual_peak)', type(annual_peak))
            print('float(annual_peak)', float(annual_peak))
            print('water_year_of_peak', water_year_of_peak)
            print('type(water_year_of_peak)', type(water_year_of_peak))
            print('int(water_year_of_peak)', int(water_year_of_peak))
            print('self.split_years[basin_str][test]', self.split_years[basin_str]['test'])
            return 0
        
        return 1
    
    #___________________________________________________________________________    
    def clean_test_val_train_lists(self, basin_str):
        # We need to go through both the train and test sets
        # and remove years that cannot maintain a 1 year gap.
        # For instance, we can not train 1980 and test 1981

        for i in self.split_years[basin_str]['train']:
            if i-1 in self.split_years[basin_str]['test']:
                self.split_years[basin_str]['train'].remove(i)
                
        for i in self.split_years[basin_str]['test']:
            if i-1 in self.split_years[basin_str]['train']:
                self.split_years[basin_str]['train'].remove(i-1)
                
        # Take only N years for training
        self.split_years[basin_str]['train'] = self.split_years[basin_str]['train'][0:13]
                
    #___________________________________________________________________________
    def append_dates(self, basin_str):
        for split_type in ['test', 'train', 'val']:
            for i in self.split_years[basin_str][split_type]:
                self.dates[split_type][basin_str]['start_dates'].append(pd.to_datetime('01/10/{}'.format(i-1)))
                self.dates[split_type][basin_str]['end_dates'].append(pd.to_datetime('30/09/{}'.format(i)))
        
    #___________________________________________________________________________
    def summarize_dates(self, basin_str):
        print(basin_str)
        print('_____________________________')
        for split_type in ['test', 'train', 'val']:
            self.n_years_list[split_type].append(len(self.split_years[basin_str][split_type]))
            print('Number of {} years'.format(split_type), len(self.split_years[basin_str][split_type]))
            print('{} years'.format(split_type), self.split_years[basin_str][split_type])
        print('\n')
    
    #___________________________________________________________________________
    def do_splits(self):
        for basin in self.df.gauge_id:
            basin_str = tools.gauge_id_str(basin)
            
            for split_type in ['test', 'train', 'val']:
                self.dates[split_type][basin_str] = {'start_dates':[], 'end_dates':[]}
            
        #    print(basin_str)

            ri = tools.read_b17(basin_str)
        #    print(ri)

            peakflows = tools.read_peak_flows(basin_str)

            low_flow_cutoff = tools.closest_value(1.1,ri.index.values)

            # Loop through the peaks, and assign the years to either test or train
            self.split_years[basin_str] = {'test':[], 'val':[], 'train':[]}

            for pf in range(peakflows.shape[0]):

                date_of_peak = self.prepare_peakflows(pf, peakflows)
                if date_of_peak.year < 1980:
                    continue
                
                annual_peak = np.array(peakflows.iloc[pf,:])[1]

                water_year_of_peak = tools.get_water_year(date_of_peak.year, date_of_peak.month)

                good_split = self.split_year_to_test_val_train(annual_peak,ri,water_year_of_peak,basin_str)
                if good_split == 0:
                    continue

                self.clean_test_val_train_lists(basin_str)

            self.append_dates(basin_str)

            self.summarize_dates(basin_str)

    #___________________________________________________________________________
    def clean_start_end_lists(self):
        for basin in self.df.gauge_id:
            basin_str = tools.gauge_id_str(basin)
                        
            addsubtract = [-1, 1]

            for split_type in ['test', 'train', 'val']:
                temp_dates = self.dates[split_type][basin_str]

                for i, start_end in enumerate(['start_dates','end_dates']):
                    tempdays = []
                    tempdayyears = []
                    for d in temp_dates[start_end]:
                        tempdayyears.append(d.year)
                    for d in temp_dates[start_end]:
                        if start_end == 'start_dates':
                            if len(tempdays) == 0:
                                tempdays.append(d)
                                continue
                        if d.year + addsubtract[i] in tempdayyears:
                            continue
                        else:
                            tempdays.append(d)
                    self.dates[split_type][basin_str][start_end] = tempdays

#                 enddays = []
#                 enddayyears = []
#                 for d in temp_dates['end_dates']:
#                     enddayyears.append(d.year)
#                 for d in temp_dates['end_dates']:
#                     if d.year < 1981:
#                         continue
#                     if len(enddays) == 0:
#                         enddays.append(d)
#                         continue
#                     if d.year + 1 in enddayyears:
#                         continue
#                     else:
#                         enddays.append(d)
#                 self.dates[split_type][basin_str]['end_dates'] = enddays

In [33]:
split_these = splitsville()

In [34]:
split_these.do_splits()

01466500
_____________________________
Number of test years 2
test years [2004, 2011]
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2006, 2008, 2009]
Number of val years 1
val years [1991]


10244950
_____________________________
Number of test years 3
test years [1983, 1985, 2005]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007]
Number of val years 2
val years [1984, 2011]


10172800
_____________________________
Number of test years 1
test years [1998]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2004, 2007, 2008, 2009]
Number of val years 6
val years [1983, 1984, 1986, 1995, 2005, 2011]


09378630
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004]
Number of val years 5
val years [1980, 1983

08269000
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1992, 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2006, 2007]
Number of val years 2
val years [1984, 1991]


06409000
_____________________________
Number of test years 1
test years [2002]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1997, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012]
Number of val years 4
val years [1996, 2000, 2003, 2010]


10234500
_____________________________
Number of test years 2
test years [1984, 1995]
Number of train years 13
train years [1991, 1992, 1993, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008]
Number of val years 4
val years [1983, 2005, 2011, 2019]


09404450
_____________________________
Number of test years 2
test years [2011, 2013]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007]
Number of val years 3
val years [2012, 2014

05120500
_____________________________
Number of test years 1
test years [2009]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1998, 2000, 2002, 2003, 2004, 2005, 2006, 2007, 2012]
Number of val years 3
val years [1995, 1996, 2011]


02324400
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005]
Number of val years 1
val years [2003]


02312200
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1997, 1999, 2000, 2001, 2002, 2005, 2006, 2007]
Number of val years 1
val years [2019]


06221400
_____________________________
Number of test years 1
test years [2017]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003, 2005, 2006, 2007]
Number of val years 3
val years [2004, 2018, 2019]


01487000
___________________________

05123400
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1993, 1994, 1997, 1998, 2000, 2002, 2003, 2004, 2007, 2008, 2010]
Number of val years 4
val years [1999, 2005, 2011, 2017]


11237500
_____________________________
Number of test years 2
test years [1996, 1997]
Number of train years 13
train years [1991, 1992, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2007, 2008, 2009, 2010]
Number of val years 3
val years [2006, 2017, 2018]


01545600
_____________________________
Number of test years 1
test years [1994]
Number of train years 13
train years [1991, 1992, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009]
Number of val years 4
val years [1984, 1996, 1997, 2004]


06224000
_____________________________
Number of test years 4
test years [1981, 2010, 2011, 2017]
Number of train years 13
train years [1992, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 2
val

01549500
_____________________________
Number of test years 1
test years [1996]
Number of train years 13
train years [1991, 1992, 1998, 2000, 2001, 2002, 2003, 2006, 2007, 2008, 2009, 2010, 2012]
Number of val years 2
val years [1984, 1994]


14020000
_____________________________
Number of test years 2
test years [1996, 1997]
Number of train years 13
train years [1992, 1994, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010]
Number of val years 4
val years [1986, 2013, 2014, 2017]


10258500
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1991, 1992, 1996, 1997, 1999, 2000, 2001, 2003, 2003, 2004, 2008, 2008]
Number of val years 4
val years [1980, 1983, 1993, 2019]


01567500
_____________________________
Number of test years 3
test years [1989, 2004, 2011]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2007, 2008]
Number of val years 2
val years [1984

12048000
_____________________________
Number of test years 1
test years [2002]
Number of train years 13
train years [1993, 1994, 1996, 1998, 1999, 2000, 2005, 2006, 2007, 2008, 2009, 2010, 2012]
Number of val years 4
val years [1986, 1991, 1997, 2004]


03078000
_____________________________
Number of test years 1
test years [1996]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1998, 1999, 2001, 2002, 2004, 2005, 2006, 2007, 2008]
Number of val years 2
val years [1986, 2018]


04256000
_____________________________
Number of test years 2
test years [1982, 1985]
Number of train years 13
train years [1991, 1995, 1997, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008]
Number of val years 3
val years [1984, 1993, 1998]


11476600
_____________________________
Number of test years 1
test years [1997]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008]
Number of val years 4
val years [1983, 1995, 2003, 

12147500
_____________________________
Number of test years 1
test years [2009]
Number of train years 13
train years [1992, 1994, 1995, 1997, 1998, 1999, 2001, 2002, 2003, 2005, 2006, 2011, 2012]
Number of val years 5
val years [1993, 1996, 2000, 2004, 2015]


02299950
_____________________________
Number of test years 3
test years [1998, 2003, 2017]
Number of train years 13
train years [1993, 1994, 1995, 1996, 2000, 2005, 2006, 2007, 2009, 2010, 2011, 2012, 2014]
Number of val years 2
val years [1988, 1992]


02327100
_____________________________
Number of test years 2
test years [2000, 2012]
Number of train years 13
train years [1992, 1993, 1995, 1996, 1997, 1998, 2003, 2004, 2005, 2006, 2007, 2008, 2010]
Number of val years 0
val years []


11264500
_____________________________
Number of test years 2
test years [1997, 2018]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2007, 2008, 2009, 2012]
Number of val years 2
val years [1996, 2005

05556500
_____________________________
Number of test years 3
test years [1994, 1997, 2008]
Number of train years 12
train years [1991, 1992, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010, 2011, 2014]
Number of val years 0
val years []


01580000
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1993, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2008]
Number of val years 5
val years [1985, 1989, 1994, 1996, 2011]


02065500
_____________________________
Number of test years 2
test years [1987, 1996]
Number of train years 13
train years [1991, 1992, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009]
Number of val years 6
val years [1985, 1993, 1994, 2003, 2014, 2019]


06453600
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1994, 1997, 1998, 2000, 2002, 2003, 2004, 2005, 2006, 2009, 2011, 2012]
Number of val years 4
val years [1993, 

08109700
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2004, 2006, 2007, 2008, 2009]
Number of val years 4
val years [1992, 2003, 2016, 2019]


02070000
_____________________________
Number of test years 2
test years [1985, 1987]
Number of train years 13
train years [1991, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006]
Number of val years 2
val years [1986, 2010]


07299670
_____________________________
Number of test years 1
test years [1984]
Number of train years 13
train years [1992, 1993, 1994, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 4
val years [1991, 1995, 2007, 2009]


02245500
_____________________________
Number of test years 1
test years [2017]
Number of train years 13
train years [1991, 1992, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006]
Number of val years 2
val years [1997, 2013]


04

05525500
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1992, 1993, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 3
val years [1994, 2008, 2018]


07145700
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004]
Number of val years 3
val years [1999, 2008, 2009]


01634500
_____________________________
Number of test years 1
test years [1996]
Number of train years 13
train years [1991, 1992, 1994, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2008]
Number of val years 1
val years [2013]


07261000
_____________________________
Number of test years 1
test years [1983]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005]
Number of val years 1
val years [2017]


03238500
___________________________

08082700
_____________________________
Number of test years 1
test years [2005]
Number of train years 13
train years [1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008]
Number of val years 3
val years [1982, 2018, 2019]


02143000
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005]
Number of val years 2
val years [1991, 2004]


07315200
_____________________________
Number of test years 2
test years [1982, 2015]
Number of train years 13
train years [1991, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 5
val years [1989, 1990, 1992, 1993, 2016]


01550000
_____________________________
Number of test years 3
test years [1996, 2011, 2017]
Number of train years 13
train years [1991, 1992, 1993, 1998, 1999, 2000, 2001, 2002, 2003, 2007, 2008, 2009, 2013]
Number of val years 3
val years [1984

03340800
_____________________________
Number of test years 2
test years [2011, 2013]
Number of train years 13
train years [1992, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007]
Number of val years 2
val years [1989, 1991]


06360500
_____________________________
Number of test years 1
test years [2011]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2004]
Number of val years 7
val years [1982, 1987, 1997, 2008, 2009, 2010, 2019]


02479560
_____________________________
Number of test years 4
test years [1983, 1998, 2009, 2017]
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007]
Number of val years 2
val years [1991, 2012]


06191500
_____________________________
Number of test years 3
test years [1996, 1997, 2011]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007, 2009]
Number of

06847900
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005]
Number of val years 1
val years [1996]


06885500
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 1
val years [2007]


02112120
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 1
val years [1994]


06784000
_____________________________
Number of test years 1
test years [2019]
Number of train years 13
train years [1991, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004]
Number of val years 2
val years [1984, 1993]


02059500
_____________________________
Number of test year

02064000
_____________________________
Number of test years 2
test years [1987, 1996]
Number of train years 13
train years [1991, 1992, 1994, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007]
Number of val years 0
val years []


02472500
_____________________________
Number of test years 2
test years [1990, 2016]
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2003, 2004, 2006]
Number of val years 2
val years [1980, 1983]


05501000
_____________________________
Number of test years 1
test years [2013]
Number of train years 13
train years [1992, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2007]
Number of val years 5
val years [1982, 1983, 1990, 1991, 1993]


01548500
_____________________________
Number of test years 1
test years [2004]
Number of train years 13
train years [1991, 1992, 1993, 1995, 1998, 1999, 2000, 2001, 2002, 2006, 2007, 2008, 2009]
Number of val years 3
val years [1984, 1994, 1996]


03

01543500
_____________________________
Number of test years 1
test years [1996]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2007]
Number of val years 2
val years [2004, 2011]


12041200
_____________________________
Number of test years 2
test years [2004, 2007]
Number of train years 13
train years [1992, 1993, 1994, 1995, 1998, 1999, 2000, 2001, 2009, 2010, 2011, 2012, 2013]
Number of val years 2
val years [1991, 2008]


14301000
_____________________________
Number of test years 3
test years [1990, 1996, 2008]
Number of train years 13
train years [1992, 1993, 1994, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2010]
Number of val years 1
val years [2016]


01552000
_____________________________
Number of test years 3
test years [1996, 2004, 2011]
Number of train years 13
train years [1992, 1993, 1994, 1998, 1999, 2000, 2001, 2002, 2006, 2007, 2008, 2009, 2013]
Number of val years 1
val years [2010]


06441500
_

12189500
_____________________________
Number of test years 5
test years [1981, 1991, 1996, 2004, 2007]
Number of train years 13
train years [1993, 1994, 1998, 1999, 2000, 2001, 2002, 2009, 2010, 2011, 2012, 2013, 2014]
Number of val years 1
val years [2018]


08198500
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, 2003, 2004, 2005, 2006]
Number of val years 2
val years [1997, 2002]


11522500
_____________________________
Number of test years 2
test years [1997, 2006]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009]
Number of val years 0
val years []


08196000
_____________________________
Number of test years 1
test years [2002]
Number of train years 13
train years [1991, 1992, 1993, 1994, 1996, 1999, 2000, 2004, 2005, 2006, 2007, 2008, 2009]
Number of val years 1
val years [1997]


05514500
_______________

02193340
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1993, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004]
Number of val years 1
val years [1990]


02198100
_____________________________
Number of test years 3
test years [1991, 1998, 2003]
Number of train years 13
train years [1994, 1995, 1996, 2000, 2001, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2014]
Number of val years 1
val years [2013]


02202600
_____________________________
Number of test years 2
test years [1995, 2005]
Number of train years 13
train years [1992, 1997, 1999, 2000, 2001, 2002, 2003, 2007, 2008, 2009, 2010, 2011, 2012]
Number of val years 0
val years []


02215100
_____________________________
Number of test years 2
test years [1998, 2005]
Number of train years 13
train years [1992, 1993, 1995, 1996, 2000, 2001, 2002, 2003, 2007, 2008, 2010, 2011, 2012]
Number of val years 2
val years [1994, 2004]


02216180
_____________________

05488200
_____________________________
Number of test years 3
test years [1982, 1993, 2015]
Number of train years 13
train years [1991, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 1
val years [2008]


05503800
_____________________________
Number of test years 1
test years [1996]
Number of train years 13
train years [1991, 1992, 1994, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007]
Number of val years 2
val years [1993, 2013]


05507600
_____________________________
Number of test years 1
test years [2008]
Number of train years 13
train years [1991, 1992, 1995, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2010]
Number of val years 2
val years [1993, 1996]


05508805
_____________________________
Number of test years 2
test years [2008, 2010]
Number of train years 13
train years [1991, 1992, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006]
Number of val years 1
val years [1993]


05591550
_____________

09386900
_____________________________
Number of test years 1
test years [1995]
Number of train years 13
train years [1991, 1992, 1993, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008]
Number of val years 1
val years [1998]


09447800
_____________________________
Number of test years 2
test years [1984, 1993]
Number of train years 13
train years [1991, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2007, 2008]
Number of val years 0
val years []


09508300
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1991, 1992, 1994, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006]
Number of val years 4
val years [1980, 1993, 2005, 2010]


09512280
_____________________________
Number of test years 0
test years []
Number of train years 13
train years [1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009, 2011]
Number of val years 4
val years [1993, 2005, 2010, 2014]


10166430
_____________

In [37]:
minimum_train_years = 100
for i in split_these.n_years_list['train']:
    if i < minimum_train_years:
        minimum_train_years = i

In [38]:
print('average number of train years',np.nanmean(split_these.n_years_list['train']))
print('minimum number of train years',minimum_train_years)
print('average number of test years',np.nanmean(split_these.n_years_list['test']))
print('average number of val years',np.nanmean(split_these.n_years_list['val']))

average number of train years 12.958271236959762
minimum number of train years 10
average number of test years 1.427719821162444
average number of val years 2.299552906110283


In [39]:
split_these.clean_start_end_lists()

In [40]:
split_these.dates['train']['01539000']

{'start_dates': [Timestamp('1990-01-10 00:00:00'),
  Timestamp('1996-01-10 00:00:00'),
  Timestamp('2007-01-10 00:00:00')],
 'end_dates': [Timestamp('1995-09-30 00:00:00'),
  Timestamp('2003-09-30 00:00:00'),
  Timestamp('2008-09-30 00:00:00')]}

In [41]:
split_these.dates['val']['01539000']

{'start_dates': [Timestamp('1995-01-10 00:00:00')],
 'end_dates': [Timestamp('1996-09-30 00:00:00')]}

In [42]:
split_these.dates['test']['01539000']

{'start_dates': [Timestamp('2005-01-10 00:00:00'),
  Timestamp('2010-01-10 00:00:00'),
  Timestamp('2017-01-10 00:00:00')],
 'end_dates': [Timestamp('2006-09-30 00:00:00'),
  Timestamp('2011-09-30 00:00:00'),
  Timestamp('2018-09-30 00:00:00')]}

In [43]:
with open('per_basin_train_periods_file.plk', 'wb') as fb:
    pickle.dump(split_these.dates['train'], fb, protocol=pickle.HIGHEST_PROTOCOL)
with open('per_basin_test_periods_file.plk', 'wb') as fb:
    pickle.dump(split_these.dates['test'], fb, protocol=pickle.HIGHEST_PROTOCOL)
with open('per_basin_val_periods_file.plk', 'wb') as fb:
    pickle.dump(split_these.dates['val'], fb, protocol=pickle.HIGHEST_PROTOCOL)

In [45]:
split_these.dates['train']['01142500']

{'start_dates': [Timestamp('1990-01-10 00:00:00'),
  Timestamp('1999-01-10 00:00:00'),
  Timestamp('2002-01-10 00:00:00'),
  Timestamp('2007-01-10 00:00:00')],
 'end_dates': [Timestamp('1996-09-30 00:00:00'),
  Timestamp('2000-09-30 00:00:00'),
  Timestamp('2006-09-30 00:00:00'),
  Timestamp('2009-09-30 00:00:00')]}