# Using results from a simulated annealing (SA) calibration, find the variation in the DDFs that were generated and determine the 20 "best" SA models


In [2]:
from __future__ import print_function
%pylab notebook
# import datetime as dt
import glob
import os
import re
import sys

import matplotlib.dates as md
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#from nose.tools import set_trace

from charistools.hypsometry import Hypsometry
from charistools.meltModels import CalibrationCost
from charistools.modelEnv import ModelEnv

Populating the interactive namespace from numpy and matplotlib


In [None]:
# Change the working directory to the calibration output directory, then list
# the files there whose names match GA_Karn*2str*.
%cd /work/charis/ti_model/calibrations_correct_cost
%ls GA_Karn*2str*

In [None]:
# File name of one SA summary file (relative to the current working directory).
# The read below is commented out, so this name is only defined, not loaded here.
saFile = "GA_Karnali_at_Benighat.2str_DDFnbr=10mm_N100_M050.SA_summary.dat"
#caldf = pd.read_pickle(saFile)

In [None]:
def find_ddf_variation(drainageID, nstrikes=3, num_cycles_to_drop=61,
                       calibration_dir="/work/charis/ti_model/calibrations_correct_cost",
                       params="DDFnbr=10mm_N100_M050"):
    """Return the best DDF model for a drainage plus a +/- 1 std "Low"/"High" pair.

    The best model is parsed from the name of the calibration stats image
    (``...z*Best<a>_<b>_<c>_<d>...stats.png``).  The spread is the standard
    deviation of each DDF column in the SA summary file, computed after the
    first ``num_cycles_to_drop`` rows have been discarded.

    Parameters
    ----------
    drainageID : str
        Drainage identifier used as the file-name prefix.
    nstrikes : int
        Strike count used in the ``<n>str`` part of the file names.
    num_cycles_to_drop : int
        Number of leading SA cycles (rows) to exclude from the std estimate.
    calibration_dir : str
        Directory holding the SA summary and stats files.
    params : str
        Calibration parameter tag embedded in the file names.

    Returns
    -------
    pandas.DataFrame
        One row indexed by ``drainageID`` with columns
        ``['nstrikes', 'Low', 'Best', 'High']``; the three model columns are
        underscore-joined strings of four values formatted with two decimals
        (winter snow, summer snow, winter ice, summer ice).

    Raises
    ------
    IOError
        If no SA summary file or no stats file matches.
    ValueError
        If the stats file name does not contain four ``Best`` values, or if
        fewer than two rows remain after dropping the leading cycles (the
        standard deviation would be undefined).
    """
    ddf_columns = ['winter_snow_ddf', 'summer_snow_ddf',
                   'winter_ice_ddf', 'summer_ice_ddf']

    def _single_match(pattern, label):
        """Return the file matching pattern; fail loudly when there is none."""
        matches = sorted(glob.glob(pattern))
        if not matches:
            raise IOError("No %s file found for %s (pattern: %s)" % (
                label, drainageID, pattern))
        if len(matches) > 1:
            # Keep the original best-effort behavior (use one of the matches),
            # but make the choice deterministic and say what happened.
            print("Error looking for %s file for %s: %d matches, using %s" % (
                label, drainageID, len(matches), matches[0]), file=sys.stderr)
        return matches[0]

    # Read SA summary file, this is DDFs and z at end of each cycle
    SAFile = _single_match(
        "%s/%s.%dstr_%s.SA_summary.dat" % (
            calibration_dir, drainageID, nstrikes, params),
        "SA_summary")
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    # NOTE: read_pickle can execute arbitrary code; only use on trusted files.
    SAdf = pd.read_pickle(SAFile)

    # Skip the leading cycles, to limit analysis to stable stuff at end
    stabledf = SAdf.iloc[num_cycles_to_drop:]
    if len(stabledf) < 2:
        raise ValueError(
            "%s has %d rows; need at least %d to estimate variation after "
            "dropping the first %d" % (
                SAFile, len(SAdf), num_cycles_to_drop + 2, num_cycles_to_drop))
    # Sample standard deviation, the same statistic describe() reports as 'std'
    ddf_std = stabledf[ddf_columns].std().values

    # Locate the calibration stats output; its name carries the best model
    statsFile = _single_match(
        "%s/%s.%dstr_%s.z*Best*stats.png" % (
            calibration_dir, drainageID, nstrikes, params),
        "stats")
    print("stats file : %s" % statsFile, file=sys.stderr)

    # Parse the best model ddfs from the file name (not the directory part)
    p = re.compile(r'Best(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)_(\d+\.\d+)')
    m = p.search(os.path.basename(statsFile))
    if m is None:
        raise ValueError(
            "Cannot parse best model DDFs from stats file name: %s" % statsFile)
    best = np.array([float(value) for value in m.groups()])

    # Use the variation from the stable cycles for the low/high models
    best_low = best - ddf_std
    best_high = best + ddf_std

    # Do QC to ensure that each range enforces winter <= summer.
    # Index pairs are (winter, summer) for snow and for ice.
    ddf_pairs = (("snow", 0, 1), ("ice", 2, 3))
    for surface, winter, summer in ddf_pairs:
        if best_low[winter] > best_low[summer]:
            print("Warning: QC problem on low %s DDFs, forcing them to lower value"
                  % surface)
            best_low[winter] = best_low[summer]
    for surface, winter, summer in ddf_pairs:
        if best_high[winter] > best_high[summer]:
            print("Warning: QC problem on high %s DDFs, forcing them to higher value"
                  % surface)
            best_high[summer] = best_high[winter]

    # Make model strings and return them
    def _model_string(ddfs):
        """Format four DDF values as an underscore-joined model string."""
        return "_".join(["%.2f" % value for value in ddfs])

    record = {'drainageID': drainageID,
              'nstrikes': nstrikes,
              'Best': _model_string(best),
              'High': _model_string(best_high),
              'Low': _model_string(best_low)}

    result = pd.DataFrame.from_dict(record, orient='index').transpose()
    result = result.set_index('drainageID')
    return result[['nstrikes', 'Low', 'Best', 'High']]

In [None]:
# Try the variation summary on a single drainage with 2 strikes.
drainageID = "GA_Karnali_at_Benighat"
result = find_ddf_variation(drainageID, nstrikes=2)

In [None]:
# Build one summary table of Low/Best/High models for every drainage and
# strike count.
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]

# Collect the one-row frames and concatenate once: DataFrame.append copied the
# whole table on every iteration and was removed in pandas 2.0.
results = []
for drainageID in drainageIDs:
    for strike in strikes:
        result = find_ddf_variation(drainageID, nstrikes=strike)
        results.append(result)
df = pd.concat(results)

In [None]:
#drainageID = "GA_Karnali_at_Benighat"
#nstrikes = 2
def best_models(drainageID, nstrikes=3,
                calibration_dir="/work/charis/ti_model/calibrations_correct_cost",
                params="DDFnbr=10mm_N100_M050"):
    """Add model-string and nstrikes columns to an SA summary and re-save it.

    Reads ``<calibration_dir>/<drainageID>.<nstrikes>str_<params>.SA_summary.dat``,
    adds a ``model`` column (the four DDF columns converted with ``str`` and
    joined by underscores) and an ``nstrikes`` column, and pickles the result
    next to the input as ``...SA_summary.best20.dat``.

    NOTE(review): despite the "best20" suffix, no rows are ranked or filtered
    here; every row of the summary file is written out.  Confirm whether a
    top-20 selection was intended.

    NOTE(review): ``str`` keeps the shortest float representation (e.g.
    ``1.6``), unlike the two-decimal model strings produced by
    ``find_ddf_variation``; left unchanged in case downstream code depends on
    the current strings.

    Parameters
    ----------
    drainageID : str
        Drainage identifier used as the file-name prefix.
    nstrikes : int
        Strike count used in the ``<n>str`` part of the file names; also
        stored in the ``nstrikes`` column.
    calibration_dir : str
        Directory holding the SA summary file; the output is written there.
    params : str
        Calibration parameter tag embedded in the file names.

    Returns
    -------
    str
        Path of the pickle file that was written.

    Raises
    ------
    IOError
        If no SA summary file matches.
    """
    ddf_columns = ['winter_snow_ddf', 'summer_snow_ddf',
                   'winter_ice_ddf', 'summer_ice_ddf']

    # Read SA summary file, this is DDFs and z at end of each cycle
    pattern = "%s/%s.%dstr_%s.SA_summary.dat" % (
        calibration_dir, drainageID, nstrikes, params)
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise IOError("No SA_summary file found for %s (pattern: %s)" % (
            drainageID, pattern))
    if len(matches) > 1:
        # Best effort, as before: report the ambiguity and use one match.
        print("Error looking for SA_summary file for %s: %d matches, using %s" % (
            drainageID, len(matches), matches[0]), file=sys.stderr)
    SAFile = matches[0]
    print("SA_summary file : %s" % SAFile, file=sys.stderr)
    # NOTE: read_pickle can execute arbitrary code; only use on trusted files.
    df = pd.read_pickle(SAFile)

    # Build "<winter snow>_<summer snow>_<winter ice>_<summer ice>" per row
    model = df[ddf_columns[0]].map(str)
    for column in ddf_columns[1:]:
        model = model + "_" + df[column].map(str)
    df["model"] = model

    df["nstrikes"] = nstrikes

    outfile = "%s/%s.%dstr_%s.SA_summary.best20.dat" % (
        calibration_dir, drainageID, nstrikes, params)

    df.to_pickle(outfile)
    print("outfile: %s" % outfile, file=sys.stderr)
    return outfile

In [None]:
# Write the augmented SA summary (".best20.dat") file for every drainage and
# strike count; best_models is called for its file output only.
drainageIDs = ['SY_Naryn_at_NarynTown',
               'AM_Vakhsh_at_Komsomolabad',
               'IN_Hunza_at_DainyorBridge',
               'GA_Karnali_at_Benighat',
               'GA_Narayani_at_Devghat',
               'GA_SaptaKosi_at_Chatara']
strikes = [2, 3]
for drainageID in drainageIDs:
    for strike in strikes:
        best_models(drainageID, nstrikes=strike)

In [15]:
# Reload one of the ".best20.dat" files written above to inspect its contents.
# Swap the commented line in to look at the Hunza file instead of the Vakhsh one.
#newfile = '/work/charis/ti_model/calibrations_correct_cost/IN_Hunza_at_DainyorBridge.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
newfile = '/work/charis/ti_model/calibrations_correct_cost/AM_Vakhsh_at_Komsomolabad.2str_DDFnbr=10mm_N100_M050.SA_summary.best20.dat'
new = pd.read_pickle(newfile)

In [17]:
# Display the reloaded frame: the DDF columns, min_cycle_z, and the added
# model and nstrikes columns.
new

Unnamed: 0,winter_snow_ddf,summer_snow_ddf,winter_ice_ddf,summer_ice_ddf,min_cycle_z,model,nstrikes
0,0.26,9.14,1.60,2.18,164.503597,0.26_9.14_1.6_2.18,2
1,37.01,38.55,57.74,58.72,1775.862519,37.01_38.55_57.74_58.72,2
2,14.25,49.26,39.42,43.69,1839.480091,14.25_49.26_39.42_43.69,2
3,9.10,32.17,51.53,55.45,1144.784359,9.1_32.17_51.53_55.45,2
4,6.50,20.72,54.40,56.12,688.183591,6.5_20.72_54.4_56.12,2
5,17.73,33.02,36.32,52.97,1291.909814,17.73_33.02_36.32_52.97,2
6,3.10,17.17,33.10,52.78,509.217053,3.1_17.17_33.1_52.78,2
7,21.27,33.69,51.26,56.93,1369.650870,21.27_33.69_51.26_56.93,2
8,12.67,16.40,49.34,57.72,616.982543,12.67_16.4_49.34_57.72,2
9,0.15,9.02,54.97,55.54,181.944114,0.15_9.02_54.97_55.54,2


To get the best model for cycle 95:


In [19]:
# Look up the model string stored at row label 82.
# NOTE(review): the preceding text refers to cycle 95 while this reads label 82
# — confirm which one is intended.
new.at[82, "model"]

'0.34_6.52_8.33_45.74'