In [42]:
# NOTEBOOK IS MEANT TO BE RUN ENTIRE WAY THROUGH AFTER EACH ITERATION
# AUTOLOCKING OF DATA HELPS TO PRESERVE MODEL OUTPUT THAT MAY HAVE BEEN OVERWRITTEN ON EACH MODEL RUN
# INDIVIDUAL PURPOSES CAN BE LOCKED AT ANY ROUND AND THEY WILL DROP OUT OF ADJUSTMENT AND SMOOTHING

# debug toggle to turn on display of dataframes in cell outputs
debug = False

import pandas as pd  
import os
import plotly as py
import plotly.graph_objects as go
import plotly.express as px
import ipywidgets as widgets
import numpy as np
from scipy import special
import time

# offline mode for charts, so it stays local
py.offline.init_notebook_mode(connected=True)

# set rounding precision and display precision - display is rounding + 1, so you can see that it's working
round_precision = 7
pd.set_option("display.precision", round_precision + 1)

# read in curve data'
dirCWD    = os.getcwd()
dirIntermediate = os.path.join(dirCWD,'_Friction_Factors')
dirResults = os.path.join(dirCWD,'_Friction_Factors')

binsize = 2

observedStage = 'v9 Final Smoothing'

filenameTLFObs = r'A:\1 - TDM\2 - Estimate Param\_General Parameters\7 - Update Obs TLF\3 - Create Obs TLFs for all Trips\results\dfTLF_Obs_wSmoothed_20221130-133903.csv'
folderTDMTLFLoc = r'A:\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\WF TDM v9.0 - 2022-11-01\Scenarios\BY_2019\3_Distribute\TLF'

# some trip purposes are added together for information purposes
dfTripPurpSubtotals = pd.DataFrame([
    ['HBOth','HBO' ],
    ['HBShp','HBO' ],
    ['NHBW' ,'NHB' ],
    ['NHBNW','NHB' ],
    ['IX'   ,'IXXI'],
    ['XI'   ,'IXXI']
],columns=(['TRIPPURP','TRIPPURP_SUB']))
if debug: display(dfTripPurpSubtotals)

# calibration round settings
dfCalibrationRounds = pd.DataFrame([
    ['0-Initial from v832',r'A:\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9.0Beta\WF TDM v9.0 - 2022-11-01\1_Inputs\0_GlobalData\3_Distribute\FricFactor_AllPurp.csv']
],columns=(['FF_CALIB_ROUND','FILENAMEFF']))

# create and initialize locking fields
dfCalibrationRounds['LOCKFF' ] = False
dfCalibrationRounds['LOCKTLF'] = False

# AUTOLOCK ALL BUT LAST CALIBROUND TO AVOID OVERWRITING DATA IF NOT MANUALLY LOCKED
dfCalibrationRounds.loc[(dfCalibrationRounds.index<dfCalibrationRounds.shape[0]-1), 'LOCKFF' ] = True
dfCalibrationRounds.loc[(dfCalibrationRounds.index<dfCalibrationRounds.shape[0]-1), 'LOCKTLF'] = True

if debug: display(dfCalibrationRounds)

# add trip purposes here to lock friction factors
dfTripPurpLocks = pd.DataFrame([
    #example: ['HBW','0-Initial from v832'],
    ['HBW'  ,'0-Initial from v832'],
    ['HBShp','0-Initial from v832']
],
columns=('TRIPPURP','LOCK_FF_CALIB_ROUND'))
if debug: display(dfTripPurpLocks)

# a set of column renaming to be used to put all columns in consistent naming
colRenames ={'HBOTH':'HBOth',
             'HBSHP':'HBShp',
             'HBSCHPR' :'HBSchPr',
             'HBSCHSC' :'HBSchSc',
             'HBSCH_PR':'HBSchPr',
             'HBSCH_SC':'HBSchSc',
             'HBSch_Pr':'HBSchPr',
             'HBSch_Sc':'HBSchSc'}

# use entire width of browser for cells
#from IPython.display import display, HTML
#display(HTML("<style>.container { width:100% !important; }</style>"))

In [43]:
dfCalibrationRounds

Unnamed: 0,FF_CALIB_ROUND,FILENAMEFF,LOCKFF,LOCKTLF
0,0-Initial from v832,A:\1 - TDM\3 - Model Dev\1 - WF\2 - Sandbox\v9...,False,False


In [44]:
# create intermediate directories for each round
for index, row in dfCalibrationRounds.iterrows():
    path = os.path.join(dirIntermediate,row['FF_CALIB_ROUND'])

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    
    #printing if the path exists or not
    if not isExist:

        # Create a new directory because it does not exist
        os.makedirs(path)
        print("The new directory is created! " + path)

    path = os.path.join(path,'manual_smoothing')

    # Check whether the specified path exists or not
    isExist = os.path.exists(path)
    
    #printing if the path exists or not
    if not isExist:

        # Create a new directory because it does not exist
        os.makedirs(path)
        print("The new directory is created! " + path)



# Observed Trip Length Frequencies

In [45]:
# read in observed TLF data to be used
dfTLF_Obs = pd.read_csv(filenameTLFObs)

#filter by the STAGE for 
dfTLF_Obs = dfTLF_Obs[dfTLF_Obs['STAGE']==observedStage].copy()
dfTLF_Obs = dfTLF_Obs.drop(columns=('STAGE'))
if debug: display(dfTLF_Obs)

In [46]:
# show max bin size for TLF type to use in updating TDM TLF code
if debug: display(dfTLF_Obs[dfTLF_Obs['FREQ']>0].groupby(['TLFTYPE'],as_index=False).agg(MAXBIN=('BIN','max')))

In [47]:
# Check TLF Observed. FREQ should all sum to 1! Also show average trip length
dfTLF_Obs['BINxFREQ'] = dfTLF_Obs['BIN'] * dfTLF_Obs['FREQ']
dfTLF_Obs_Stats = dfTLF_Obs.groupby(['TRIPPURP','TLFTYPE'],as_index=False).agg(FREQ_SUM=('FREQ','sum'),AVG_TRIP_LEN=('BINxFREQ','sum'))
dfTLF_Obs = dfTLF_Obs.drop(columns=('BINxFREQ'))
if debug: display(dfTLF_Obs_Stats)

In [48]:
import plotly as py
import plotly.graph_objects as go
import ipywidgets as widgets
import numpy as np
from scipy import special

py.offline.init_notebook_mode(connected=True)

# Observed Plots

In [49]:
#PLOTTING FUNCTION

# chart preset zoom extents
dfZoomPresets = pd.DataFrame([
    ['Begin' ,     0,   100,     0,np.NaN],
    ['Mid'   ,    30,   160,     0, 0.018],
    ['Tail'  ,   100,   400,     0, 0.001],
    ['All'   ,np.NaN,np.NaN,np.NaN,np.NaN],
    ['Custom',np.NaN,np.NaN,np.NaN,np.NaN],
], columns=('PRESET','XMIN','XMAX','YMIN','YMAX'))
dfZoomPresets

def update_plot_obs(trippurps, tlftypes, zoompreset, xmin, xmax, ymin, ymax):

    dfAvgTripLen = pd.DataFrame()
    
    data = []
    for trippurp in trippurps:
        for tlftype in tlftypes:

            # data for plotting from filtered dataframe
            plotdata = dfTLF_Obs[(dfTLF_Obs['TRIPPURP']==trippurp) & (dfTLF_Obs['TLFTYPE']==tlftype)].copy()

            # fill any NaN values with zeros
            plotdata = plotdata.fillna(0)
            plotdata = plotdata.rename(columns={'FREQ_Obs':'FREQ'}) # temp fix, since this column gets renamed somewhere else
            
            xplot = plotdata['BIN']
            yplot = plotdata['FREQ']
            name  = trippurp + ' ' + tlftype 

            trace1 = go.Scatter(
                x=xplot,
                y=yplot,
                mode='lines',
                name=name,
                line=dict(
                    shape='spline'
                )
            )
            data.append(trace1)
                
    # get zoom presets
    if (zoompreset=='Custom'):
        _xmin = xmin
        _xmax = xmax
        _ymin = ymin
        _ymax = ymax
    else:
        _xmin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMIN'].values[0]
        _xmax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMAX'].values[0]
        _ymin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMIN'].values[0]
        _ymax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMAX'].values[0]

    layout = go.Layout(
        title='Trip Length Frequencies - Observed Data',
        yaxis=dict(
            title='',
            range=(_ymin,_ymax)
        ),
        xaxis=dict(
            title='GC/Dist/Time',
            range=(_xmin,_xmax)
        ),
        #width=1600,
        height=450
    )
    
    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig)

In [50]:
# MAKE INTERACTIVE CHART
py.offline.init_notebook_mode(connected=True)

lstTripPurp  = dfTLF_Obs['TRIPPURP'].unique().tolist() # HBW, HBShp, HBOth,...
lstTLFType   = dfTLF_Obs['TLFTYPE' ].unique().tolist() # GC, Time, Dist
    
selectTripPurp  = widgets.SelectMultiple(options=lstTripPurp, value=('HBW'        ,), description='Trip Purpose')
selectTLFType   = widgets.SelectMultiple(options=lstTLFType , value=('GC'         ,), description='TLF Type'    )
selectPreset    = widgets.Select(options=dfZoomPresets['PRESET'].tolist(), value='All', description='Zoom Presets')

#custom extents
xmin = widgets.Text(value='0', description='X Min')
xmax = widgets.Text(value='60', description='X Max')
ymin = widgets.Text(value='0', description='Y Min')
ymax = widgets.Text(value='.01', description='Y Max')

widgets.interactive(update_plot_obs, trippurps=selectTripPurp, tlftypes=selectTLFType, zoompreset=selectPreset, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)

interactive(children=(SelectMultiple(description='Trip Purpose', index=(0,), options=('HBW', 'HBShp', 'HBOth',…

# Friction Factors

In [52]:
# read Friction Factors into array ordered by iteration

# Friction Factors array that stores a separate dataframe for each Calibration Round
dfsFF=[]

for index, row in dfCalibrationRounds.iterrows():
    filename = os.path.join(os.path.join(dirIntermediate,row['FF_CALIB_ROUND']),'1-friction factors begin.csv')
    if row['LOCKFF']==False:
        display ('Reading friction factors for ' + row['FF_CALIB_ROUND'])
        dfFF = pd.read_csv(row['FILENAMEFF'])

        # make sure id column is always same: BIN
        dfFF.rename(columns={';MINUTE':'BIN','Mile':'BIN','Mil':'BIN','Min':'BIN','Bin':'BIN'},inplace=True)
        # rename columns
        dfFF.rename(columns=colRenames,inplace=True)

        if debug: display(dfFF)

        # melt all columns but first
        dfFF = pd.melt(dfFF, id_vars=['BIN'], value_vars=dfFF.columns[1:].tolist(), ignore_index=False, var_name='TRIPPURP', value_name='FF')

        dfFF.to_csv(filename, index=False)

    else:
        dfFF = pd.read_csv(filename)

    # append to array of Friction Factors
    dfsFF.append(dfFF)

    if debug: display(dfFF)


'Reading friction factors for 0-Initial from v832'

# Read in TDM Trip Length Frequencies

In [53]:
# read in trips from TDM sorted into TLF bins

# array of dataframes to hold TLFs from each round of calibration
dfsTLF_TDM = []

for index, row in dfCalibrationRounds.iterrows():

    display('Reading ' + row['FF_CALIB_ROUND'] + ' TLFs')

    filename = os.path.join(os.path.join(dirIntermediate,row['FF_CALIB_ROUND']),'2-tlfs.csv')

    if row['LOCKTLF']==False:

        tlfs=['Cost','Dist','Time']

        dfTLFTDMTrips = pd.DataFrame()

        for tlf in tlfs:
            
            # read in csv for tlf
            dfRead = pd.read_csv(os.path.join(folderTDMTLFLoc,'TLF_' + tlf + '.csv'))
            
            # make sure id column is always same: BIN
            dfRead.rename(columns={';BIN':'BIN',';MINUTE':'BIN','Mile':'BIN','Mil':'BIN','Min':'BIN','Bin':'BIN'}, inplace=True)
            # rename columns
            dfRead.rename(columns=colRenames,inplace=True)

            dfRead = pd.melt(dfRead, id_vars=['BIN'], value_vars=dfRead.columns[1:].tolist(), ignore_index=False, var_name='TRIPPURP', value_name='TRIPS')

            # set TLF value, rename Cost to GC
            if tlf=='Cost':
                dfRead['TLFTYPE'] = 'GC'
            else:
                dfRead['TLFTYPE'] = tlf

            # reorder columns
            dfRead = dfRead[['TLFTYPE','TRIPPURP','BIN','TRIPS']]

            # concat data into single dataframe
            dfTLFTDMTrips = pd.concat([dfTLFTDMTrips, dfRead], ignore_index=True)

        if debug: display(dfTLFTDMTrips)

        # create subtotals
        dfTLFTDMTripsForSubtotals = pd.DataFrame.merge(dfTLFTDMTrips, dfTripPurpSubtotals, on=('TRIPPURP'))
        dfTLFTDMTripsForSubtotals = dfTLFTDMTripsForSubtotals.groupby(['TLFTYPE','TRIPPURP_SUB','BIN'],as_index=False).agg(TRIPS=('TRIPS','sum'),COUNT=('TRIPS','size'))
        dfTLFTDMTripsForSubtotals = dfTLFTDMTripsForSubtotals[['TLFTYPE','TRIPPURP_SUB','BIN','TRIPS']]
        dfTLFTDMTripsForSubtotals = dfTLFTDMTripsForSubtotals.rename(columns={'TRIPPURP_SUB':'TRIPPURP'})
        if debug: display (dfTLFTDMTripsForSubtotals)

        # concat subtotals
        dfTLFTDMTrips = pd.concat([dfTLFTDMTrips,dfTLFTDMTripsForSubtotals], ignore_index=True)
        if debug: display(dfTLFTDMTrips)

        # calculated collapsed bins
        from math import floor
        def round_to_binsize(x):
            return int(binsize * floor(float(x)/binsize))
        dfTLFTDMTrips['BIN_COLLAPSE'] = dfTLFTDMTrips['BIN'].apply(lambda x: round_to_binsize(x))
        if debug: display(dfTLFTDMTrips)

        # aggregate to collapsed bins
        dfTLFTDMTrips = dfTLFTDMTrips.groupby(['TLFTYPE','TRIPPURP','BIN_COLLAPSE'], as_index=False).agg(TRIPS=('TRIPS','sum'))
        dfTLFTDMTrips = dfTLFTDMTrips.rename(columns={'BIN_COLLAPSE':'BIN'})
        if debug: display(dfTLFTDMTrips)

        # calculate percent distribution
        dfTLFTDMTripTotals = dfTLFTDMTrips.groupby(['TLFTYPE','TRIPPURP'], as_index=False).agg(TRIP_TOTAL=('TRIPS','sum'))
        if debug: display(dfTLFTDMTripTotals)

        # join to toals
        dfTLFTDMTripDist = pd.DataFrame.merge(dfTLFTDMTrips, dfTLFTDMTripTotals, on=('TLFTYPE','TRIPPURP'))
        dfTLFTDMTripDist['FREQ'] = dfTLFTDMTripDist['TRIPS'] / dfTLFTDMTripDist['TRIP_TOTAL']
        if debug: display(dfTLFTDMTripDist)

        dfTLF_TDM = dfTLFTDMTripDist[['TLFTYPE','TRIPPURP','BIN','FREQ']]
        if debug: display(dfTLF_TDM)

        # check to see if add up to 1.0
        dfCheck = dfTLF_TDM.groupby(['TLFTYPE','TRIPPURP'], as_index=False).agg(FREQ_SUM=('FREQ','sum'))
        # only display results out of range
        display('Not adding to 1 (if empty dataset, YAY!!!):')
        display(dfCheck[(dfCheck['FREQ_SUM']<0.9999999) | (dfCheck['FREQ_SUM']>1.0000001)])

        dfTLF_TDM.to_csv(filename, index=False)

    else:
        dfTLF_TDM = pd.read_csv(filename)

    #add calibration round back into table
    dfTLF_TDM['FF_CALIB_ROUND'] = row['FF_CALIB_ROUND']

    # append dataframe to array
    dfsTLF_TDM.append(dfTLF_TDM)


'Reading 0-Initial from v832 TLFs'

'Not adding to 1 (if empty dataset, YAY!!!):'

Unnamed: 0,TLFTYPE,TRIPPURP,FREQ_SUM




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Model vs Observed Plots

Compare Average Trip Length Between Modeled and Observed Data

In [54]:
dfTLF_Obs_N = dfTLF_Obs
dfTLF_Obs_N['FF_CALIB_ROUND'] = 'Observed'
dfTLF_Obs_N.rename(columns={'FREQ_Obs':'FREQ'},inplace=True) # temp fix

#Merge together all Modeled data and Observed data
dfObsModelMerge = (pd.concat(dfsTLF_TDM).append(dfTLF_Obs_N))   

#CALCUATE TRIP LENGTH AVERAGES AS TABLE
dfObsModelMerge['BINxFREQ'] = dfObsModelMerge['BIN'] * dfObsModelMerge['FREQ']
dfObsModelMerge_Stats = (dfObsModelMerge
    .groupby(['FF_CALIB_ROUND','TRIPPURP','TLFTYPE'],as_index=False)
        .agg(FREQ_SUM=('FREQ','sum'),AVG_TRIP_LEN=('BINxFREQ','sum')))

if debug: display(dfObsModelMerge_Stats)

In [478]:
#dfTLF_MvO_Wide = (dfObsModelMerge_Stats
#    .pivot(index=['TRIPPURP','TLFTYPE'],columns="FF_CALIB_ROUND",values="AVG_TRIP_LEN")
#    .reset_index(['TRIPPURP','TLFTYPE'])
#    .assign(DIF = lambda x: (x['Observed'] - x['0-Initial from v832'])/(x['0-Initial from v832'])*100))
#display(dfTLF_MvO_Wide)


In [55]:
dfObsModelMerge_Stats

Unnamed: 0,FF_CALIB_ROUND,TRIPPURP,TLFTYPE,FREQ_SUM,AVG_TRIP_LEN
0,0-Initial from v832,HBO,Dist,1.0,3.85901090
1,0-Initial from v832,HBO,GC,1.0,16.79816741
2,0-Initial from v832,HBO,Time,1.0,10.66604000
3,0-Initial from v832,HBOth,Dist,1.0,4.02842825
4,0-Initial from v832,HBOth,GC,1.0,17.19945867
...,...,...,...,...,...
109,Observed,XI_HV,GC,1.0,162.17048320
110,Observed,XI_HV,Time,1.0,72.56099400
111,Observed,XI_MD,Dist,1.0,42.12514280
112,Observed,XI_MD,GC,1.0,99.29019360


In [56]:
def update_df_mod_vs_obs(modeloption,tlfoption,trippurpoption):
    modellist = list(modeloption)
    modellist.append('Observed')

    dfObsModelMerge_Stats2 =(dfObsModelMerge_Stats
        .loc[dfObsModelMerge_Stats['FF_CALIB_ROUND'].isin(modellist)])
    dfObsModelMerge_Stats3 = (dfObsModelMerge_Stats2
        .loc[dfObsModelMerge_Stats2['TLFTYPE'].isin(tlfoption)])
    dfObsModelMerge_Stats4 = (dfObsModelMerge_Stats3
        .loc[dfObsModelMerge_Stats3['TRIPPURP'].isin(trippurpoption)])

    if len(modellist) > 2:
        dfTLF_MvO_Wide2 = (dfObsModelMerge_Stats4
            .pivot(index=['TRIPPURP','TLFTYPE'],columns="FF_CALIB_ROUND",values="AVG_TRIP_LEN")
            .reset_index(['TRIPPURP','TLFTYPE']))
    else:
        dfTLF_MvO_Wide2 = (dfObsModelMerge_Stats4
            .pivot(index=['TRIPPURP','TLFTYPE'],columns="FF_CALIB_ROUND",values="AVG_TRIP_LEN")
            .reset_index(['TRIPPURP','TLFTYPE'])
            .assign(PerChange = lambda x: (x[modellist[0]] - x['Observed'])/(x['Observed'])*100))

    fig = go.Figure(data=[go.Table(
                    header = dict(values=list(dfTLF_MvO_Wide2.columns),
                                  fill_color='paleturquoise',
                                  align='left'),
                    cells =  dict(values=round(dfTLF_MvO_Wide2,2).transpose().values.tolist(),
                                 fill_color='lavender',
                                 align='left'))
                    ])

    figbar2 = px.bar(dfObsModelMerge_Stats4, 
                     x = 'TRIPPURP', 
                     y = 'AVG_TRIP_LEN', 
                     color = 'FF_CALIB_ROUND', 
                     barmode = 'group'
                    )

    figbar2.show()
    py.offline.iplot(fig)  

In [57]:
py.offline.init_notebook_mode(connected=True)

lstModOpt  = dfObsModelMerge['FF_CALIB_ROUND'].unique().tolist()
lstModOptShort = [round for round in lstModOpt if round != 'Observed']  
lstTLFType   = dfObsModelMerge['TLFTYPE'].unique().tolist() # GC, Time, Dist
lstTripPurp  = dfTLF_Obs['TRIPPURP'].unique().tolist() # HBW, HBShp, HBOth,...
    
selectModelOption  = widgets.SelectMultiple(options=lstModOptShort, value=('0-Initial from v832',), description='Model Run'   )
selectTLFOption    = widgets.SelectMultiple(options=lstTLFType    , value=('GC' ,)                , description='TLF Type'    )
selectTripPurp     = widgets.SelectMultiple(options=lstTripPurp   , value=lstTripPurp             , description='Trip Purpose')

widgets.interactive(update_df_mod_vs_obs,modeloption=selectModelOption, tlfoption=selectTLFOption, trippurpoption=selectTripPurp)

interactive(children=(SelectMultiple(description='Model Run', index=(0,), options=('0-Initial from v832',), va…

Comparing Trip Length Frequency Graphs between Modeled and Observed

In [58]:
#PLOTTING FUNCTION

# chart preset zoom extents
dfZoomPresets = pd.DataFrame([
    ['Begin' ,     0,   100,     0, np.NaN],
    ['Mid'   ,    30,   160,     0, 0.0018],
    ['Tail'  ,   100,   400,     0,0.0001],
    ['All'   ,np.NaN,np.NaN,np.NaN, np.NaN],
    ['Custom',np.NaN,np.NaN,np.NaN, np.NaN],
], columns=('PRESET','XMIN','XMAX','YMIN','YMAX'))
dfZoomPresets

def update_plot_mod_vs_obs(modeloption, trippurps, tlftypes, zoompreset, xmin, xmax, ymin, ymax):

    data = []
    for modelspace in modeloption:
        for trippurp in trippurps:
            for tlftype in tlftypes:
            
                # data for plotting from filtered dataframe
                plotdata1 = dfObsModelMerge[(dfObsModelMerge['TRIPPURP']==trippurp) & (dfObsModelMerge['TLFTYPE']==tlftype)]
                plotdata = plotdata1[(plotdata1['FF_CALIB_ROUND']==modelspace)]

                # fill any NaN values with zeros
                plotdata = plotdata.fillna(0)

                xplot = plotdata['BIN']
                yplot = plotdata['FREQ']
                name  = trippurp + ' ' + tlftype + ' (' + modelspace + ')'

                trace1 = go.Scatter(
                    x=xplot,
                    y=yplot,
                    mode='lines',
                    name=name,
                    line=dict(
                        shape='spline'
                    )
                )
                data.append(trace1)
                
    # get zoom presets
    if (zoompreset=='Custom'):
        _xmin = xmin
        _xmax = xmax
        _ymin = ymin
        _ymax = ymax
    else:
        _xmin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMIN'].values[0]
        _xmax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMAX'].values[0]
        _ymin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMIN'].values[0]
        _ymax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMAX'].values[0]

    layout = go.Layout(
        title='Trip Length Frequencies - Observed Data',
        yaxis=dict(
            title='',
            range=(_ymin,_ymax)
        ),
        xaxis=dict(
            title='GC/Dist/Time',
            range=(_xmin,_xmax)
        ),
        #width=1600,
        height=450
    )
    
    fig = go.Figure(data=data, layout=layout)
    py.offline.iplot(fig)

In [59]:
# MAKE INTERACTIVE CHART
py.offline.init_notebook_mode(connected=True)

lstTripPurp  = dfObsModelMerge['TRIPPURP'].unique().tolist() # HBW, HBShp, HBOth,...
lstTLFType   = dfObsModelMerge['TLFTYPE' ].unique().tolist() # GC, Time, Dist
lstModOpt    = dfObsModelMerge['FF_CALIB_ROUND'].unique().tolist()
    
selectTripPurp     = widgets.SelectMultiple(options=lstTripPurp                     , value=('HBW',)               , description='Trip Purpose')
selectTLFType      = widgets.SelectMultiple(options=lstTLFType                      , value=('GC' ,)               , description='TLF Type'    )
selectPreset       = widgets.Select        (options=dfZoomPresets['PRESET'].tolist(), value= 'All'                 , description='Zoom Presets')
selectModelOption  = widgets.SelectMultiple(options=lstModOpt                       , value=('0-Initial from v832',), description='Model Run')

#custom extents
xmin = widgets.Text(value='0'  , description='X Min')
xmax = widgets.Text(value='60' , description='X Max')
ymin = widgets.Text(value='0'  , description='Y Min')
ymax = widgets.Text(value='.01', description='Y Max')

widgets.interactive(update_plot_mod_vs_obs, modeloption=selectModelOption,trippurps=selectTripPurp, tlftypes=selectTLFType, zoompreset=selectPreset, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)

interactive(children=(SelectMultiple(description='Model Run', index=(0,), options=('0-Initial from v832', 'Obs…

# Friction Factor Adjustment and Smoothing

In [60]:
# function check which trip urposes are unlocked for a given round
 
def getUnlockedListForCalibRound(calibround):

    aUnlockedTripPurp = []

    # list trip purposes. must have dfsFF[0] loaded with complete set
    trippurps = dfsFF[0]['TRIPPURP'].unique().tolist()

    roundindex = dfCalibrationRounds[dfCalibrationRounds['FF_CALIB_ROUND']==calibround].index.values[0]
    if debug: print('Calib Round Index: ' + str(roundindex) + ' (' + calibround + ')')

    for trippurp in trippurps:

        #get round of lock
        lockedRounds = dfTripPurpLocks[dfTripPurpLocks['TRIPPURP']==trippurp]['LOCK_FF_CALIB_ROUND'].values

        if lockedRounds.size > 0:
            lockedRound = lockedRounds[0]
            lockedRoundIndices = dfCalibrationRounds[dfCalibrationRounds['FF_CALIB_ROUND']==lockedRound].index.values

            if lockedRoundIndices.size > 0: 
                lockedRoundIndex = lockedRoundIndices[0]
                if roundindex > lockedRoundIndex:
                    print(trippurp + ' Locked')
                else:
                    print(trippurp + ' Unlocked')
                    aUnlockedTripPurp.append(trippurp)
            else:
                print(trippurp + ' Unlocked')
                aUnlockedTripPurp.append(trippurp)
        else:
            print(trippurp + ' Unlocked')
            aUnlockedTripPurp.append(trippurp)

    return aUnlockedTripPurp

if debug: display(getUnlockedListForCalibRound('1-Test'))

In [61]:
# calculated new friction factors for each round of calibration

# dataframe array of adjusted friction factors
dfsFFAdj=[]

for index, row in dfCalibrationRounds.iterrows():
    calib_round = row['FF_CALIB_ROUND']
    print('Calbration Round: ' + calib_round)

    filename = os.path.join(os.path.join(dirIntermediate,row['FF_CALIB_ROUND']),'3-friction factors adjusted.csv')
    
    # if friction factors locked
    if row['LOCKFF']==True:
        dfsFFAdj = dfsFFAdj.append(pd.read_csv(filename))

    # if friction factors not locked
    else:

        # initialize dataframes with round data, round index should be same as indexes for TLF and FF
        dfTLF_TDM = dfsTLF_TDM[index][dfsTLF_TDM[index]['TLFTYPE']=='GC'].copy()
        dfFF      = dfsFF[index].copy()
        
        # calculate adjustment factor
        dfTLF_TDM.rename(columns={'FREQ':'FREQ_TDM'},inplace=True)
        dfTLF_Obs.rename(columns={'FREQ':'FREQ_Obs'},inplace=True)

        dfModVsObs = pd.DataFrame.merge(dfTLF_TDM,dfTLF_Obs,on=('TRIPPURP','TLFTYPE','BIN'))

        # adjustment factor is observed divided by model
        dfModVsObs.loc[dfModVsObs['FREQ_TDM']>0 , 'ADJFACTOR'] = dfModVsObs['FREQ_Obs'] / dfModVsObs['FREQ_TDM']

        # if BIN 0 is empty, then use BIN 2 factor, unless empty as well
        dfModVsObs['ADJFACTOR_NEXTBIN'] = dfModVsObs['ADJFACTOR'].shift(-1) # create temp field for next bin factor... should be ok to not filter by TRIPPURP and TLFTYPE since ends of curves are always 0
        if debug: display(dfModVsObs)
        
        dfModVsObs.fillna(0,inplace=True) # fill NaNs with zeros
        
        dfModVsObs['ADJFACTOR'] = dfModVsObs.apply(lambda x: x['ADJFACTOR_NEXTBIN'] if (x['BIN']==0) & (x['ADJFACTOR']==0) & (x['ADJFACTOR_NEXTBIN']>=0) else x['ADJFACTOR'] ,axis=1)
        
        dfModVsObs.drop(columns=('ADJFACTOR_NEXTBIN'),inplace=True) # drop next bin factor column
        
        if debug: display(dfModVsObs)      

        # don't know if this is needed 
        #dfModVsObs.loc[dfModVsObs['FREQ_TDM']==0, 'ADJFACTOR'] = 1

        # raw adjusted friction factor: multiply friction factor from previous round by adjustment factor
        dfModVsObsWithFF = pd.DataFrame.merge(dfModVsObs,dfFF,on=('TRIPPURP','BIN'))

        dfModVsObsWithFF['FF_ADJ_RAW'] = dfModVsObsWithFF['FF'] * dfModVsObsWithFF['ADJFACTOR']

        # normalized adj friction factor: divide by max (round to seven decimals)
        dfModVsObsWithFF_MaxAdj = (dfModVsObsWithFF
            .groupby(['TLFTYPE','TRIPPURP'], as_index=False)
                .agg(FF_ADJ_MAX=('FF_ADJ_RAW','max')))

        dfModVsObsWithFFAdjNrml = pd.DataFrame.merge(dfModVsObsWithFF,dfModVsObsWithFF_MaxAdj, on=('TLFTYPE','TRIPPURP'))

        dfModVsObsWithFFAdjNrml['FF_ADJ'] = round(dfModVsObsWithFFAdjNrml['FF_ADJ_RAW'] / dfModVsObsWithFFAdjNrml['FF_ADJ_MAX'],round_precision)

        dfModVsObsWithFFAdjNrml = dfModVsObsWithFFAdjNrml.rename(columns=({'FF':'FF_ORIG'}))

        # write out intermediate csv        
        dfFFAdj = dfModVsObsWithFFAdjNrml[['TRIPPURP','TLFTYPE','BIN','FF_ORIG','FF_ADJ']]
        dfFFAdj.to_csv(filename)
                
        if debug:
            display(dfModVsObsWithFFAdjNrml)
            dfCheck = (dfModVsObsWithFFAdjNrml
                .groupby(['TLFTYPE','TRIPPURP'])
                    .agg(FF_ADJ_MAX=('FF_ADJ','max')))
            print('These all should be 1:')
            display(dfCheck)

    # append to dataframe
    dfsFFAdj.append(dfFFAdj)
    if debug: display(dfsFFAdj)

Calbration Round: 0-Initial from v832


In [74]:
#PLOTTING FUNCTION
# chart preset zoom extents
dfZoomPresets = pd.DataFrame([
    ['Begin' ,     0,   100,     0,np.NaN],
    ['Mid'   ,    30,   160,     0, 0.018],
    ['Tail'  ,   100,   400,     0, 0.001],
    ['All'   ,np.NaN,np.NaN,np.NaN,np.NaN],
    ['Custom',np.NaN,np.NaN,np.NaN,np.NaN],
], columns=('PRESET','XMIN','XMAX','YMIN','YMAX'))
dfZoomPresets

aOAS=['Original','Adjusted','Smoothed']
linewidths=[1.5,4,2]
linedashes=['solid','dash','solid']

#def update_plot_ff(calibrnds, trippurps, oass, zoompreset):
def update_plot_ff(calibrnds, trippurps, oass, zoompreset, xmin, xmax, ymin, ymax):

    dfAvgTripLen = pd.DataFrame()
    
    data = []
    for calibrnd in calibrnds:
        for oas in oass:
            
            linewidth = linewidths[aOAS.index(oas)]
            linedash  = linedashes[aOAS.index(oas)]

            for trippurp in trippurps:

                # data for plotting from filtered dataframe
                plotdata = dfFFsForCharts[(dfFFsForCharts['FF_CALIB_ROUND']==calibrnd) & (dfFFsForCharts['TRIPPURP']==trippurp) & (dfFFsForCharts['OAS']==oas)]

                # fill any NaN values with zeros
                plotdata = plotdata.fillna(0)
                
                xplot = plotdata['BIN']
                yplot = plotdata['FF']
                name  = calibrnd + ' ' + trippurp + ' ' + oas

                trace1 = go.Scatter(
                    x=xplot,
                    y=yplot,
                    mode='lines',
                    name=name,
                    line=dict(
                        shape='spline',
                        width=linewidth,
                        dash=linedash
                    )
                )
                data.append(trace1)
                
    # get zoom presets
    if (zoompreset=='Custom'):
        _xmin = xmin
        _xmax = xmax
        _ymin = ymin
        _ymax = ymax
    else:
        _xmin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMIN'].values[0]
        _xmax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['XMAX'].values[0]
        _ymin = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMIN'].values[0]
        _ymax = dfZoomPresets[dfZoomPresets['PRESET']==zoompreset]['YMAX'].values[0]

    layout = go.Layout(
        title='Friction Factors',
        yaxis=dict(
            title='',
            range=(_ymin,_ymax)
        ),
        xaxis=dict(
            title='GC',
            range=(_xmin,_xmax)
        ),
        #width=1600,
        height=600
    )
    
    fig = go.Figure(data=data, layout=layout)
    #py.offline.iplot(fig)
    fig.show()

In [140]:
# SMOOTHING

# backup will output a new CSV each time function is run... just to be safe
backup = True

dfSmoothedSetHolder = pd.DataFrame()
dfFFsForCharts = pd.DataFrame() # master dataframe

# loop through input dataframe
for index, row in dfCalibrationRounds.iterrows():
    
    # get trip purposes and tlf types for looping
    tps  = dfsFF[index]['TRIPPURP'].unique().tolist()

    dfSmoothParams = pd.DataFrame()

    # r=root, d=directories, f = files
    for r, d, f in os.walk(os.path.join(os.path.join(os.path.join(dirIntermediate,row['FF_CALIB_ROUND']),'manual_smoothing'))):
        for file in f:
            if file.endswith('.csv'):
                dfSmoothParams = pd.concat([dfSmoothParams,pd.read_csv(os.path.join(r, file))])
    if debug: display(dfSmoothParams)

    # create sets for give index
    dfOriginalSet = dfsFF[index].copy()
    dfAdjustedSet = dfsFFAdj[index][['TRIPPURP','TLFTYPE','BIN','FF_ADJ']].copy()
    dfAdjustedSet.columns = (['TRIPPURP','TLFTYPE','BIN','FF'])
    dfSmoothedSet = dfAdjustedSet.copy() # copy of adjusted since this is where it starts
    
    # add column for round values
    dfOriginalSet['FF_CALIB_ROUND'] = row['FF_CALIB_ROUND']
    dfAdjustedSet['FF_CALIB_ROUND'] = row['FF_CALIB_ROUND']
    dfSmoothedSet['FF_CALIB_ROUND'] = row['FF_CALIB_ROUND']

    dfOriginalSet['OAS'] = 'Original'
    dfAdjustedSet['OAS'] = 'Adjusted'
    dfSmoothedSet['OAS'] = 'Smoothed'

    if debug: display(dfAdjustedSet)

    # go through all trip purposes
    for tp in tps:
        if debug: print (tp)

        # make copy of filtered dataframe for working
        dfSmoothedSetByTP = dfSmoothedSet[(dfSmoothedSet['TRIPPURP']==tp)].copy()

        # make copy of filtered manual adjustments for working
        dfSmoothParamsByTP = dfSmoothParams[(dfSmoothParams['TRIPPURP']==tp)].copy()

        # smoothing is done row-by-row from the parameters file
        for index, row in dfSmoothParamsByTP.iterrows():

            # show before if debug set to 1
            if(row['DEBUG_TOGGLE']==1):
                pd.set_option('display.max_rows', None)
                display(dfSmoothedSetByTP[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO']))])
                pd.set_option('display.max_rows', 10)

            # linear adjustment
            if row['LINEAR_TOGGLE']:
                #print('linear')

                numPNTS = len(dfSmoothedSetByTP[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO']))].index)
                bgnFFAdj = dfSmoothedSetByTP[(dfSmoothedSetByTP['BIN']==row['BIN_FROM'])]['FF'].values[0]
                endFFAdj = dfSmoothedSetByTP[(dfSmoothedSetByTP['BIN']==row['BIN_TO'  ])]['FF'].values[0]

                # interpolate with np
                interpolates = np.linspace(bgnFFAdj, endFFAdj, numPNTS)

                # display for debugging
                if(row['DEBUG_TOGGLE']==1):
                    display('Bgn Freq ' + str(bgnFFAdj))
                    display('End Freq ' + str(endFFAdj))
                    display('Size ' + str(numPNTS))
                    display('Interpolates ' + str(interpolates))

                dfSmoothedSetByTP.loc[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO'])), 'FF'] = interpolates

            # moving average adjustment
            if row['MOVAVG_REACH']>0:
                
                maxbin = dfSmoothedSetByTP.loc[dfSmoothedSetByTP['BIN'].idxmax()]['BIN']
                
                # add zeros to keep moving average
                dfZeros = dfSmoothedSetByTP.copy()
                dfZeros['BIN'] += maxbin + binsize
                dfZeros['FF'] = 0
                dfSmoothedSetByTP = pd.concat([dfSmoothedSetByTP, dfZeros], ignore_index=True)
                
                # reach is on either side, so average extents is reach 2 + 1 with min periods for the shoulders, shift moves the avergage to center on bin
                # moving average is calculated for entire dataset and then only applied to BIN_FROM-BIN_TO range
                dfRolling = dfSmoothedSetByTP['FF'].rolling(row['MOVAVG_REACH']*2+1, min_periods=1).mean().shift(-1 * row['MOVAVG_REACH'])

                # debug
                if(row['DEBUG_TOGGLE']==1):
                    display(dfRolling)
                    
                dfSmoothedSetByTP.loc[dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO']),'FF'] = dfRolling                
                
                # trim back to maxbin
                dfSmoothedSetByTP = dfSmoothedSetByTP[dfSmoothedSetByTP['BIN']<=maxbin]
            
            # adjust frequency by override
            if row['FF_OVERRIDE']>0:
                dfSmoothedSetByTP.loc[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO'])),'FF'] = row['FF_OVERRIDE']

            # adjust frequency by relative amount
            dfSmoothedSetByTP.loc[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO'])),'FF'] += row['FFADJ_REL']

            # set frequency to zero
            if row['FF_ZERO_TOGGLE']==1:
                dfSmoothedSetByTP.loc[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO'])),'FF'] = 0

            # show after if debug set to 1
            if(row['DEBUG_TOGGLE']==1):
                pd.set_option('display.max_rows', None)
                display(dfSmoothedSetByTP[(dfSmoothedSetByTP['BIN'].between(row['BIN_FROM'],row['BIN_TO']))])
                pd.set_option('display.max_rows', 10)

            if debug: display(dfSmoothedSetByTP)

        dfSmoothedSetHolder = pd.concat([dfSmoothedSetHolder, dfSmoothedSetByTP], ignore_index=True)

    if debug: display(dfSmoothedSetHolder)

    # combine dataframes... smoothed with observed
    dfFFsForCharts = pd.concat([dfFFsForCharts,dfOriginalSet,dfAdjustedSet,dfSmoothedSetHolder], ignore_index=True)

if debug: display(dfFFsForCharts)

# MAKE INTERACTIVE CHART
py.offline.init_notebook_mode(connected=True)

lstCalibRnd  = dfFFsForCharts['FF_CALIB_ROUND'].unique().tolist() # iterative round names
lstTripPurp  = dfFFsForCharts['TRIPPURP'      ].unique().tolist() # HBW, HBShp, HBOth,...
lstOAS       = dfFFsForCharts['OAS'           ].unique().tolist() # Original, Adjusted, Smoothed

# multi select
selectCalibRnd  = widgets.SelectMultiple(options=lstCalibRnd                     , value=(dfFFsForCharts['FF_CALIB_ROUND'].values[0],), description='Calib Round' )
selectTripPurp  = widgets.SelectMultiple(options=lstTripPurp                     , value=('NHBW',)                                     , description='Trip Purpose')
selectOAS       = widgets.SelectMultiple(options=lstOAS                          , value=('Original','Adjusted','Smoothed')           , description='OAS'         )

# single select
selectPreset    = widgets.Select        (options=dfZoomPresets['PRESET'].tolist(), value='Begin'                                      , description='Zoom Presets')

##custom extents
xmin = widgets.Text(value='0', description='X Min',style={'width':50, 'description_width': '30'})
xmax = widgets.Text(value='250', description='X Max',style={'width':50, 'description_width': '30'})
ymin = widgets.Text(value='0', description='Y Min',style={'width':'50', 'description_width': '30'})
ymax = widgets.Text(value='.003', description='Y Max',style={'width':50, 'description_width': '30'})

#container1 = widgets.HBox([selectCalibRnd, selectTripPurp, selectOAS, selectPreset])
#container2 = widgets.VBox([xmin, xmax, ymin, ymax])
#container3 = widgets.HBox([container1, container2])

#ui = container3

#out = widgets.interactive_output(update_plot_ff, {'calibrnds':selectCalibRnd, 'trippurps':selectTripPurp, 'oass':selectOAS, 'zoompreset':selectPreset, 'xmin':xmin, 'xmax':xmax, 'ymin':ymin, 'ymax':ymax})
#display(ui, out)

widgets.interactive(update_plot_ff, calibrnds=selectCalibRnd, trippurps=selectTripPurp, oass=selectOAS, zoompreset=selectPreset, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax)
#widgets.interactive(update_plot_ff, calibrnds=selectCalibRnd, trippurps=selectTripPurp, oass=selectOAS, zoompreset=selectPreset)

interactive(children=(SelectMultiple(description='Calib Round', index=(0,), options=('0-Initial from v832',), …

In [488]:
import matplotlib.pyplot as plt
import numpy as np

m = widgets.FloatSlider(min=-5,max=5,step=0.5, description="Slope")
c = widgets.FloatSlider(min=-5,max=5,step=0.5, description="Intercept")

# An HBox lays out its children horizontally
ui = widgets.HBox([m, c])

def plot(m, c):
    x = np.random.rand(10)
    y = m *x + c
    plt.plot(x,y)
    plt.show()

out = widgets.interactive_output(plot, {'m': m, 'c': c})

display(out, ui)

Output()

HBox(children=(FloatSlider(value=0.0, description='Slope', max=5.0, min=-5.0, step=0.5), FloatSlider(value=0.0…