In [1]:
import pandas as pd
import panel as pn
from panel.interact import interact
pn.extension()

import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Load the zone export, then keep only the hourly-average load column for the
# rows whose ZoneType is 'Patient Room'.
full = (
    pd.read_csv('SL_All_Zones_Cooling_no_dups_11_30_21.csv', low_memory=False)
    .loc[lambda zones: zones['ZoneType'] == 'Patient Room', ['LoadHourlyAvg']]
)

In [3]:
# Hours with a negative load (presumably the cooling hours -- confirm the sign
# convention against the data source).
between = full.loc[full['LoadHourlyAvg'] < 0]

# Store those loads as positive magnitudes on a fresh 0..n-1 index.
cooling = between[['LoadHourlyAvg']].abs().reset_index(drop=True)

In [4]:
# Rank every patient-room hour from the highest load to the lowest and attach
# a z-score and a percentile to each row.
#
# The frame is rebuilt from its 'LoadHourlyAvg' column instead of being mutated
# in place, so the cell can be re-run safely: the previous in-place double
# reset_index() raised "cannot insert level_0, already exists" on a second run.
#
# Resulting columns:
#   level_0       -- 0-based position in the descending sort
#   LoadHourlyAvg -- the load value
#   zScore        -- (load - mean) / std of the load, rounded to 2 decimals
#   Percentile    -- dense percentile rank of the load, rounded to 3 decimals
full = (
    full[['LoadHourlyAvg']]
    .sort_values(by='LoadHourlyAvg', ascending=False)
    .reset_index(drop=True)
    .rename_axis('level_0')   # name the positional index so it becomes the 'level_0' column
    .reset_index()
    .assign(
        zScore=lambda ranked: (
            (ranked['LoadHourlyAvg'] - ranked['LoadHourlyAvg'].mean())
            / ranked['LoadHourlyAvg'].std()
        ).round(2),
        Percentile=lambda ranked: (
            ranked['LoadHourlyAvg'].rank(method='dense', pct=True).round(3)
        ),
    )
)
full.head()

Unnamed: 0,level_0,LoadHourlyAvg,zScore,Percentile
0,0,-1.894781e-14,1.46,1.0
1,1,-1.894781e-14,1.46,1.0
2,2,-1.894781e-14,1.46,1.0
3,3,-2.526374e-14,1.46,1.0
4,4,-3.789561e-14,1.46,1.0


In [5]:
# Rank the cooling hours from the largest load to the smallest and attach a
# z-score and a percentile to each row.
#
# The frame is rebuilt from its 'LoadHourlyAvg' column instead of being mutated
# in place, so the cell can be re-run safely: on a second run the previous
# in-place version ended up with two 'index' columns and failed when assigning
# 'Percentile'.
#
# Resulting columns:
#   index         -- 0-based position in the descending sort (0 = largest load)
#   LoadHourlyAvg -- the load magnitude
#   zScore        -- (load - mean) / std of the load, rounded to 2 decimals
#   Percentile    -- percentile rank of the sort position (position 0 ranks
#                    highest), rounded to 3 decimals
cooling = (
    cooling[['LoadHourlyAvg']]
    .sort_values(by='LoadHourlyAvg', ascending=False)
    .reset_index(drop=True)
    .rename_axis('index')   # name the positional index so it becomes the 'index' column
    .reset_index()
    .assign(
        zScore=lambda ranked: (
            (ranked['LoadHourlyAvg'] - ranked['LoadHourlyAvg'].mean())
            / ranked['LoadHourlyAvg'].std()
        ).round(2),
        Percentile=lambda ranked: (
            ranked['index'].rank(ascending=False, method='dense', pct=True).round(3)
        ),
    )
)
cooling.head()

Unnamed: 0,index,LoadHourlyAvg,zScore,Percentile
0,0,14965.74,15.69,1.0
1,1,14630.67,15.31,1.0
2,2,13416.39,13.92,1.0
3,3,12874.05,13.3,1.0
4,4,12520.08,12.89,1.0


In [7]:
# Tab container for the dashboard and the frame that every plot below reads.
tabs = pn.Tabs()
df = cooling

# The z-score sliders span the z-scores observed in df.
minRangeZ = df['zScore'].min()
maxRangeZ = df['zScore'].max()

# Histogram bin edges. When df has as many rows as `cooling`, the edges run in
# steps of 100 from the smallest load up to (not including) the 50th-largest
# load; otherwise they run in steps of 200 from the 50th-smallest load up to
# (not including) the largest load.
histBins = (
    np.arange(df['LoadHourlyAvg'].min(), df['LoadHourlyAvg'].nlargest(50).iloc[-1], 100)
    if len(df) == len(cooling)
    else np.arange(df['LoadHourlyAvg'].nsmallest(50).iloc[-1], df['LoadHourlyAvg'].max(), 200)
)

# Percentile window controls (0-100 scale).
maxSliderPct = pn.widgets.FloatSlider(name='Max Percentile', start=0, end=100, value=100)
minSliderPct = pn.widgets.FloatSlider(name='Min Percentile', start=0, end=100)

# Z-score window controls; the upper bound starts at 3.
maxSliderZ = pn.widgets.FloatSlider(
    name='Max ZScore', start=minRangeZ, end=maxRangeZ, step=0.01, value=3
)
minSliderZ = pn.widgets.FloatSlider(
    name='Min ZScore', start=minRangeZ, end=maxRangeZ, step=0.01
)

@pn.depends(maxSliderPct, minSliderPct)
def graphPercent(maxPercent, minPercent):
    """Draw the load histogram with the bins outside a percentile window shaded.

    Parameters
    ----------
    maxPercent : float
        Upper percentile bound on a 0-100 scale (value of ``maxSliderPct``).
    minPercent : float
        Lower percentile bound on a 0-100 scale (value of ``minSliderPct``).
        A value of 0 disables the lower shading.

    Returns
    -------
    matplotlib.figure.Figure
        Histogram of ``df['LoadHourlyAvg']`` over ``histBins``. Bins whose left
        edge lies at or above the load found at ``maxPercent``, or at or below
        the load found at ``minPercent``, are drawn in the darker colour. The
        figure is annotated with the load at ``maxPercent`` and the number of
        hours that exceed it.

    Notes
    -----
    Reads the module-level ``df`` (columns ``'index'``, ``'LoadHourlyAvg'`` and
    ``'Percentile'``) and ``histBins``.
    """
    def rowsAtPercentile(percent):
        # 'Percentile' holds fractions rounded to 3 decimals. Exact float
        # equality misses ordinary slider values (99.9 / 100 evaluates to
        # 0.9990000000000001), which used to leave the selection empty and
        # crash on .iloc[0]. Select the rows whose percentile is nearest to the
        # requested one instead: exact matches when they exist, otherwise the
        # closest available group.
        target = round(percent / 100, 3)
        gap = (df['Percentile'] - target).abs()
        return df[np.isclose(gap, gap.min())]

    dfMax = rowsAtPercentile(maxPercent)
    maxLoad = dfMax['LoadHourlyAvg'].max()
    # 'index' is the row's position in the descending sort, so the first
    # selected row's position counts the hours with a larger load.
    overHour = dfMax.iloc[0]['index']

    minLoad = rowsAtPercentile(minPercent)['LoadHourlyAvg'].min()
    shadeLow = minPercent != 0

    fig, ax = plt.subplots(figsize=(12, 6), facecolor='w')
    _, edges, bars = ax.hist(df['LoadHourlyAvg'], edgecolor='k', bins=histBins)

    # zip() pairs each bar with the left edge of its bin (edges has one extra
    # trailing entry, which zip drops).
    for leftEdge, bar in zip(edges, bars):
        outsideWindow = (shadeLow and leftEdge <= minLoad) or leftEdge >= maxLoad
        bar.set_facecolor('#7997a1' if outsideWindow else '#add8e6')

    maxStr = (str(maxPercent) + 'th Percentile: ' + str(int(round(maxLoad))) + ' Btu/h')
    hourStr = ('Cooling load greater in ' + str(int(overHour)) + ' hours out of ' + str(len(df)))

    ax.set_xlabel('Cooling Load (Btu/h)', fontsize=18)
    ax.set_ylabel('Count of Hours', fontsize=18)
    ax.set_title('Histogram of San Leandro Patient Room Btu/h', fontsize=24)

    ax.annotate(maxStr, (0, 0), xytext=(0.45, 0.7), textcoords='axes fraction', fontsize=14)
    ax.annotate(hourStr, (0, 0), xytext=(0.45, 0.6), textcoords='axes fraction', fontsize=14)

    # Close the figure in pyplot's registry; the returned object is still
    # rendered by panel.
    plt.close(fig)
    return fig

@pn.depends(maxSliderZ, minSliderZ)
def graphZScore(maxZ, minZ):
    """Draw the load histogram with the bins outside a z-score window shaded.

    Parameters
    ----------
    maxZ : float
        Upper z-score bound (value of ``maxSliderZ``).
    minZ : float
        Lower z-score bound (value of ``minSliderZ``). When it equals the
        smallest z-score in ``df`` the lower shading is disabled.

    Returns
    -------
    matplotlib.figure.Figure
        Histogram of ``df['LoadHourlyAvg']`` over ``histBins``. Bins whose left
        edge lies at or above the load found at ``maxZ``, or at or below the
        load found at ``minZ``, are drawn in the darker colour. The figure is
        annotated with the loads at both bounds.

    Notes
    -----
    Reads the module-level ``df`` (columns ``'LoadHourlyAvg'`` and
    ``'zScore'``) and ``histBins``.
    """
    def rowsAtZScore(z):
        # 'zScore' holds values rounded to 2 decimals and is sparse in the
        # tails. Exact float equality against a slider value often matches
        # nothing, which used to produce a NaN load and crash in
        # int(round(NaN)). Select the rows whose z-score is nearest to the
        # requested one instead: exact matches when they exist, otherwise the
        # closest available group.
        gap = (df['zScore'] - z).abs()
        return df[np.isclose(gap, gap.min())]

    maxLoad = rowsAtZScore(maxZ)['LoadHourlyAvg'].max()
    minLoad = rowsAtZScore(minZ)['LoadHourlyAvg'].min()
    # Computed once instead of once per bar.
    shadeLow = minZ != df['zScore'].min()

    fig, ax = plt.subplots(figsize=(12, 6), facecolor='w')
    _, edges, bars = ax.hist(df['LoadHourlyAvg'], edgecolor='k', bins=histBins)

    # zip() pairs each bar with the left edge of its bin (edges has one extra
    # trailing entry, which zip drops).
    for leftEdge, bar in zip(edges, bars):
        outsideWindow = (shadeLow and leftEdge <= minLoad) or leftEdge >= maxLoad
        bar.set_facecolor('#7997a1' if outsideWindow else '#add8e6')

    maxStr = (str(maxZ)+' Standard Deviations: '+str(int(round(maxLoad)))+' BTU/h')
    minStr = (str(minZ)+' Standard Deviations: '+str(int(round(minLoad)))+' BTU/h')

    ax.set_xlabel('BTU per Hour')
    ax.set_ylabel('Count')
    ax.set_title('Histogram of BTU/h\nWithin Z-Score Range')

    ax.annotate(maxStr, (0, 0), xytext=(0.45, 0.7), textcoords='axes fraction', fontsize=14)
    ax.annotate(minStr, (0, 0), xytext=(0.45, 0.6), textcoords='axes fraction', fontsize=14)

    # Close the figure in pyplot's registry; the returned object is still
    # rendered by panel.
    plt.close(fig)
    return fig

def Pfunc():
    """Build the percentile view: both percentile sliders stacked above the plot."""
    percentileView = pn.Column(maxSliderPct, minSliderPct, graphPercent)
    return percentileView
def Zfunc():
    """Build the z-score view: both z-score sliders stacked above the plot."""
    zScoreView = pn.Column(maxSliderZ, minSliderZ, graphZScore)
    return zScoreView

# Only the percentile view is registered as a tab; the z-score view (Zfunc) is
# defined above but not added.
percentileTab = ('Percentile', Pfunc)
tabs.extend([percentileTab])

# Launch the dashboard.
tabs.show()

Launching server at http://localhost:55177


<bokeh.server.server.Server at 0x259936436a0>

