# Data Visualization Module
The purpose of this notebook is to act as a module that can be imported into the Master Notebook to visualize various data.

## Temperature and Relative Humidity


In [29]:
def plotThermalComfortBands(x, y, ylabel, ylim, byDay, day_no = 0, figure_size = (16,8)):
    '''
    Plots a temperature or relative humidity time series on top of shaded comfort bands.

    Inputs:
        - x: indexable sequence of datetimes for the x-axis; x[0] and x[-1] set the
             x-axis limits, so it must not be empty
        - y: values plotted against x (temperature or relative humidity)
        - ylabel: 'Temperature' selects the temperature bands and axis label; any other
                  value selects the relative humidity bands and axis label. The value
                  passed in is also used as the legend entry for the plotted line
        - ylim: y-axis limits, passed unchanged to ax.set_ylim
        - byDay: if truthy, minor ticks labelled HH:MM are added every two hours and the
                 daily major ticks are labelled with date and time; otherwise the daily
                 major ticks are labelled with the date only
        - day_no: not used by this function; kept so existing calls keep working
        - figure_size: a tuple holding the figure size; default is (16,8)
    Draws on a newly created figure; nothing is returned.
    '''
    # Plotting
    fig, ax = plt.subplots(figsize = figure_size)
    ## The line keeps the caller's label; ylabel is replaced by the axis label below
    ax.plot(x,y,color='black',linewidth=3,label=ylabel)

    ## Shading hot/wet and cold/dry areas
    if ylabel == 'Temperature':
        ## Raw string: a backslash followed by 'c' is not a valid escape sequence, and
        ## newer Python versions warn about it in a normal string literal
        ylabel = r'Temperature ($^\circ$F)'
        bp = [0,66,68,72,74,90]
        labels = ['Very Cold','Cold','Ideal','Warm','Hot']
        colors = ['cyan','blue','green','orange','red']
    else:
        ylabel = 'Relative Humidity (%)'
        bp = [0,20,30,60,70,100]
        labels = ['Very Dry','Dry','Ideal','Humid','Very Humid']
        colors = ['red','orange','green','cyan','blue']

    ## One band between each pair of consecutive breakpoints
    for lower, upper, color, label in zip(bp[:-1], bp[1:], colors, labels):
        ax.axhspan(lower,upper,color=color,alpha=0.3,label=label)

    ## Formatting x-axis
    ax.set_xlim([x[0],x[-1]])
    ### Formatting datetime labels
    ax.xaxis.set_major_locator(mdates.DayLocator())
    if byDay:
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d %H:%M'))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
        ax.xaxis.set_minor_locator(mdates.HourLocator(byhour=range(2,24,2)))
    else:
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
    ax.set_xlabel('Time')
    ax.xaxis.set_tick_params(rotation=-30)

    ## Formatting y-axis
    ax.set_ylim(ylim)
    ax.set_ylabel(ylabel)

    ax.legend()
    

## Fine Particulate Matter (PM2.5) Time Series
The following function plots PM2.5 concentration as either $\mu$g/m$^3$ or AQI. The background is formatted such that the breakpoints for AQI are included. Breakpoints for AQI can be found [here](http://aqicn.org/faq/2013-09-09/revised-pm25-aqi-breakpoints/). 

In [66]:
def plotFinePMTimeSeries(x, y, ylabel, byDay, day_no=0, figure_size=(16,8)):
    '''
    Plots a PM2.5 time series on top of shaded air quality category bands.

    Inputs:
        - x: indexable sequence of datetime-like values for the x-axis; it must not be
             empty and its elements must provide a date() method (used for the title)
        - y: values plotted against x (PM2.5 concentration or AQI)
        - ylabel: string used as the y-axis label; 'Air Quality Index' selects the AQI
                  breakpoints, any other value selects the concentration breakpoints
        - byDay: if truthy, minor ticks labelled HH:MM are added every two hours and the
                 daily major ticks are labelled with date and time; otherwise the daily
                 major ticks are labelled with the date only
        - day_no: integer shown in the figure title as the day number
        - figure_size: a tuple holding the figure size; default is (16,8)
    Draws on a newly created figure; nothing is returned.
    '''
    # Plotting
    fig, ax = plt.subplots(figsize = figure_size)

    ### AQI breakpoints
    if ylabel == 'Air Quality Index':
        bp_start = [0,51,101,151,201,301]
        bp_end = [50,100,150,200,300,500]
    else:
        bp_start = [0,12,35.5,55.5,150.5,250.5]
        bp_end = [12,35.4,55.4,150.4,250.4,500]

    ### Shading the figure with the different breakpoints
    categories = [
        ('green','Good'),
        ('yellow','Moderate'),
        ('orange','Unhealthy for Children/Elderly'),
        ('red','Unhealthy'),
        ('purple','Very Unhealthy'),
        ('maroon','Hazardous'),
    ]
    for lower, upper, (color, label) in zip(bp_start, bp_end, categories):
        ax.axhspan(lower,upper,color=color,alpha=0.3,label=label)

    ax.plot(x,y,color='black',linewidth=3,label='PM2.5')

    ## Formatting x-axis
    ax.set_xlim([x[0],x[-1]])
    ### Formatting datetime labels
    ax.xaxis.set_major_locator(mdates.DayLocator())
    if byDay:
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d %H:%M'))
        ax.xaxis.set_minor_formatter(mdates.DateFormatter('%H:%M'))
        ax.xaxis.set_minor_locator(mdates.HourLocator(byhour=range(2,24,2)))
    else:
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
    ax.set_xlabel('Time')
    ax.xaxis.set_tick_params(rotation=-30)

    ## Formatting y-axis
    ax.set_ylim([0,500])
    ax.set_ylabel(ylabel)

    ## Formatting remaining aspects of figure
    ### The title date comes from the third sample, as before; series with fewer than
    ### three samples fall back to their last sample instead of raising an IndexError
    title_sample = x[min(2, len(x)-1)]
    ax.set_title('Day ' + str(day_no) + ': ' + str(title_sample.date()))
    ax.legend()
    

In [67]:
def plotFinePMMedian(date_list, medians):
    '''
    Plots a hatched bar chart of median values, one bar per entry, over shaded AQI bands.

    Inputs:
        - date_list: iterable of objects with month and day attributes; each one becomes
                     a 'month/day' tick label
        - medians: a list that contains the median values, one per bar; each value is
                   also written in a text box above its bar
    Draws on a newly created figure; nothing is returned.
    '''
    # Plotting
    figure, axes = plt.subplots(figsize = (16,8))

    ## Shading the figure with the AQI category bands: (lower, upper, color, category)
    aqi_bands = (
        (0, 50, 'green', 'Good'),
        (51, 100, 'yellow', 'Moderate'),
        (101, 150, 'orange', 'Unhealthy for Children/Elderly'),
        (151, 200, 'red', 'Unhealthy'),
        (201, 300, 'purple', 'Very Unhealthy'),
        (301, 500, 'maroon', 'Hazardous'),
    )
    for lower, upper, shade, category in aqi_bands:
        axes.axhspan(lower,upper,color=shade,alpha=0.3,label=category)

    ## Drawing each bar twice: a translucent white fill, then a hatched outline on top
    positions = np.arange(len(medians))
    axes.bar(positions,medians,color='white',alpha=0.3,linewidth=3,label=' ')
    outlined = axes.bar(positions,medians,color='white',fill=False,hatch='/',edgecolor='black',linewidth=3,label='Median PM2.5')

    ## Putting the median value just above each bar
    for bar_no, bar in enumerate(outlined):
        plt.text(
            bar.get_x() + bar.get_width()/2.0,
            bar.get_height() + 5,
            str(medians[bar_no]),
            ha='center', va='bottom',color='white',bbox=dict(facecolor='black', alpha=1))

    ## Formatting x-axis
    axes.xaxis_date()
    axes.set_xlabel('Date')
    tick_text = [str(night.month) + '/' + str(night.day) for night in date_list]
    plt.xticks(positions,tick_text)

    ## Formatting y-axis
    axes.set_ylabel('Median AQI')
    axes.set_ylim([0,500])

    axes.legend(title='Air Quality Index')

## Sleep Stages Bar Chart
The following functions plot the percentage of sleep spent in the awake, REM, and non-REM stages. The second function also shades the approximate recommended range for each stage.

In [19]:
def plotSleepStagePercentages(sleep_metrics):
    '''
    Plots a stacked bar per night: Non-REM at the bottom, REM in the middle, Awake on top.

    Inputs:
        - sleep_metrics: a dataframe that holds different sleep metrics, indexed by the
                         night (index entries need month and day attributes). The columns
                         'Awake %', 'REM %', 'Non-REM %' and 'Efficiency_Grade' are read
    Each segment is annotated with its percentage truncated to an integer, and the
    efficiency grade is written near the bottom of each bar.
    Draws on a newly created figure; nothing is returned.
    '''
    # Plotting
    fig, ax = plt.subplots(figsize=(16,8))
    index = np.arange(len(sleep_metrics))
    p1 = ax.bar(index,sleep_metrics['Awake %'],bottom=sleep_metrics['Non-REM %']+sleep_metrics['REM %'], color='pink',edgecolor='k',label='Awake')
    p2 = ax.bar(index,sleep_metrics['REM %'], bottom=sleep_metrics['Non-REM %'],color='c',edgecolor='k',label='REM')
    p3 = ax.bar(index,sleep_metrics['Non-REM %'], color='navy',edgecolor='k',label='Non-REM')

    ## Positional copies of the columns for the annotations. Indexing a night-indexed
    ## Series with a plain integer depends on a deprecated positional fallback in pandas,
    ## so the values are read from arrays instead
    awake = np.asarray(sleep_metrics['Awake %'])
    rem = np.asarray(sleep_metrics['REM %'])
    non_rem = np.asarray(sleep_metrics['Non-REM %'])
    grades = np.asarray(sleep_metrics['Efficiency_Grade'])

    def annotate(bars, heights, texts):
        ### Writes one boxed label per bar, centred horizontally on the bar
        for rect, height, text in zip(bars, heights, texts):
            x_pos = rect.get_x() + rect.get_width()/2.0
            ax.text(x_pos, height, text, ha='center', va='bottom',color='white',bbox=dict(facecolor='black', alpha=0.5))

    ## Putting Sleep Efficiency values on the bar
    annotate(p1, [5]*len(grades), [str(grade) for grade in grades])

    ## Putting percentage on the bar, a fixed offset below the top of each segment
    offset = 5
    annotate(p1, non_rem + rem + awake - offset, [str(int(value)) + '%' for value in awake])
    annotate(p2, non_rem + rem - offset, [str(int(value)) + '%' for value in rem])
    annotate(p3, non_rem - offset, [str(int(value)) + '%' for value in non_rem])

    ## Formatting x-axis
    ax.xaxis_date()
    ax.set_xlabel('Date')
    date_ticks = [str(night.month) + '/' + str(night.day) for night in sleep_metrics.index]
    ax.set_xticks(index)
    ax.set_xticklabels(date_ticks)

    ## Formatting y-axis
    ax.set_yticks(np.arange(0,105,10))
    ax.set_ylabel('Percent of Sleep Spent in Each Stage')

    ax.legend()

In [24]:
def plotSleepStageBenchmarks(sleep_metrics):
    '''
    Plots grouped bars of the nightly Awake, REM and Non-REM percentages, each drawn in
    front of a hatched bar marking a fixed reference range for that stage.

    Inputs:
        - sleep_metrics: a dataframe that holds different sleep metrics, indexed by the
                         night (index entries need month and day attributes). The columns
                         'Awake %', 'REM %' and 'Non-REM %' are read
    Draws on a newly created figure; nothing is returned.
    '''
    # Plotting
    figure, axes = plt.subplots(figsize=(16,8))

    ## Generating bar chart details
    centers = np.arange(len(sleep_metrics))
    bar_width = 0.2
    pad = 0.1
    ### The reference bars are slightly wider so they show behind the data bars
    reference_width = bar_width+pad/2

    ## One entry per stage:
    ## (x positions, column, color, reference bottom, reference height, reference label, data label)
    stages = [
        (centers-pad-bar_width, 'Awake %', 'pink', 5, 15, 'Typical Awake %', 'Your Awake %'),
        (centers, 'REM %', 'c', 13, 10, 'Recommended REM %', 'Your REM %'),
        (centers+pad+bar_width, 'Non-REM %', 'navy', 52, 83-52, 'Recommended Non-REM %', 'Your Non-REM %'),
    ]

    ## Creating the shaded areas and the bars, reference first so the data bar sits on top
    for positions, column, shade, ref_bottom, ref_height, ref_label, data_label in stages:
        axes.bar(positions,ref_height,bottom=ref_bottom,width=reference_width,color=shade,alpha=0.3,hatch='/',edgecolor='k',label=ref_label)
        axes.bar(positions,sleep_metrics[column],width=bar_width,color=shade,edgecolor='k',label=data_label)

    ## Formatting x-axis
    axes.xaxis_date()
    axes.set_xlabel('Date')
    tick_text = [str(night.month) + '/' + str(night.day) for night in sleep_metrics.index]
    plt.xticks(centers,tick_text)

    ## Formatting y-axis
    axes.set_ylabel('Percent of Time Spent in Sleep Stage')
    axes.set_ylim([0,100])
    axes.set_yticks(np.arange(0,105,10))

    axes.legend()

## Sleep and PM2.5 Metrics

In [1]:
def plotPMandSleepEfficiency(concentration_metrics, cm, sleep_metrics, sm, sm_lim):
    '''
    Plots a scatterplot of a nightly PM2.5 metric (y-axis, over shaded AQI bands)
    against a sleep metric (x-axis).

    Inputs:
        - concentration_metrics: a dataframe indexed by the night that holds nightly PM metrics
        - cm: string defining the concentration metric (column) to use
        - sleep_metrics: a dataframe indexed by the night that holds sleep metrics
        - sm: string that defines which sleep metric (column) to use. 'Efficiency' also
              draws the grade boundaries at 80, 85 and 90 with their names; 'Latency' is
              multiplied by 60 before plotting (presumably stored in hours - confirm
              against the data source); any other column is plotted as-is
        - sm_lim: list holding the lower and upper x-axis limits for the sleep metric
    If the two dataframes have different lengths, a warning is issued and nothing is
    drawn. Nothing is returned.
    '''
    # Imported locally so the function does not rely on a file-level import
    import warnings

    # Must check to see if the two datasets have the same number of days
    if len(concentration_metrics) != len(sleep_metrics):
        warnings.warn(
            'plotPMandSleepEfficiency: concentration_metrics and sleep_metrics have '
            'different lengths; nothing was plotted',
            stacklevel=2)
        return

    # Resolving the x data before creating the figure, so that a missing column does
    # not leave an empty figure behind
    if sm == 'Efficiency':
        x = sleep_metrics[sm]
        xlabel = 'Sleep Quality: Efficiency (%)'
    elif sm == 'Latency':
        ## Converting to Minutes
        x = sleep_metrics[sm]*60
        xlabel = 'Sleep Quality: Latency (Minutes)'
    else:
        ## Other metrics used to fail on an unset variable; plot them unconverted
        x = sleep_metrics[sm]
        xlabel = 'Sleep Quality: ' + sm

    # Plotting median concentration and sleep efficiency
    fig, ax = plt.subplots(figsize=(16,8))

    ## Shading the figure with the different breakpoints
    aqi_bands = [
        (0,50,'green','Good'),
        (51,100,'yellow','Moderate'),
        (101,150,'orange','Unhealthy for Children/Elderly'),
        (151,200,'red','Unhealthy'),
        (201,300,'purple','Very Unhealthy'),
        (301,500,'maroon','Hazardous'),
    ]
    for lower, upper, color, label in aqi_bands:
        ax.axhspan(lower,upper,color=color,alpha=0.3,label=label)

    if sm == 'Efficiency':
        ## Overlaying the Sleep Efficiency Grade
        for grade in [80,85,90]:
            ax.axvline(x=grade,linewidth=2,color ='black')
        ### Grade names sit at y = -15, i.e. below the axes (the y-axis starts at 0)
        for x_pos, grade_name in [(65,'Bad'),(82.5,'Poor'),(87.5,'Normal'),(95,'Excellent')]:
            ax.text(x_pos, -15, grade_name, ha='center', va='bottom',color='black')

    ## Plotting the data points
    ax.scatter(x,concentration_metrics[cm],s=500,c='black',alpha=1,edgecolor='black',label=None)

    ## Formatting x-axis
    ax.set_xlabel(xlabel)
    ax.set_xlim(sm_lim)

    ## Formatting y-axis
    ax.set_ylabel('PM2.5 AQI: ' + cm)
    ax.set_ylim([0,500])
    ax.set_yticks([50,100,150,200,300,500])

    ax.legend(title='Air Quality Index')

## Sleep Metrics and Perceived Sleep Metrics

In [41]:
def compareSleepQuality(sleep_metrics,sleep_scores,sm,sm_lim,psm,psm_lim):
    '''
    Plots a scatterplot of a measured sleep metric (x-axis) against a reported sleep
    metric (y-axis) for the nights that appear in the index of both dataframes.

    Inputs:
        - sleep_metrics: a dataframe indexed by the night that holds sleep metrics
        - sleep_scores: a dataframe indexed by the night that holds the reported
                        (perceived) sleep metrics
        - sm: string holding the name of the sleep metric (column of sleep_metrics) to use
        - sm_lim: list holding the lower and upper bounds for the sleep metric
        - psm: string holding the name of the perceived sleep metric (column of
               sleep_scores) to use
        - psm_lim: list holding the lower and upper bounds for the perceived sleep metric
    A 7-9 band is shaded on whichever axis shows 'Time_Asleep', and the efficiency grade
    bands are shaded when sm is 'Efficiency'.
    Draws on a newly created figure; nothing is returned.
    '''
    # Getting the x and y values
    ## Row positions of every night in sleep_scores, so that each night in sleep_metrics
    ## is matched with one lookup instead of a scan over all of sleep_scores
    score_rows = {}
    for j, night in enumerate(sleep_scores.index):
        ### Labels that do not equal themselves (NaT/NaN) can never match anything
        if night == night:
            score_rows.setdefault(night, []).append(j)

    ## .iloc is used because the rows are addressed by position, not by night label
    measured = sleep_metrics[sm]
    reported = sleep_scores[psm]
    x = []
    y = []
    for i, night in enumerate(sleep_metrics.index):
        for j in score_rows.get(night, ()):
            x.append(measured.iloc[i])
            y.append(reported.iloc[j])

    # Plotting
    fig, ax = plt.subplots(figsize=(16,8))

    ## Placing bands of recommended values
    if sm == 'Time_Asleep':
        ax.axvspan(7,9,color='green',alpha=0.2,label='Recommended Hours of Sleep')
    if psm == 'Time_Asleep':
        ax.axhspan(7,9,color='green',alpha=0.2)
    if sm == 'Efficiency':
        ax.axvspan(0,80,linewidth=2,facecolor ='red',edgecolor='black',alpha=0.3,label='Bad')
        ax.axvspan(80,85,linewidth=2,facecolor ='orange',edgecolor='black',alpha=0.3,label='Poor')
        ax.axvspan(85,90,linewidth=2,facecolor ='white',edgecolor='black',alpha=0.3,label='Normal')
        ax.axvspan(90,100,linewidth=2,facecolor ='green',edgecolor='black',alpha=0.3,label='Excellent')

    ## Scattering the data
    ax.scatter(x,y,s=500,c='black',alpha=1,edgecolor='black')

    ## Formatting the x-axis
    ax.set_xlabel('Fitbit Sleep Metric: ' + sm)
    ax.set_xlim(sm_lim)

    ## Formatting the y-axis
    ax.set_ylabel('Reported Sleep Metric: ' + psm)
    ax.set_ylim(psm_lim)

    ## The legend is only drawn when a labelled band exists, and it is only titled
    ## 'Sleep Efficiency' when its entries are the efficiency grades
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(title='Sleep Efficiency' if sm == 'Efficiency' else None)

## Supporting Visualization Functions

In [13]:
def saveFigure(name, directory, save=True):
    '''
    Titles the current plot with the given name and writes the current figure to a PDF.

    Inputs:
        - name: string used both as the plot title and as the file name (without extension)
        - directory: string placed directly in front of the name to form the path; no
                     separator is inserted, so it should end with one
        - save: boolean switch; when it does not equal True the function does nothing,
                so calls can stay in place without being commented out
    '''
    ## Nothing to do when saving is switched off
    if not (save == True):
        return

    ## Saving the Figure
    plt.title(name)
    target = directory + name + '.pdf'
    plt.savefig(target,bbox_inches="tight")

In [3]:
def createCmap(cvals,colors):
    '''
    Builds a linearly interpolated color map from paired values and colors.

    Inputs:
        - cvals: list of integers or floats giving the value at which each color is placed;
                 they are rescaled so that min(cvals) maps to 0 and max(cvals) maps to 1
        - colors: list of strings naming the color placed at each entry of cvals
    Returns the color map built from the rescaled (value, color) pairs
    '''
    scale = plt.Normalize(min(cvals),max(cvals))
    anchors = [(scale(value), color) for value, color in zip(cvals, colors)]
    return matplotlib.colors.LinearSegmentedColormap.from_list("", anchors)

In [12]:
def plotHeatMap(days,data,lims,colors,year=2019):
    '''
    Plots a calendar heat map (one column per week, one row per weekday), similar to
    the commit contribution map used in GitHub.

    Inputs:
        - days: list of DateTimes that specify the days the data are collected over
        - data: list of floats containing the data to be plotted, one per entry in days
        - lims: list of integers/floats that holds the limits for the breakpoints
        - colors: list of strings that holds the colors for each limit (length must be
                  one less than lims, otherwise nothing is plotted)
        - year: integer representing the year used to place the month labels
    Each day is placed by its ISO week number and ISO weekday, so days that ISO assigns
    to a week of the neighbouring year are drawn in that week's column.
    Draws on a newly created figure; nothing is returned.
    '''
    # Checking for correct lengths of lims and colors
    if len(colors) != len(lims)-1:
        return

    # Creating list of dates for the given year (365 consecutive days from January 1st)
    first_day = datetime(year,1,1)
    calendar_days = [first_day + timedelta(days=offset) for offset in range(365)]

    # Creating the Figure
    ## Setting up the figure
    fig,ax = plt.subplots(figsize=[52,7])
    ax.set_xlim([0,52])
    ax.set_ylim([-7,0])
    ax.tick_params(axis='both',labelbottom=False,labelleft=False)

    ### Drawing the grids
    for row in range(1,7):
        ax.axhline(-row,color='black')
    for column in range(1,52):
        ax.axvline(column,color='black')

    ### Putting the labels as text
    weekday_names = ['M','T','W','Th','F','Sa','Su']
    for row, weekday in enumerate(weekday_names):
        ax.text(-0.5,-0.6-row,weekday,fontsize=24,horizontalalignment='center')

    #### ISO calendar tuples for the first day of every month
    month_starts = [day.isocalendar() for day in calendar_days if day.day == 1]
    month_names = ['Jan','Feb','Mar','Apr','May','June','July','Aug','Sep','Oct','Nov','Dec']
    for month, iso in zip(month_names, month_starts):
        week_column = iso[1]-1
        weekday_row = iso[2]*-1
        ax.text(week_column+2,0.5,month,fontsize=24,horizontalalignment='center')
        ax.text(week_column,weekday_row+0.7,'1',fontsize=16)

    ## Plotting the actual data
    ### Each marker is centred in the cell of its week column and weekday row
    week_x = [date.isocalendar()[1]-0.5 for date in days]
    weekday_y = [(date.isocalendar()[2]-0.5)*-1 for date in days]
    ### One color anchor at the midpoint of each pair of consecutive limits
    midpoints = [lower+(upper-lower)/2 for lower, upper in zip(lims, lims[1:])]
    sc = plt.scatter(week_x,weekday_y,c=data,vmin=lims[0],vmax=lims[-1],cmap=createCmap(midpoints,colors),s=1000,marker='s',edgecolor='black')
    cbar = plt.colorbar(sc,ticks=lims)
    cbar.ax.tick_params(labelsize=20)
