In [None]:
#########################################################################################################################
## Project: American Time Use Survey Analysis
## Objectives: Create a narrative describing how the COVID-19 pandemic impacted how Americans in general, and different
##             demographic groups of Americans, spend their time.
## Date: Last updated 04/16/2023
## Author: Jenna Bedrava
#########################################################################################################################

In [2]:
#importing packages
import pandas as pd
import altair as alt

### Scatterplot of TSNE Dimensions and Bar Chart of Minutes Each Month

In [3]:
#loading the tSNE data that feeds the scatterplot and the monthly bar chart below
#NOTE(review): absolute, machine-specific path - the notebook only runs where this file exists
tsne_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/tsne.csv'
tsne = pd.read_csv(tsne_csv)

In [4]:
#storing time_period as an ordered categorical (Pre -> Peak -> Post) for the legend
period_order = ['Pre-COVID Peak', 'COVID Peak', 'Post-COVID Peak']
tsne['time_period'] = pd.Categorical(tsne['time_period'], categories = period_order, ordered = True)

In [5]:
#scatterplot of tSNE dimensions
period_order = ['Pre-COVID Peak', 'COVID Peak', 'Post-COVID Peak']
period_colors = ['#f58a42', '#4b6be5', '#7f3eb0']

#padding each axis beyond the observed min/max of its tSNE dimension
tsne1_domain = [(tsne.tsne1.min() - 4), (tsne.tsne1.max() + 6)]
tsne2_domain = [(tsne.tsne2.min() - 2), (tsne.tsne2.max() + 2)]

#NOTE(review): both position channels sort on the `date` field - presumably so the line layer
#connects the months in date order; confirm against the rendered chart
date_sort = {'field': 'date'}

#shared encodings for every layer of the scatterplot (and the starting point of the bar chart)
base = alt.Chart(tsne).encode(
    x = alt.X('tsne1:Q', sort = date_sort, axis = alt.Axis(title = 'tSNE Dimension 1'),
              scale = alt.Scale(domain = tsne1_domain)),
    y = alt.Y('tsne2:Q', sort = date_sort, axis = alt.Axis(title = 'tSNE Dimension 2'),
              scale = alt.Scale(domain = tsne2_domain)),
    color = alt.Color('time_period:N', legend = alt.Legend(title = None), sort = period_order,
                      scale = alt.Scale(domain = period_order, range = period_colors)),
)

#three layers built from the same encodings: dots, a faint line, and a date label above each dot
points = base.mark_circle()
line = base.mark_line().encode(opacity = alt.value(.3))
text = base.mark_text(dy = -10).encode(text = 'date_label')
scatter = points + line + text

#bar chart of minutes spent on activities with significant differences

#based on avg_time_2019_2021_bymonth.csv, the activities with significantly different means in 2019 and 2021 are:
sig_activities = ['Eating and Drinking', 'Civic Duties', 'Household Activities', 'Personal Care',
                  'Phone Calls', 'Consumer Purchasing', 'Religious/Spiritual Activities',
                  'Socializing and Leisure', 'Traveling', 'Volunteering']

#hovering over (or near) a point in the scatterplot picks the month shown in the bar chart
selectMonth = alt.selection_single(on = 'mouseover', nearest = True)

#activity columns are folded into (ACTIVITY, value) rows, ranked by value, and drawn as bars
#sorted by that rank; the final filter keeps only the rows matching the hovered point
month_barchart = (
    base.transform_fold(sig_activities, as_ = ['ACTIVITY', 'value'])
    .mark_bar()
    .transform_filter(alt.FieldOneOfPredicate("ACTIVITY", oneOf = sig_activities))
    .transform_window(sort = [alt.SortField("value", order = "descending")], val_rank = "rank(*)")
    .encode(
        x = alt.X('value:Q', axis = alt.Axis(title = 'Weighted Average Minutes Spent')),
        y = alt.Y('ACTIVITY:N', sort = alt.EncodingSortField(field = "val_rank", order = "ascending"),
                  axis = alt.Axis(title = None))
    )
    .transform_filter(selectMonth)
)

#horizontally concatenating the two
scatter.add_selection(selectMonth).properties(
    title = 'Trends in Time Usage by Month via tSNE Dimension Reduction') | month_barchart.properties(title = 'Average Minutes Spent on Each Activity')

### Barchart of Difference in Minutes Spent in 2019 and 2021 

In [6]:
#loading the data cleaned in R - AVERAGES HERE ARE WEIGHTED
barchart_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/avg_time_2019_2021.csv'
barchart_data = pd.read_csv(barchart_csv)

#flipping the sign of DIFF so differences read as 2021-2019: positive values reflect an increase in that activity in 2021
barchart_data['DIFF'] = -barchart_data['DIFF']

In [7]:
#dropdown options for age group and sex are taken from the values present in the data
ages = list(barchart_data['AGE_GROUP'].unique())
sexes = list(barchart_data['SEX'].unique())

#each dropdown starts on the first value in its list
selectSex = alt.selection_single(
    fields = ['SEX'],
    init = {'SEX': sexes[0]},
    bind = alt.binding_select(options = sexes, name = 'Select sex: ')
)
selectAge = alt.selection_single(
    fields = ['AGE_GROUP'],
    init = {'AGE_GROUP': ages[0]},
    bind = alt.binding_select(options = ages, name = 'Select age group: ')
)

#colors and tests shared by the bars and their labels
increase_color = "#3cb371"  #positive color
decrease_color = "#ff6666"  #negative color
is_increase = alt.datum.DIFF > 0
is_decrease = alt.datum.DIFF < 0
is_significant = alt.datum.significant == 'YES'

#x axis spans the full DIFF range of the data (padded by 4) so it stays fixed across dropdown choices
diff_domain = [(barchart_data.DIFF.min() - 4), (barchart_data.DIFF.max() + 4)]

#bar chart: filter to the chosen age group and sex, rank activities by DIFF, draw bars sorted by that rank
barchart = (
    alt.Chart(barchart_data, title = "Comparing Average Time Spent in 2021 and 2019")
    .transform_filter(selectAge & selectSex)
    .transform_window(sort = [alt.SortField("DIFF", order = "descending")], diff_rank = "rank(*)")
    .mark_bar()
    .encode(
        y = alt.Y('ACTIVITY:N', sort = alt.EncodingSortField(field = "diff_rank", order = "ascending"),
                  axis = alt.Axis(title = None)),
        x = alt.X('DIFF:Q', axis = alt.Axis(title = "Average Difference Between 2021 and 2019 in Minutes"),
                  scale = alt.Scale(domain = diff_domain)),
        color = alt.condition(is_increase, alt.value(increase_color), alt.value(decrease_color))
    )
)

#text labels: one layer sits to the right of positive bars, the other to the left of negative bars;
#each layer hides itself (opacity 0) on bars of the other sign, and labels whose `significant`
#field is not 'YES' are drawn in light gray
text_right = barchart.mark_text(align = "left", baseline = "middle", dx = 3).encode(
    text = alt.Text("DIFF:Q", format = '.2f'),
    opacity = alt.condition(is_increase, alt.value(1), alt.value(0)),
    color = alt.condition(is_significant, alt.value(increase_color), alt.value('lightgray'))
)
text_left = barchart.mark_text(align = "right", baseline = "middle", dx = -3).encode(
    text = alt.Text("DIFF:Q", format = '.2f'),
    opacity = alt.condition(is_decrease, alt.value(1), alt.value(0)),
    color = alt.condition(is_significant, alt.value(decrease_color), alt.value('lightgray'))
)

barchart.add_selection(selectAge, selectSex) + text_right + text_left

### Bar Chart of Monthly Averages of Time Spent for All Activities

In [8]:
#loading the monthly averages
monthly_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/monthly_combined.csv'
monthly_combined = pd.read_csv(monthly_csv)

#building a first-of-the-month DATE from the YEAR and MONTH columns
ymd_parts = monthly_combined[['YEAR', 'MONTH']].assign(DAY = 1)
monthly_combined['DATE'] = pd.to_datetime(ymd_parts)

#labelling every row with its time period; the later cutoff is applied first so the
#earlier cutoff overrides it for the oldest months
through_peak = monthly_combined['DATE'] <= '2021-01-01'
before_peak = monthly_combined['DATE'] <= '2020-02-01'
monthly_combined['time_period'] = 'Post-COVID Peak'
monthly_combined.loc[through_peak, 'time_period'] = 'COVID Peak'
monthly_combined.loc[before_peak, 'time_period'] = 'Pre-COVID Peak'

#ordering time_period variable
period_order = ['Pre-COVID Peak', 'COVID Peak', 'Post-COVID Peak']
monthly_combined['time_period'] = pd.Categorical(monthly_combined['time_period'], categories = period_order, ordered = True)

#calendar order for the month labels
#NOTE(review): there is no 'Apr 2020' entry - presumably the data has no rows for that month;
#any label missing from this list becomes NaN in the categorical below
my_order = ['Jan 2019', 'Feb 2019', 'Mar 2019', 'Apr 2019', 'May 2019', 'Jun 2019', 'Jul 2019', 'Aug 2019', 'Sep 2019', 'Oct 2019', 'Nov 2019', 'Dec 2019',
            'Jan 2020', 'Feb 2020', 'Mar 2020', 'May 2020', 'Jun 2020', 'Jul 2020', 'Aug 2020', 'Sep 2020', 'Oct 2020', 'Nov 2020', 'Dec 2020',
            'Jan 2021', 'Feb 2021', 'Mar 2021', 'Apr 2021', 'May 2021', 'Jun 2021', 'Jul 2021', 'Aug 2021', 'Sep 2021', 'Oct 2021', 'Nov 2021', 'Dec 2021']

#month_label (e.g. 'Jan 2019') stored as an ordered categorical following my_order
month_text = monthly_combined['DATE'].dt.strftime('%b %Y')
monthly_combined['month_label'] = pd.Categorical(month_text, categories = my_order, ordered = True)

In [9]:
#dropdown listing every activity in the monthly data; starts on the first one
activities = list(monthly_combined['ACTIVITY'].unique())

selectActivity = alt.selection_single(
    fields = ['ACTIVITY'],
    init = {'ACTIVITY': activities[0]},
    bind = alt.binding_select(options = activities, name = 'Select activity: ')
)

period_order = ['Pre-COVID Peak', 'COVID Peak', 'Post-COVID Peak']
period_colors = ['#f58a42', '#4b6be5', '#7f3eb0']

#one bar per month for the selected activity, colored by time period; months follow my_order
barchart_allmonths = (
    alt.Chart(monthly_combined)
    .mark_bar()
    .encode(
        x = alt.X('month_label:N', sort = my_order, axis = alt.Axis(title = None)),
        y = alt.Y('mean:Q', axis = alt.Axis(title = "Average Minutes Spent")),
        color = alt.Color('time_period:N', sort = period_order, title = None,
                          scale = alt.Scale(domain = period_order, range = period_colors)),
        tooltip = alt.Tooltip(['mean:Q'], format = '.2f', title = 'Average Number of Minutes')
    )
    .transform_filter(selectActivity)
    .properties(width = 800, height = 100)
)

barchart_allmonths.add_selection(selectActivity).configure_legend(labelFontSize = 14)

### Waterfall Plot of % Change in Number of People Reporting Doing Activity

In [10]:
#loading the data behind the waterfall plot
waterfall_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/waterfall.csv'
waterfall = pd.read_csv(waterfall_csv)

In [11]:
#plot was too crowded with this many time points, so only using Jun 2019 - Jun 2021
#(the months listed here are the ones being dropped)
month_list = ['Jan 2019', 'Feb 2019', 'Mar 2019', 'Apr 2019', 'May 2019',
              'Jul 2021', 'Aug 2021', 'Sep 2021', 'Oct 2021', 'Nov 2021', 'Dec 2021']

#taking an explicit copy: columns of this frame are assigned to afterwards, and writing into a
#boolean-filtered slice of the original frame raises pandas' SettingWithCopyWarning
waterfall = waterfall[~waterfall['label'].isin(month_list)].copy()

In [12]:
#making Jun 2019 amount values equal to percent_nonzero
#(Jun 2019 is the first month kept for the plot; the same row mask is used on both sides of the
#assignment, and .loc is used on the right-hand side to avoid chained indexing)
is_first_month = waterfall['label'] == 'Jun 2019'
waterfall.loc[is_first_month, 'amount'] = waterfall.loc[is_first_month, 'percent_nonzero']

In [13]:
#storing label as an ordered categorical so the months sort in calendar order (Jun 2019 - Jun 2021)
waterfall_months = ['Jun 2019', 'Jul 2019', 'Aug 2019', 'Sep 2019', 'Oct 2019', 'Nov 2019', 'Dec 2019',
                    'Jan 2020', 'Feb 2020', 'Mar 2020', 'May 2020', 'Jun 2020', 'Jul 2020', 'Aug 2020', 'Sep 2020', 'Oct 2020', 'Nov 2020', 'Dec 2020',
                    'Jan 2021', 'Feb 2021', 'Mar 2021', 'Apr 2021', 'May 2021', 'Jun 2021']
waterfall['label'] = pd.Categorical(waterfall['label'], categories = waterfall_months, ordered = True)

In [14]:
#rows need to be in month order within each activity
waterfall = waterfall.sort_values(by = ['ACTIVITY', 'label'])

In [15]:
#interactive component: dropdown over the activities in the waterfall data, starting on the first one
activities2 = list(waterfall['ACTIVITY'].unique())

selectActivity2 = alt.selection_single(
    fields = ['ACTIVITY'],
    init = {'ACTIVITY': activities2[0]},
    bind = alt.binding_select(options = activities2, name = 'Select activity: ')
)

#code from https://altair-viz.github.io/gallery/waterfall_chart.html
#the window step adds a running sum of `amount` and the label of the following row;
#the calculated fields are derived from those two and are what the layers below plot
month_axis = alt.X("label:O", axis = alt.Axis(title = "Months", labelAngle = 30), sort = None)

base_chart = (
    alt.Chart(waterfall)
    .transform_window(
        window_sum_amount = "sum(amount)",
        window_lead_label = "lead(label)",
    )
    .transform_calculate(
        calc_lead = "datum.window_lead_label === null ? datum.label : datum.window_lead_label",
        calc_prev_sum = "datum.label === 'End' ? 0 : datum.window_sum_amount - datum.amount",
        calc_amount = "datum.label === 'End' ? datum.window_sum_amount : datum.amount",
        calc_text_amount = "(datum.label !== 'Begin' && datum.label !== 'End' && datum.calc_amount > 0 ? '+' : '') + datum.calc_amount",
        calc_center = "(datum.window_sum_amount + datum.calc_prev_sum) / 2",
        calc_sum_dec = "datum.window_sum_amount < datum.calc_prev_sum ? datum.window_sum_amount : ''",
        calc_sum_inc = "datum.window_sum_amount > datum.calc_prev_sum ? datum.window_sum_amount : ''",
    )
    .encode(x = month_axis)
)

# alt.condition does not support multiple if else conditions, so the color rule is
# written as a plain dictionary. See https://stackoverflow.com/a/66109641
# Tests are tried in order; the trailing "value" is the fallback color.
color_coding = {
    "condition": [
        {"test": "datum.label === 'Begin' || datum.label === 'End'", "value": "#878d96"},
        {"test": "datum.calc_amount < 0", "value": "#ff6666"},
    ],
    "value": "#3cb371",
}

# Each bar spans from the previous running total to the new running total
bar = base_chart.mark_bar(size = 35).encode(
    y = alt.Y("calc_prev_sum:Q", title = "Percent of People Who Reported Doing Activity", axis = alt.Axis(format = "%")),
    y2 = alt.Y2("window_sum_amount:Q"),
    color = color_coding,
)

# The "rule" chart is for the horizontal lines that connect the bars
rule = base_chart.mark_rule(xOffset = -17.5, x2Offset = 17.5).encode(
    y = alt.Y("window_sum_amount:Q"),
    x2 = alt.X2("calc_lead"),
)

# Running totals as text: above the bar after an increase, below the bar after a decrease
text_pos_values_top_of_bar = (
    base_chart.mark_text(baseline = "bottom", dy = -4)
    .encode(
        text = alt.Text("calc_sum_inc:N", format = '.2%'),
        y = alt.Y("calc_sum_inc:Q"),
    )
    .transform_filter(alt.datum.calc_sum_inc > 0.00)
)
text_neg_values_bot_of_bar = (
    base_chart.mark_text(baseline = "top", dy = 4)
    .encode(
        text = alt.Text("calc_sum_dec:N", format = '.2%'),
        y = "calc_sum_dec:Q",
    )
    .transform_filter(alt.datum.calc_sum_dec > 0.00)
)

# The change itself, in white, at the vertical center of each bar
text_bar_values_mid_of_bar = base_chart.mark_text(baseline = "middle").encode(
    text = alt.Text("calc_text_amount:N", format = '.2%'),
    y = "calc_center:Q",
    color = alt.value("white"),
)


def _month_boundary_rule(month_label):
    """Dashed grey vertical rule, shifted left of its x position, visible only at `month_label`.

    A rule is encoded for every row; rows whose label differs from `month_label`
    get opacity 0.
    """
    return base_chart.mark_rule(xOffset = -19, strokeWidth = 2.5, strokeDash = [1, 1]).encode(
        x = alt.X('label:O', sort = None),
        color = alt.value("darkgrey"),
        opacity = alt.condition(alt.datum.label == month_label, alt.value(1), alt.value(0))
    )


#we need a line between Feb 2020 and March 2020, and another between Jan 2021 and Feb 2021
covid_begins = _month_boundary_rule('Mar 2020')
covid_ends = _month_boundary_rule('Feb 2021')

#stacking all layers; the activity filter is applied to the layered chart as a whole
alt.layer(bar, rule, text_pos_values_top_of_bar, text_neg_values_bot_of_bar, text_bar_values_mid_of_bar,
          covid_begins, covid_ends
).add_selection(selectActivity2).transform_filter(selectActivity2).properties(height = 300, width = 1000)

### Area Chart and Forest Plot of GLMM Results

In [17]:
#loading the model results used by the forest plot, bubble chart and small multiples
models_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/models.csv'
models = pd.read_csv(models_csv)

In [18]:
#interactive component
#the dropdown for this plot is built from the activities present in `models`; the forest plot
#previously reused the waterfall chart's selection, so its options and starting value came from
#a different dataset and the list computed here went unused
activities2 = list(models['ACTIVITY'].unique())

selectModelActivity = alt.selection_single(
    fields = ['ACTIVITY'],
    init = {'ACTIVITY': activities2[0]},
    bind = alt.binding_select(options = activities2, name = 'Select activity: ')
)

#green for positive estimates, red otherwise
colorCondition = alt.condition(alt.datum.ESTIMATE > 0, alt.value("#3cb371"), alt.value("#ff6666"))

#both layers rank the rows by ESTIMATE (descending) and order the y axis by that rank
rank_by_estimate = dict(sort = [alt.SortField("ESTIMATE", order = "descending")], est_rank = "rank(*)")

#point estimates
points = alt.Chart(models, title = 'Generalized Linear Model Results for Selected Activity').mark_point(
    filled = True, color = 'black'
).transform_window(**rank_by_estimate).encode(
    alt.X('ESTIMATE:Q'),
    alt.Y('VARIABLE:N', sort = alt.EncodingSortField(field = "est_rank", order = "ascending"), axis = alt.Axis(title = None)),
    color = colorCondition,
    tooltip = alt.Tooltip(value = None)
)

#error bars spanning lowerCI to upperCI
error_bars = alt.Chart(models).mark_errorbar().transform_window(**rank_by_estimate).encode(
    alt.X('upperCI:Q', scale = alt.Scale(zero = False), axis = alt.Axis(title = 'Parameter Estimates and 99.4% Confidence Intervals')),
    alt.X2('lowerCI:Q'),
    alt.Y('VARIABLE:N', sort = alt.EncodingSortField(field = "est_rank", order = "ascending"), axis = alt.Axis(title = None)),
    color = colorCondition,
    tooltip = alt.Tooltip(value = None)
)

#layering the two and restricting both to the activity chosen in the dropdown
forest_plot = (points + error_bars).add_selection(selectModelActivity).transform_filter(selectModelActivity)

In [19]:
#bubbles are only visible where SIGNIFICANT equals 1
opacityCondition = alt.condition(alt.datum.SIGNIFICANT == 1, alt.value(.8), alt.value(0))

#one bubble per (activity, variable) pair, sized by the absolute value of the estimate
#y sort: the sort field used to be given as 'VARIABLE:N', but sort fields take a plain field name
#(shorthand is not parsed there), so it pointed at a field that does not exist; 'ascending' sorts
#the axis by the VARIABLE values themselves
bubble_chart = alt.Chart(models, title = 'Significant Effect Estimates from Generalized Linear Models').mark_circle(
    opacity = 0.8, stroke = 'black', strokeWidth = .7
).transform_calculate(
    EST_ABS = abs(alt.datum.ESTIMATE)
).encode(
    y = alt.Y('VARIABLE:N', sort = 'ascending', axis = alt.Axis(title = None)),
    x = alt.X('ACTIVITY:N', axis = alt.Axis(title = None)),
    size = alt.Size('EST_ABS:Q', legend = None, scale = alt.Scale(range = [20, 1500])),
    color = colorCondition,
    tooltip = alt.Tooltip(['INTERP:N'], title = 'Interpretation'),
    opacity = opacityCondition
).transform_filter(
    #the intercept rows and the Civic Duties activity are left out of this chart
    alt.datum.VARIABLE != 'Intercept'
).transform_filter(
    alt.datum.ACTIVITY != 'Civic Duties'
)

#bubble overview on the left, forest plot for the selected activity on the right
bubble_chart | forest_plot

### Small Multiple Bar Charts of Time Period Effect Sizes

In [20]:
def _percent_change_row(activity_names):
    """Build one row of small-multiple bar charts, one column per activity in `activity_names`.

    Uses the `models` frame. Keeps only the 'COVID Peak' and 'Post-COVID Peak' rows of
    VARIABLE and plots PERCENT_CHANGE multiplied by SIGN_PERCENT_CHANGE for each.
    """
    period_terms = ['COVID Peak', 'Post-COVID Peak']
    return alt.Chart(models).transform_calculate(
        PERCENT_CHANGE_SIGN = alt.datum.PERCENT_CHANGE * alt.datum.SIGN_PERCENT_CHANGE
    ).transform_filter(
        alt.FieldOneOfPredicate('VARIABLE', oneOf = ['COVID Peak','Post-COVID Peak'])
    ).transform_filter(
        alt.FieldOneOfPredicate('ACTIVITY', oneOf = activity_names)
    ).mark_bar().encode(
        x = alt.X('VARIABLE:N', axis = alt.Axis(title = None, labelAngle = 30)),
        y = alt.Y('PERCENT_CHANGE_SIGN:Q', axis = alt.Axis(title = 'Percent Change from Pre-COVID Peak Months')),
        color = alt.Color('VARIABLE:N', scale = alt.Scale(domain = period_terms, range = ['#4b6be5', '#7f3eb0']), legend = None),
        column = alt.Column('ACTIVITY:N', title = None)
    ).properties(width = 100)


#the sixteen activities are split over two rows of eight
top = _percent_change_row(['Caring for Household', 'Caring for Non-Household', 'Education', 'Eating and Drinking',
                           'Household Activities', 'Household Services', 'Personal Care', 'Phone Calls'])

bottom = _percent_change_row(['Care Services', 'Consumer Purchasing', 'Religious/Spiritual Activities',
                              'Socializing and Leisure', 'Sports and Exercise', 'Traveling',
                              'Volunteering', 'Work'])

top & bottom

### Line Plot of Healthcare Workers (HCW) vs. Non HCW

In [21]:
#loading the monthly averages by occupation group
occ_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/avg_time_all_years_bymonth_occ.csv'
occ = pd.read_csv(occ_csv)

In [None]:
#storing MONTH_YEAR as an ordered categorical in calendar order
occ_month_order = ['Jan 2019', 'Feb 2019', 'Mar 2019', 'Apr 2019', 'May 2019', 'Jun 2019', 'Jul 2019', 'Aug 2019', 'Sep 2019', 'Oct 2019', 'Nov 2019', 'Dec 2019',
                   'Jan 2020', 'Feb 2020', 'Mar 2020', 'May 2020', 'Jun 2020', 'Jul 2020', 'Aug 2020', 'Sep 2020', 'Oct 2020', 'Nov 2020', 'Dec 2020',
                   'Jan 2021', 'Feb 2021', 'Mar 2021', 'Apr 2021', 'May 2021', 'Jun 2021', 'Jul 2021', 'Aug 2021', 'Sep 2021', 'Oct 2021', 'Nov 2021', 'Dec 2021']
occ['MONTH_YEAR'] = pd.Categorical(occ['MONTH_YEAR'], categories = occ_month_order, ordered = True)

#dropping the Civic Duties rows
occ = occ[occ['ACTIVITY'] != 'Civic Duties']

In [22]:
#dropdown over the activities in the occupation data, starting on the first one
activities3 = list(occ['ACTIVITY'].unique())

selectActivity3 = alt.selection_single(
    fields = ['ACTIVITY'],
    init = {'ACTIVITY': activities3[0]},
    bind = alt.binding_select(options = activities3, name = 'Select activity: ')
)

#calendar order for the x axis, shared by the line chart and both marker rules
line_month_order = ['Jan 2019', 'Feb 2019', 'Mar 2019', 'Apr 2019', 'May 2019', 'Jun 2019', 'Jul 2019', 'Aug 2019', 'Sep 2019', 'Oct 2019', 'Nov 2019', 'Dec 2019',
                    'Jan 2020', 'Feb 2020', 'Mar 2020', 'May 2020', 'Jun 2020', 'Jul 2020', 'Aug 2020', 'Sep 2020', 'Oct 2020', 'Nov 2020', 'Dec 2020',
                    'Jan 2021', 'Feb 2021', 'Mar 2021', 'Apr 2021', 'May 2021', 'Jun 2021', 'Jul 2021', 'Aug 2021', 'Sep 2021', 'Oct 2021', 'Nov 2021', 'Dec 2021']

#one line (with point markers) per occupation group for the selected activity
linechart = (
    alt.Chart(occ)
    .mark_line(point = True)
    .encode(
        x = alt.X('MONTH_YEAR:O', sort = line_month_order, title = None),
        y = alt.Y('mean:Q', title = 'Average Number of Minutes Spent'),
        color = alt.Color('OCC_GROUP:N', title = 'Occupation Group',
                          scale = alt.Scale(domain = ['Healthcare Worker', 'Non-Healthcare Worker'], range = ['#ed68ce', '#e8bc56'])),
        tooltip = alt.Tooltip(['mean:Q'], format = '.2f', title = 'Average Number of Minutes')
    )
    .add_selection(selectActivity3)
    .transform_filter(selectActivity3)
)


def _month_marker(month):
    """Dashed grey vertical rule over the occupation data, visible only at `month`.

    A rule is encoded for every row; rows whose MONTH_YEAR differs from `month`
    get opacity 0.
    """
    return alt.Chart(occ).mark_rule(xOffset = 0, strokeWidth = 2.5, strokeDash = [1, 1]).encode(
        x = alt.X('MONTH_YEAR:O', sort = line_month_order),
        color = alt.value("darkgrey"),
        opacity = alt.condition(alt.datum.MONTH_YEAR == month, alt.value(1), alt.value(0))
    )


#marker rules at Mar 2020 and at Feb 2021
covid_begins2 = _month_marker('Mar 2020')
covid_ends2 = _month_marker('Feb 2021')

alt.layer(linechart, covid_begins2, covid_ends2).properties(title = 'Comparing Trends in Time Use Among Healthcare Workers and Non-Healthcare Workers')

### Calendar Plot of Time Spent on Each Activity

In [25]:
#loading the daily averages - THIS DATA ALREADY SUBTRACTED 1 FROM EACH DATE
calplot_csv = '/Users/jennabedrava/Documents/SI 649/ATUS_project_data/avg_time_all_years_byday.csv'
calplot_data = pd.read_csv(calplot_csv)

In [28]:
#disabling Altair's max number of rows specification
#(done before the calendar charts below are built from calplot_data; presumably that frame
#has more rows than Altair's default limit allows - confirm)
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [33]:
#converting DATE to string, then to datetime type
calplot_data['DATE'] = pd.to_datetime(calplot_data['DATE'].astype(str), format = '%Y-%m-%d')

#converting year to integer (not float)
calplot_data['YEAR'] = calplot_data['DATE'].dt.year

#altair doesn't like my week variable from R, so making new one
calplot_data['week'] = calplot_data['DATE'].dt.strftime('%W')

#adding time_period variable; the later cutoff is applied first so the earlier one overrides it
calplot_data['time_period'] = 'Post-COVID Peak'
calplot_data.loc[calplot_data['DATE'] <= '2021-01-01', 'time_period'] = 'COVID Peak'
calplot_data.loc[calplot_data['DATE'] <= '2020-02-01', 'time_period'] = 'Pre-COVID Peak'


#calendar heat map in Altair, so we can add interaction and combine with other plots
activities = list(calplot_data['ACTIVITY'].unique())

selectActivity = alt.selection_single(
    fields = ['ACTIVITY'],
    init = {'ACTIVITY': activities[0]},
    bind = alt.binding_select(options = activities, name = 'Select activity: ')
)


def _calendar_for_year(year):
    """Build the calendar heat map for one year of `calplot_data`.

    One column per month of DATE; inside each month, week on x and day_of_week on y,
    with the cell color showing the binned `mean` for the activity chosen in `selectActivity`.

    The tooltip is given as one definition per field: a single Tooltip built from a list
    of fields applies its `format` to every field in the list, so the numeric '.2f'
    format was also being applied to the temporal DATE field.
    """
    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    year_rows = calplot_data[calplot_data.YEAR == year]
    return alt.Chart(year_rows).mark_rect().transform_filter(selectActivity).encode(
        x = alt.X(field = 'week', type = 'ordinal', title = None),
        y = alt.Y(field = 'day_of_week', type = 'ordinal', title = None,
                  sort = alt.Sort(weekday_order)),
        color = alt.Color(field = 'mean', type = 'quantitative', scale = alt.Scale(scheme = 'blues'), bin = alt.Bin(maxbins = 5),
                          title = 'Average Minutes Spent'),
        column = alt.Column(field = 'DATE', type = 'temporal', timeUnit = 'month', title = None),
        tooltip = [alt.Tooltip('DATE:T'), alt.Tooltip('mean:Q', format = '.2f')]
    ).properties(width = 900/12).resolve_scale(x = 'independent')


#one calendar per year, stacked vertically
cal2019 = _calendar_for_year(2019)
cal2020 = _calendar_for_year(2020)
cal2021 = _calendar_for_year(2021)

(cal2019 & cal2020 & cal2021).add_selection(selectActivity).resolve_scale(x = 'shared')