In [1]:
# pip install -U altair_viewer

In [2]:
import pandas as pd
import numpy as np
import altair as alt

pd.set_option('display.max_colwidth', None)

In [3]:
df_leaders = pd.read_csv('data/leader_responses.csv')
df_ic = pd.read_csv('data/ic_response.csv')

def data_clean(df):

    # Drop the first row if it contains unwanted data
    df = df.drop(labels = [0], axis = 0)
    df = df.drop(columns = ['Respondent ID', 'Collector ID', 'Start Date', 'End Date', 'IP Address',
                            'Email Address', 'First Name', 'Last Name', 'Custom Data 1', 'Do you have any additional feedback?'], axis = 1)

    # Fill missing values and n/a responses with 2.5
    df = df.fillna(2.5)
    df.replace(6, 2.5, inplace=True) 

    return df

df_leaders = data_clean(df_leaders)
df_ic = data_clean(df_ic)

In [4]:

def conf_interval_data(df):

    df = df.T

    df = df.astype(float)

    df['mean'] = df.mean(axis=1)
    df['std'] = df.std(axis=1)
    df['upper'] = df['mean'] + df['std'] * 1
    df['lower'] = df['mean'] - df['std'] * 1

    df = df[['mean', 'std', 'upper', 'lower']].reset_index(names = 'Question')

    df = df.sort_values(by='mean', ascending=False)

    return df

df_leaders_ci = conf_interval_data(df_leaders)
df_ic_ci = conf_interval_data(df_ic)

In [5]:

from altair_saver import save

df = df_leaders_ci.copy()

question_sort_order = df['Question'].tolist()

mean_chart = alt.Chart(df).mark_circle(size = 75).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('mean', scale=alt.Scale(domain=[0, 6]), title = 'Response Range'),
    color = alt.value('#00274C'),
    tooltip = ['Question', 'mean', 'std', 'upper', 'lower']
).properties(
    title = 'Mean Responses of Leaders',
    width = 400, height = 800
)

ci_chart = alt.Chart(df).mark_bar(cornerRadius=10, height=10, opacity= .3).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('lower', scale=alt.Scale(domain=[0, 6]), title= 'Response Range'),
    x2 = alt.X2('upper'),
    color = alt.value('#FFCB05'),
    tooltip = ['Question', 'mean', 'std', 'upper', 'lower']
).properties(
    title = 'Average Response and Range of Leaders',
    width = 400, height = 800
)

ci_chart + mean_chart

In [6]:
df = df_ic_ci.copy()

question_sort_order = df['Question'].tolist()

mean_chart = alt.Chart(df).mark_circle(size = 75).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('mean', scale=alt.Scale(domain=[0, 6])),
    color = alt.value('#00274C'),
    tooltip = ['Question', 'mean', 'std', 'upper', 'lower']
).properties(
    title = 'Mean Responses of Leaders',
    width = 400, height = 800
)

ci_chart = alt.Chart(df).mark_bar(cornerRadius=10, height=10, opacity= .3).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('lower', scale=alt.Scale(domain=[0, 6]), title= 'Response Range'),
    x2 = alt.X2('upper'),
    color = alt.value('#FFCB05'),
    tooltip = ['Question', 'mean', 'std', 'upper', 'lower']
).properties(
    title = 'Average Response and Range by Team Members',
    width = 400, height = 800
)

ci_chart + mean_chart

In [7]:
df_ic_ci.head()

Unnamed: 0,Question,mean,std,upper,lower
38,Uses time constructively and efficiently,4.708333,1.290322,5.998655,3.418011
16,Provides clear and consistent direction to me when I ask for guidance,4.5,1.369306,5.869306,3.130694
4,Protects confidentiality,4.479167,1.11784,5.597007,3.361327
14,Involves me in decision-making when appropriate,4.427083,1.425181,5.852265,3.001902
32,Involves others in planning actions,4.375,1.316957,5.691957,3.058043


In [8]:
df_leaders_ci.head()

Unnamed: 0,Question,mean,std,upper,lower
0,Shows genuine concern for team members,3.9,1.3,5.2,2.6
4,Protects confidentiality,3.8,0.87178,4.67178,2.92822
17,"Encourages and supports my involvement in training and development activities, company activities and events",3.8,0.87178,4.67178,2.92822
3,"Treats people fairly, without showing favoritism",3.7,1.004988,4.704988,2.695012
40,Has excellent relationships with MWSE team members regardless of position in the organization,3.7,1.004988,4.704988,2.695012


In [9]:
df_combined = pd.merge(df_leaders_ci, df_ic_ci, on='Question', suffixes=('_leaders', '_ic'))

df_combined['mean_diff'] = df_combined['mean_ic'] - df_combined['mean_leaders']
df_combined.head()

Unnamed: 0,Question,mean_leaders,std_leaders,upper_leaders,lower_leaders,mean_ic,std_ic,upper_ic,lower_ic,mean_diff
0,Shows genuine concern for team members,3.9,1.3,5.2,2.6,4.125,0.949232,5.074232,3.175768,0.225
1,Protects confidentiality,3.8,0.87178,4.67178,2.92822,4.479167,1.11784,5.597007,3.361327,0.679167
2,"Encourages and supports my involvement in training and development activities, company activities and events",3.8,0.87178,4.67178,2.92822,4.0,0.978945,4.978945,3.021055,0.2
3,"Treats people fairly, without showing favoritism",3.7,1.004988,4.704988,2.695012,3.9375,1.048933,4.986433,2.888567,0.2375
4,Has excellent relationships with MWSE team members regardless of position in the organization,3.7,1.004988,4.704988,2.695012,4.083333,1.15169,5.235023,2.931644,0.383333


In [10]:
df = df_combined.copy()
df = df.sort_values(by='mean_diff', ascending=False)

question_sort_order = df['Question'].tolist()

ci_chart = alt.Chart(df).mark_bar(cornerRadius=10, height=10, opacity= .8).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('mean_ic', scale=alt.Scale(domain=[0, 6]), title= 'Response Range'),
    x2 = alt.X2('mean_leaders'),
    color = alt.value('#D3D3D3'),
).properties(
    title = 'Average Questions by Team Members',
    width = 400, height = 800
)

ic_chart = alt.Chart(df).mark_circle(size = 100).encode(
    y = alt.Y('Question', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('mean_ic', scale=alt.Scale(domain=[0, 6]), title= 'Response Range'),
    color = alt.value('#FFCB05'),
).properties(
    title = 'Average Questions by Team Members',
    width = 400, height = 800
)

leader_chart = alt.Chart(df).mark_circle(size = 100).encode(
    y = alt.Y('Question:N', sort=question_sort_order, axis=alt.Axis(labelLimit=600), title = ''),
    x = alt.X('mean_leaders', scale=alt.Scale(domain=[0, 6]), title= 'Response Range'),
    color = alt.value('#00274C'),
)

(ci_chart + ic_chart + leader_chart).properties(
    title={
      "text": ["Average Difference between Leaders and Team Members by Question"],
      "subtitle": ["Leaders in blue, Team Members in yellow"]
    },
    width = 400, height = 800
)