## Import packages and set working directory

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats

## Calculate differences by question type for all hires

In [3]:
# Compare pre- vs. post-covid survey scores for every Survey+Question:
#   1. split the responses by question,
#   2. collect respondent counts, mean scores and the mean difference,
#   3. run a one-tailed Mann-Whitney U test per question,
#   4. merge everything into the summary dataframe `a`.
# `df` is not defined in this cell; it must already exist and provide the
# columns 'Survey+Question', 'Covid Grp' and 'Value'.

# Create dictionary of dataframes split by Survey+Question
dfs = {i: data for i, data in df.groupby('Survey+Question')}

# Set the alpha level once; it is constant across all questions
alpha = 0.05

# Create empty list to hold aggregated data and empty dictionary to hold pre- and post-covid values by question
a1 = []
all_dict = {}

# Loop over dictionary of dataframes by Survey+Question
for key, data in dfs.items():
    # Split dataframe into pre- and post-covid groups
    pre_cnt = data[data['Covid Grp'] == 'Pre-Covid']['Value']
    pst_cnt = data[data['Covid Grp'] == 'Post-Covid']['Value']
    # Add pre- and post-covid values to empty dictionary for later
    all_dict[key] = {'Pre-Covid': pre_cnt, 'Post-Covid': pst_cnt}
    # Compute each group mean once and reuse it for the difference
    pre_mean = pre_cnt.mean()
    pst_mean = pst_cnt.mean()
    # Append metrics to empty list for each question
    a1.append({'Survey+Question': key,
               'Pre-Covid Respondents': len(pre_cnt),
               'Post-Covid Respondents': len(pst_cnt),
               'Pre-Covid Score': pre_mean,
               'Post-Covid Score': pst_mean,
               'Difference (Post-Pre)': pst_mean - pre_mean})

# Create empty list to hold p-values by Survey+Question
a2 = []

# Loop over dictionary created above with pre- and post-covid values by Survey+Question
for key, data in all_dict.items():
    # Perform a one-tailed Mann-Whitney U test on the pre- and post-covid data.
    # NOTE: alternative='greater' makes the alternative hypothesis directional
    # (Pre-Covid values tend to be greater than Post-Covid values), so rejecting
    # H0 supports "pre-covid scores were higher", not merely "the distributions
    # differ" as the printed wording below suggests.
    stat, pvalue = stats.mannwhitneyu(data['Pre-Covid'], data['Post-Covid'], alternative='greater')
    # Print the Survey+Question and the conclusion given by the p-value
    if pvalue > alpha:
        print(key, '\nSAME DISTRIBUTION (FAIL TO REJECT H0)\n')
    else:
        print(key, '\nDIFFERENT DISTRIBUTION (REJECT H0)\n')
    # Append the Survey+Question and p-value to the empty list created above
    a2.append({'Survey+Question': key,
               'PValue': pvalue})

# Convert lists of dicts to dataframes and merge together on the question key
a1 = pd.DataFrame(a1)
a2 = pd.DataFrame(a2)
a = a1.merge(a2, on='Survey+Question')

# Put the columns of the final dataframe in presentation order
a = a[['Survey+Question', 'Pre-Covid Respondents', 'Post-Covid Respondents', 'Pre-Covid Score', 'Post-Covid Score', 'Difference (Post-Pre)', 'PValue']]

14 Day EE: I am satisfied with the company's overall hiring process (application, interview, and on-boarding). 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: I feel like joining LG&E and KU Energy was the right decision. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: I have a clear understanding of the company's benefits. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: I received a welcome call from the hiring manager. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: I was given a clear understanding of the position and job expectations. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: My co-workers have made me feel welcomed at LG&E and KU Energy. 
DIFFERENT DISTRIBUTION (REJECT H0)

14 Day EE: My workspace was ready when I arrived. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: The Company's representative kept me well-informed at different points in the process. 
SAME DISTRIBUTION (FAIL TO REJECT H0)

14 Day EE: The Company's representative was available to answer