# D&I report - HSBC Contact Center

# Hypothesis 1: Teams with higher gender diversity have higher KPI achievement rates.

Hypothesis: Gender-balanced teams (40-60% female) have higher KPI achievement rates than teams that are not gender-balanced.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats

# Female share per team, repeated on every member's row
df['female_ratio'] = df.groupby('team')['gender'].transform(
    lambda genders: (genders == 'Female').mean()
)

# Gender-balanced rows have a share inside [0.4, 0.6]; rows strictly outside that band are non-balanced
female_share = df['female_ratio']
diverse_teams = df[female_share.between(0.4, 0.6)]
non_diverse_teams = df[female_share.lt(0.4) | female_share.gt(0.6)]

# Two-sample t-test on KPI achievement: balanced vs non-balanced
balanced_kpi = diverse_teams['KPI_achievement']
unbalanced_kpi = non_diverse_teams['KPI_achievement']
t_stat, p_val = stats.ttest_ind(balanced_kpi, unbalanced_kpi)
print(f"Hypothesis 1 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Boxplot for Gender Diversity vs KPI Achievement
# sns.boxplot treats x as a categorical axis and draws the boxes at integer
# positions (0, 1, 2, ...), so vertical lines at x=0.4 and x=0.6 do not mark
# the 40% / 60% female-share thresholds. Plot the balanced / not-balanced
# split (40-60% female, bounds inclusive) directly instead.
gender_balance = df['female_ratio'].between(0.4, 0.6).map(
    {True: 'Balanced (40-60% female)', False: 'Not balanced'}
).where(df['female_ratio'].notna())  # keep rows without a ratio out of both boxes

sns.boxplot(
    x=gender_balance,
    y='KPI_achievement',
    data=df,
    order=['Balanced (40-60% female)', 'Not balanced'],
)
plt.title('KPI Achievement by Gender Diversity')
plt.xlabel('Team Gender Balance')
plt.ylabel('KPI Achievement')
plt.show()


# Hypothesis 2: Younger agents (age < 30) tend to achieve higher KPIs.

In [None]:
# Partition agents at the 30-year mark
under_30 = df['age'] < 30
at_least_30 = df['age'] >= 30
young_agents = df[under_30]
older_agents = df[at_least_30]

# Two-sample t-test on KPI achievement: under 30 vs 30 and over
young_kpi = young_agents['KPI_achievement']
older_kpi = older_agents['KPI_achievement']
t_stat, p_val = stats.ttest_ind(young_kpi, older_kpi)
print(f"Hypothesis 2 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Barplot for KPI Achievement by Age Group
# right=False gives the bins [0, 30), [30, 40), [40, inf), so a 30-year-old
# is no longer labelled '<30' (the t-test treats age < 30 as "young").
# The open-ended upper edge replaces df['age'].max(), which made pd.cut
# raise whenever the oldest agent was 40 or younger.
df['age_group'] = pd.cut(
    df['age'],
    bins=[0, 30, 40, float('inf')],
    right=False,
    labels=['<30', '30-39', '40+'],
)
sns.barplot(x='age_group', y='KPI_achievement', data=df, ci=None)
plt.title('KPI Achievement by Age Group')
plt.xlabel('Age Group')
plt.ylabel('KPI Achievement')
plt.show()



# Hypothesis 3: Teams led by female team leads have higher KPI performance than teams led by male team leads.

In [None]:
# Split based on team lead's gender.
# NOTE(review): gender is inferred from the text of 'teamlead_name' - confirm
# that this column really embeds 'Female' / 'Male'.
# na=False makes rows with a missing team lead name evaluate to False; without
# it str.contains returns NaN for them and the boolean indexing below fails.
has_female_lead = df['teamlead_name'].str.contains('Female', na=False)
has_male_lead = df['teamlead_name'].str.contains('Male', na=False)
female_lead_teams = df[has_female_lead]
male_lead_teams = df[has_male_lead]

# Perform t-test
t_stat, p_val = stats.ttest_ind(female_lead_teams['KPI_achievement'], male_lead_teams['KPI_achievement'])
print(f"Hypothesis 3 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Boxplot for Team Lead Gender vs KPI Achievement
# Label rows with the same substring rules the t-test uses. Rows whose lead
# name is missing or contains neither marker stay unlabelled (NaN) instead of
# defaulting to 'Male', and a missing name no longer raises a TypeError.
lead_name = df['teamlead_name']
is_female_lead = lead_name.str.contains('Female', na=False)
is_male_lead = lead_name.str.contains('Male', na=False)
df['lead_gender'] = is_female_lead.map({True: 'Female', False: 'Male'}).where(is_female_lead | is_male_lead)

sns.boxplot(x='lead_gender', y='KPI_achievement', data=df)
plt.title('KPI Achievement by Team Lead Gender')
plt.xlabel('Team Lead Gender')
plt.ylabel('KPI Achievement')
plt.show()


# Hypothesis 4: Teams with smaller team sizes perform better in terms of KPI achievement.

In [None]:
# Hypothesis: Smaller teams (less than 10 members) perform better in terms of KPI achievement.

In [None]:
# Create bins for team size: Small (0, 10], Medium (10, 20], Large (20, inf).
# The open-ended upper edge replaces df['team_size'].max(): with the largest
# team at 20 or fewer members the edges were no longer strictly increasing
# and pd.cut raised a ValueError.
size_labels = ['Small', 'Medium', 'Large']
df['team_size_bin'] = pd.cut(df['team_size'], bins=[0, 10, 20, float('inf')], labels=size_labels)

# Perform one-way ANOVA on KPI achievement across the team size bins
kpi_by_size = [df.loc[df['team_size_bin'] == label, 'KPI_achievement'] for label in size_labels]
anova_results = stats.f_oneway(*kpi_by_size)
print(f"Hypothesis 4 - F-statistic: {anova_results.statistic}, P-value: {anova_results.pvalue}")


In [None]:
# Boxplot: KPI achievement distribution for each team-size category
ax = sns.boxplot(data=df, x='team_size_bin', y='KPI_achievement')
ax.set_title('KPI Achievement by Team Size')
ax.set_xlabel('Team Size Category')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 5: No significant difference in KPI achievement between male and female agents.


In [None]:
# One frame per agent gender
is_female = df['gender'] == 'Female'
is_male = df['gender'] == 'Male'
female_agents = df[is_female]
male_agents = df[is_male]

# Two-sample t-test on KPI achievement: female vs male agents
female_kpi = female_agents['KPI_achievement']
male_kpi = male_agents['KPI_achievement']
t_stat, p_val = stats.ttest_ind(female_kpi, male_kpi)
print(f"Hypothesis 5 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement distribution per agent gender
ax = sns.boxplot(data=df, x='gender', y='KPI_achievement')
ax.set_title('KPI Achievement by Agent Gender')
ax.set_xlabel('Agent Gender')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 6: Teams with diverse tenure perform better.


In [None]:
# Define tenure groups (Junior: <2 years, Senior: >= 2 years).
# right=False yields the bins [0, 2) and [2, inf), which matches the stated
# definition; the default right-closed bins put tenure == 2 in Junior and
# left tenure == 0 unassigned. The open-ended upper edge also avoids a
# ValueError when no agent has more than 2 years of tenure.
df['tenure_group'] = pd.cut(df['tenure'], bins=[0, 2, float('inf')], right=False, labels=['Junior', 'Senior'])

# Calculate tenure diversity: number of distinct tenure groups present in each team.
# 'nunique' ignores missing values, whereas len(set(x)) counted NaN as an extra group.
df['tenure_diversity'] = df.groupby('team')['tenure_group'].transform('nunique')

# Perform t-test: teams containing both groups vs teams containing a single group
diverse_tenure_teams = df[df['tenure_diversity'] > 1]
non_diverse_tenure_teams = df[df['tenure_diversity'] == 1]

t_stat, p_val = stats.ttest_ind(diverse_tenure_teams['KPI_achievement'], non_diverse_tenure_teams['KPI_achievement'])
print(f"Hypothesis 6 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Boxplot for Tenure Diversity vs KPI Achievement
# The hypothesis compares teams by tenure mix, so group the boxes by the
# team-level 'tenure_diversity' count rather than by each agent's own
# tenure group.
tenure_mix = (df['tenure_diversity'] > 1).map({True: 'Mixed tenure', False: 'Single tenure group'})
sns.boxplot(x=tenure_mix, y='KPI_achievement', data=df, order=['Mixed tenure', 'Single tenure group'])
plt.title('KPI Achievement by Team Tenure Diversity')
plt.xlabel('Team Tenure Diversity')
plt.ylabel('KPI Achievement')
plt.show()


# Hypothesis 7: Agents with higher tenure (years of experience) achieve better KPIs.


In [None]:
# Junior agents have under 2 years of tenure; senior agents have 2 or more
is_junior = df['tenure'] < 2
is_senior = df['tenure'] >= 2
junior_agents = df[is_junior]
senior_agents = df[is_senior]

# Two-sample t-test on KPI achievement: junior vs senior agents
junior_kpi = junior_agents['KPI_achievement']
senior_kpi = senior_agents['KPI_achievement']
t_stat, p_val = stats.ttest_ind(junior_kpi, senior_kpi)
print(f"Hypothesis 7 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Barplot for KPI Achievement by Tenure Group
# right=False gives the bins [0, 2), [2, 5), [5, inf), so an agent with
# exactly 2 years is no longer labelled '<2 Years' (the t-test counts
# tenure >= 2 as senior) and tenure == 0 is no longer dropped. The open-ended
# upper edge replaces df['tenure'].max(), which made pd.cut raise when the
# longest tenure was 5 years or less.
# Note: this overwrites the two-level 'tenure_group' column built for Hypothesis 6.
df['tenure_group'] = pd.cut(
    df['tenure'],
    bins=[0, 2, 5, float('inf')],
    right=False,
    labels=['<2 Years', '2-5 Years', '5+ Years'],
)
sns.barplot(x='tenure_group', y='KPI_achievement', data=df, ci=None)
plt.title('KPI Achievement by Tenure Group')
plt.xlabel('Tenure Group')
plt.ylabel('KPI Achievement')
plt.show()


# Hypothesis 8: Teams with more diverse age groups achieve higher KPIs.


In [None]:
# Age diversity = standard deviation of ages within the agent's team
df['age_diversity'] = df.groupby('team')['age'].transform('std')

# Median split: strictly above the median is "high", at or below is "low"
median_age_diversity = df['age_diversity'].median()
above_median = df['age_diversity'] > median_age_diversity
at_or_below_median = df['age_diversity'] <= median_age_diversity
high_age_diversity_teams = df[above_median]
low_age_diversity_teams = df[at_or_below_median]

# Two-sample t-test on KPI achievement: high vs low age diversity
high_kpi = high_age_diversity_teams['KPI_achievement']
low_kpi = low_age_diversity_teams['KPI_achievement']
t_stat, p_val = stats.ttest_ind(high_kpi, low_kpi)
print(f"Hypothesis 8 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement across three equal-width bins of age diversity
age_diversity_bins = pd.cut(df['age_diversity'], bins=3)
ax = sns.boxplot(data=df, x=age_diversity_bins, y='KPI_achievement')
ax.set_title('KPI Achievement by Age Diversity')
ax.set_xlabel('Age Diversity (Group Binned)')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 9: Teams with more female agents perform as well as teams with more male agents.

In [None]:
# Female share per team, repeated on every member's row
df['female_ratio'] = df.groupby('team')['gender'].transform(
    lambda genders: (genders == 'Female').mean()
)

# Split at a 50% female share: strictly above vs at or below
majority_female = df['female_ratio'] > 0.5
not_majority_female = df['female_ratio'] <= 0.5
more_female_teams = df[majority_female]
more_male_teams = df[not_majority_female]

# Two-sample t-test on KPI achievement between the two splits
more_female_kpi = more_female_teams['KPI_achievement']
more_male_kpi = more_male_teams['KPI_achievement']
t_stat, p_val = stats.ttest_ind(more_female_kpi, more_male_kpi)
print(f"Hypothesis 9 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement by team gender majority.
# A female share strictly above 0.5 is 'More Female'; everything else is 'More Male'.
df['gender_majority'] = df['female_ratio'].gt(0.5).map({True: 'More Female', False: 'More Male'})
ax = sns.boxplot(data=df, x='gender_majority', y='KPI_achievement')
ax.set_title('KPI Achievement by Gender Majority in Teams')
ax.set_xlabel('Team Gender Majority')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 10: Larger teams have lower KPI achievement due to the challenges of managing larger groups.


In [None]:
# Size bands: small (<= 10), medium (11-20), large (> 20)
team_size = df['team_size']
small_teams = df[team_size.le(10)]
medium_teams = df[team_size.gt(10) & team_size.le(20)]
large_teams = df[team_size.gt(20)]

# One-way ANOVA on KPI achievement across the three size bands
kpi_by_band = [band['KPI_achievement'] for band in (small_teams, medium_teams, large_teams)]
anova_results = stats.f_oneway(*kpi_by_band)
print(f"Hypothesis 10 - F-statistic: {anova_results.statistic}, P-value: {anova_results.pvalue}")


In [None]:
# Boxplot: KPI achievement per team-size category (reads the existing 'team_size_bin' column)
ax = sns.boxplot(data=df, x='team_size_bin', y='KPI_achievement')
ax.set_title('KPI Achievement by Team Size')
ax.set_xlabel('Team Size Category')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 11: Agents working in diverse teams (in terms of gender, age, or other factors) perform better than those in homogenous teams.


In [None]:
# Create a diversity score: combining gender and age diversity into one metric.
# The original read df['gender_ratio'], a column this notebook never creates
# (the computed column is 'female_ratio') - NOTE(review): confirm it is not
# supplied by the raw data. The female share is recomputed here so the cell
# does not depend on that column.
female_share = df.groupby('team')['gender'].transform(lambda genders: (genders == 'Female').mean())

# Convert the share into a balance measure: 1.0 at a 50/50 split, 0.0 for a
# single-gender team. Using the raw share would rank an all-female team as the
# most "diverse".
gender_balance = 1 - (2 * female_share - 1).abs()
df['diversity_score'] = gender_balance * df['age_diversity']  # Combine gender and age diversity

# Split into high and low diversity groups (median as the threshold)
median_diversity_score = df['diversity_score'].median()
high_diversity_teams = df[df['diversity_score'] > median_diversity_score]
low_diversity_teams = df[df['diversity_score'] <= median_diversity_score]

# Perform t-test to compare high vs low diversity teams
t_stat, p_val = stats.ttest_ind(high_diversity_teams['KPI_achievement'], low_diversity_teams['KPI_achievement'])
print(f"Hypothesis 11 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement across three equal-width bins of the diversity score
diversity_score_bins = pd.cut(df['diversity_score'], bins=3)
ax = sns.boxplot(data=df, x=diversity_score_bins, y='KPI_achievement')
ax.set_title('KPI Achievement by Diversity Score (Gender + Age)')
ax.set_xlabel('Diversity Score (Group Binned)')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 12: Agents with more flexible work schedules (e.g., working from home) achieve better KPI scores.

In [None]:
# Categorize agents based on work flexibility: Flexible vs Non-Flexible.
# A schedule is flexible when it equals 'Remote' or contains 'Flexible'.
# The vectorised form with na=False replaces a lambda that raised TypeError
# on a missing schedule; such rows stay unlabelled (NaN) and therefore fall
# into neither group.
schedule = df['work_schedule']
is_flexible = schedule.eq('Remote') | schedule.str.contains('Flexible', na=False, regex=False)
df['work_flexibility'] = is_flexible.map({True: 'Flexible', False: 'Non-Flexible'}).where(schedule.notna())

# Perform t-test for flexibility-based KPI achievement differences
flexible_kpi = df.loc[df['work_flexibility'] == 'Flexible', 'KPI_achievement']
non_flexible_kpi = df.loc[df['work_flexibility'] == 'Non-Flexible', 'KPI_achievement']
t_stat, p_val = stats.ttest_ind(flexible_kpi, non_flexible_kpi)
print(f"Hypothesis 12 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement for flexible vs non-flexible schedules
ax = sns.boxplot(data=df, x='work_flexibility', y='KPI_achievement')
ax.set_title('KPI Achievement by Work Schedule Flexibility')
ax.set_xlabel('Work Flexibility (Flexible vs Non-Flexible)')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 13: Agents with more training hours perform better on KPIs.

In [None]:
# Median split on training hours: strictly above is "high", at or below is "low"
median_training_hours = df['training_hours'].median()
above_median = df['training_hours'] > median_training_hours
at_or_below_median = df['training_hours'] <= median_training_hours
high_training_agents = df[above_median]
low_training_agents = df[at_or_below_median]

# Two-sample t-test on KPI achievement: high vs low training hours
high_kpi = high_training_agents['KPI_achievement']
low_kpi = low_training_agents['KPI_achievement']
t_stat, p_val = stats.ttest_ind(high_kpi, low_kpi)
print(f"Hypothesis 13 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Visualization: Barplot for KPI Achievement by Training Hours Group
# Label agents with the same rule as the t-test (above the median = high,
# at or below = low). The previous pd.cut over (0, median] and (median, max]
# dropped agents with 0 training hours and raised when the median was 0 or
# equal to the maximum. Rows with missing hours stay unlabelled.
hours = df['training_hours']
df['training_group'] = (hours > median_training_hours).map(
    {True: 'High Training', False: 'Low Training'}
).where(hours.notna())
sns.barplot(x='training_group', y='KPI_achievement', data=df,
            order=['Low Training', 'High Training'], ci=None)
plt.title('KPI Achievement by Training Hours Group')
plt.xlabel('Training Group')
plt.ylabel('KPI Achievement')
plt.show()


# Hypothesis 14: Teams with more diverse skill sets perform better.

In [None]:
# Skill diversity = number of distinct KPI types recorded within the agent's team
df['skill_diversity'] = df.groupby('team')['KPI_type'].transform('nunique')

# Median split: strictly above the median is "high", at or below is "low"
median_skill_diversity = df['skill_diversity'].median()
above_median = df['skill_diversity'] > median_skill_diversity
at_or_below_median = df['skill_diversity'] <= median_skill_diversity
high_skill_diversity_teams = df[above_median]
low_skill_diversity_teams = df[at_or_below_median]

# Two-sample t-test on KPI achievement: high vs low skill diversity
high_kpi = high_skill_diversity_teams['KPI_achievement']
low_kpi = low_skill_diversity_teams['KPI_achievement']
t_stat, p_val = stats.ttest_ind(high_kpi, low_kpi)
print(f"Hypothesis 14 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Scatterplot: one point per row, skill diversity on x and KPI achievement on y
ax = sns.scatterplot(data=df, x='skill_diversity', y='KPI_achievement')
ax.set_title('KPI Achievement by Skill Diversity')
ax.set_xlabel('Skill Diversity (Unique KPI Types)')
ax.set_ylabel('KPI Achievement')
plt.show()


# Hypothesis 15: Agents from underrepresented groups (e.g., minorities) perform as well as agents from majority groups.

In [None]:
# Flag each agent as 'Minority' when ethnicity is 'Group1' or 'Group2', otherwise 'Majority'
minority_groups = ['Group1', 'Group2']
df['minority_status'] = df['ethnicity'].isin(minority_groups).map({True: 'Minority', False: 'Majority'})

# Two-sample t-test on KPI achievement: minority vs majority agents
minority_kpi = df.loc[df['minority_status'] == 'Minority', 'KPI_achievement']
majority_kpi = df.loc[df['minority_status'] == 'Majority', 'KPI_achievement']
t_stat, p_val = stats.ttest_ind(minority_kpi, majority_kpi)
print(f"Hypothesis 15 - T-statistic: {t_stat}, P-value: {p_val}")


In [None]:
# Boxplot: KPI achievement for minority vs majority agents
ax = sns.boxplot(data=df, x='minority_status', y='KPI_achievement')
ax.set_title('KPI Achievement by Minority vs Majority Group')
ax.set_xlabel('Minority Status')
ax.set_ylabel('KPI Achievement')
plt.show()


# D&I KPIs

In [None]:
1. Representation and Demographics KPIs
    These KPIs assess the composition of the workforce across different diversity dimensions such as gender, ethnicity, age, and more.

    a) Gender Diversity Ratio: The share of women (or of any other gender identity) in the workforce, overall and at each level (entry-level, mid-management, senior leadership).
            Formula: (Number of women / Total workforce) x 100
    b) Minority Representation: The percentage of employees from underrepresented groups (ethnic minorities, people with disabilities, etc.) in the overall workforce and at different levels.
            Formula: (Number of minority group members / Total workforce) x 100
    c) Age Diversity Index: Measures the distribution of employees across different age groups and how balanced the workforce is in terms of generational diversity.
    d) Workforce Composition by Job Level: Representation of diverse groups (gender, race, etc.) across job levels—entry-level, middle management, and senior leadership.
            Formula: (Number of diverse employees at a level / Total employees at that level) x 100

            
2. Recruitment, Promotion, and Retention KPIs
    These KPIs evaluate how well an organization is attracting, advancing, and retaining a diverse workforce.

    a) Diverse Hiring Rate: The percentage of new hires who belong to underrepresented groups (e.g., women, ethnic minorities, etc.).
            Formula: (Number of diverse hires / Total number of hires) x 100
    b) Promotion Rate for Diverse Groups: The rate at which employees from underrepresented groups are promoted compared to the general workforce.
            Formula: (Promotions of diverse employees / Total promotions) x 100
    c) Retention Rate by Demographics: The percentage of employees from diverse groups retained year-over-year, compared to the general employee population.
            Formula: (Number of diverse employees retained / Total diverse employees at start of the year) x 100
    d) Turnover Rate by Demographics: The rate at which diverse employees leave the company compared to others.
            Formula: (Number of diverse employees who left / Total diverse employees) x 100

                
3. Pay Equity KPIs
    These KPIs assess pay fairness across different groups to ensure equal pay for equal work.

    a) Gender Pay Gap: The difference between the average salaries of male and female employees, expressed as a percentage of male earnings.
            Formula: ((Male average salary - Female average salary) / Male average salary) x 100
    b) Ethnic Pay Gap: The difference in pay between majority ethnic groups and minority ethnic groups.
    c) Pay Distribution Across Teams at the Same Job Level: A comparison of pay between diverse employees and the rest of the workforce at each job level, across different teams.

4. Inclusive Leadership KPIs
    These KPIs evaluate how leaders foster inclusivity in their teams and the organization.

    a) Manager Inclusivity Score: An employee-reported measure of how inclusive managers are, based on factors like fairness, bias-free decision-making, and support for diverse team members.
            Formula: Derived from employee surveys (average score out of 10, for example)
    b) Diverse Leadership Representation: The percentage of leadership positions (e.g., senior managers, directors, C-suite) held by individuals from diverse groups.
    c) Inclusive Training Participation: The percentage of managers and employees who have completed diversity, equity, and inclusion training.
            Formula: (Number of employees trained in DEI / Total workforce) x 100

5. Employee Experience and Sentiment KPIs
    These KPIs measure how employees from diverse backgrounds experience the workplace in terms of inclusion, belonging, and engagement.

    a) Inclusion Score: A measure of how included employees feel, based on survey responses on topics like voice, belonging, and fair treatment.
            Formula: Aggregated score from employee surveys
    b) Belonging Index: The percentage of employees who report feeling a sense of belonging within the organization.
            Formula: (Employees reporting a sense of belonging / Total survey respondents) x 100
    c) Employee Net Promoter Score (eNPS) by Demographics: The willingness of employees from different demographic groups to recommend the organization as a place to work.
            Formula: (Percentage of Promoters - Percentage of Detractors)
    d) Employee Engagement by Demographics: Measures how engaged diverse employees are compared to the broader employee population.
            Formula: (Engagement score of diverse employees / Total engagement score) x 100

6. Learning and Development KPIs
    These KPIs assess whether diverse groups are given equal opportunities for growth and development.

    a) Training Participation by Demographics: The percentage of diverse employees who receive training compared to the general workforce.
            Formula: (Number of diverse employees trained / Total employees trained) x 100
    b) Leadership Development Program Participation: The percentage of underrepresented groups enrolled in leadership development programs.
    c) Training Completion Rate on D&I Topics: Percentage of employees who have completed mandatory or voluntary diversity training.
7. Work-Life Balance and Flexibility KPIs
    These KPIs measure how work-life balance initiatives are benefiting diverse groups.

    a) Flexible Working Arrangement Uptake: The percentage of diverse employees utilizing remote or flexible work options.
            Formula: (Number of diverse employees with flexible work arrangements / Total diverse employees) x 100
    b) Parental Leave Uptake by Gender: The rate at which male and female employees take parental leave.
            Formula: (Parental leave uptake for each gender / Total eligible employees for that gender) x 100
8. Diversity of Suppliers and Stakeholders
    These KPIs measure how diverse an organization’s suppliers, vendors, and partners are.

    a) Diverse Supplier Spend: The percentage of procurement spend on suppliers owned by underrepresented groups (e.g., women-owned, minority-owned businesses).
            Formula: (Spend on diverse suppliers / Total procurement spend) x 100
    b) Supplier Diversity Ratio: The ratio of diverse suppliers compared to the total number of suppliers.
            Formula: (Number of diverse suppliers / Total suppliers) x 100
9. External Recognition and Certification KPIs
    These KPIs measure the external recognition an organization receives for its diversity and inclusion efforts.

    a) Diversity Awards and Recognition: The number of external awards or recognitions received for D&I practices.
    b) Diversity Certification Levels: Achieving certifications like those from DiversityInc, Great Place to Work, or Forbes' Best Employers for Diversity.
