# Analise de Genero - Igualdade de Genero
**Projeto 1: IBM HR Analytics - AiDAPT - Cegid Academy**

Meta: 50% mulheres em todos os cargos.
Distribuicao por departamento, cargo, nivel hierarquico, gap salarial e promocoes.

Base de dados: Projeto1_IBM_HR

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from urllib.parse import quote_plus
from sqlalchemy import create_engine
load_dotenv(find_dotenv())

%load_ext sql

host = os.getenv('MSSQL_HOST', 'localhost')
port = os.getenv('MSSQL_PORT', '1433')
user = os.getenv('MSSQL_USER', 'sa')
password = quote_plus(os.getenv('MSSQL_PASSWORD', 'your_password_here'))
engine = create_engine(f"mssql+pymssql://{user}:{password}@{host}:{port}/Projeto1_IBM_HR")
%sql engine --alias Projeto1_IBM_HR

## 1. VISÃO GERAL DE GÉNERO

### Distribuição global

In [2]:
%%sql
SELECT
    Gender AS Genero,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem,
    CASE
        WHEN Gender = 'Female' THEN
            CAST(CAST(50 - ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS VARCHAR(10)) + '% para meta'
        ELSE ''
    END AS GapParaMeta50
FROM Colaboradores
GROUP BY Gender;

Genero,Total,Percentagem,GapParaMeta50
Female,588,40.0,10.00% para meta
Male,882,60.0,


## 2. GÉNERO POR DEPARTAMENTO

### Contagem e percentagem por departamento

In [3]:
%%sql
SELECT
    Department AS Departamento,
    Gender AS Genero,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY Department), 2) AS DECIMAL(5,2)) AS PercNoDepartamento
FROM Colaboradores
GROUP BY Department, Gender
ORDER BY Department, Gender;

Departamento,Genero,Total,PercNoDepartamento
Human Resources,Female,20,31.75
Human Resources,Male,43,68.25
Research & Development,Female,379,39.44
Research & Development,Male,582,60.56
Sales,Female,189,42.38
Sales,Male,257,57.62


### Resumo por departamento (formato pivot)

In [4]:
%%sql
SELECT
    Department AS Departamento,
    SUM(CASE WHEN Gender = 'Female' THEN 1 ELSE 0 END) AS Mulheres,
    SUM(CASE WHEN Gender = 'Male' THEN 1 ELSE 0 END) AS Homens,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercMulheres,
    CAST(50 - ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS GapParaMeta50
FROM Colaboradores
GROUP BY Department
ORDER BY PercMulheres;

Departamento,Mulheres,Homens,Total,PercMulheres,GapParaMeta50
Human Resources,20,43,63,31.7,18.3
Research & Development,379,582,961,39.4,10.6
Sales,189,257,446,42.4,7.6


## 3. GÉNERO POR CARGO (JobRole)

### Detalhado por cargo

In [5]:
%%sql
SELECT
    JobRole AS Cargo,
    SUM(CASE WHEN Gender = 'Female' THEN 1 ELSE 0 END) AS Mulheres,
    SUM(CASE WHEN Gender = 'Male' THEN 1 ELSE 0 END) AS Homens,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercMulheres,
    CASE
        WHEN SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*) < 50 THEN 'Défice Mulheres'
        WHEN SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*) > 50 THEN 'Excesso Mulheres'
        ELSE 'Equilibrado'
    END AS Situacao
FROM Colaboradores
GROUP BY JobRole
ORDER BY PercMulheres;

Cargo,Mulheres,Homens,Total,PercMulheres,Situacao
Human Resources,16,36,52,30.8,Défice Mulheres
Laboratory Technician,85,174,259,32.8,Défice Mulheres
Healthcare Representative,51,80,131,38.9,Défice Mulheres
Research Scientist,114,178,292,39.0,Défice Mulheres
Sales Executive,132,194,326,40.5,Défice Mulheres
Research Director,33,47,80,41.3,Défice Mulheres
Sales Representative,38,45,83,45.8,Défice Mulheres
Manager,47,55,102,46.1,Défice Mulheres
Manufacturing Director,72,73,145,49.7,Défice Mulheres


## 4. GÉNERO POR NÍVEL HIERÁRQUICO

### Análise por nível (importante para ver se mulheres chegam a cargos de chefia)

In [6]:
%%sql
SELECT
    JobLevel AS NivelHierarquico,
    CASE JobLevel
        WHEN 1 THEN 'Entry Level'
        WHEN 2 THEN 'Junior'
        WHEN 3 THEN 'Mid-Level'
        WHEN 4 THEN 'Senior'
        WHEN 5 THEN 'Executive'
    END AS DescricaoNivel,
    SUM(CASE WHEN Gender = 'Female' THEN 1 ELSE 0 END) AS Mulheres,
    SUM(CASE WHEN Gender = 'Male' THEN 1 ELSE 0 END) AS Homens,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercMulheres
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

NivelHierarquico,DescricaoNivel,Mulheres,Homens,Total,PercMulheres
1,Entry Level,199,344,543,36.6
2,Junior,220,314,534,41.2
3,Mid-Level,94,124,218,43.1
4,Senior,51,55,106,48.1
5,Executive,24,45,69,34.8


## 5. ANÁLISE SALARIAL POR GÉNERO

### Salário médio global por género

In [7]:
%%sql
SELECT
    Gender AS Genero,
    COUNT(*) AS NumColaboradores,
    AVG(MonthlyIncome) AS SalarioMedio,
    MIN(MonthlyIncome) AS SalarioMin,
    MAX(MonthlyIncome) AS SalarioMax
FROM Colaboradores
GROUP BY Gender;

Genero,NumColaboradores,SalarioMedio,SalarioMin,SalarioMax
Male,882,6380,1009,19999
Female,588,6686,1129,19973


### Gap salarial geral

In [8]:
%%sql
SELECT
    'Gap Salarial (Homens - Mulheres)' AS Metrica,
    (SELECT AVG(MonthlyIncome) FROM Colaboradores WHERE Gender = 'Male') -
    (SELECT AVG(MonthlyIncome) FROM Colaboradores WHERE Gender = 'Female') AS GapAbsoluto,
    CAST(ROUND(
        ((SELECT AVG(CAST(MonthlyIncome AS FLOAT)) FROM Colaboradores WHERE Gender = 'Male') -
         (SELECT AVG(CAST(MonthlyIncome AS FLOAT)) FROM Colaboradores WHERE Gender = 'Female')) /
        (SELECT AVG(CAST(MonthlyIncome AS FLOAT)) FROM Colaboradores WHERE Gender = 'Female') * 100
    , 2) AS DECIMAL(5,2)) AS GapPercentual;

Metrica,GapAbsoluto,GapPercentual
Gap Salarial (Homens - Mulheres),-306,-4.58


### Salário médio por cargo e género

In [9]:
%%sql
SELECT
    JobRole AS Cargo,
    AVG(CASE WHEN Gender = 'Female' THEN MonthlyIncome END) AS SalarioMedioMulheres,
    AVG(CASE WHEN Gender = 'Male' THEN MonthlyIncome END) AS SalarioMedioHomens,
    AVG(CASE WHEN Gender = 'Male' THEN MonthlyIncome END) -
    AVG(CASE WHEN Gender = 'Female' THEN MonthlyIncome END) AS GapSalarial
FROM Colaboradores
GROUP BY JobRole
ORDER BY GapSalarial DESC;

Cargo,SalarioMedioMulheres,SalarioMedioHomens,GapSalarial
Research Director,15144,16657,1513
Manager,16915,17409,494
Sales Executive,6764,7033,269
Healthcare Representative,7433,7589,156
Laboratory Technician,3246,3232,-14
Sales Representative,2671,2587,-84
Research Scientist,3344,3173,-171
Manufacturing Director,7409,7182,-227
Human Resources,4540,4100,-440


### Salário médio por nível e género

In [10]:
%%sql
SELECT
    JobLevel AS Nivel,
    AVG(CASE WHEN Gender = 'Female' THEN MonthlyIncome END) AS SalarioMedioMulheres,
    AVG(CASE WHEN Gender = 'Male' THEN MonthlyIncome END) AS SalarioMedioHomens,
    AVG(CASE WHEN Gender = 'Male' THEN MonthlyIncome END) -
    AVG(CASE WHEN Gender = 'Female' THEN MonthlyIncome END) AS GapSalarial
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

Nivel,SalarioMedioMulheres,SalarioMedioHomens,GapSalarial
1,2780,2790,10
2,5435,5549,114
3,9962,9706,-256
4,15431,15570,139
5,19129,19224,95


## 6. PROMOÇÕES POR GÉNERO

### Anos desde última promoção por género

In [11]:
%%sql
SELECT
    Gender AS Genero,
    AVG(YearsSinceLastPromotion) AS MediaAnosSemPromocao,
    MAX(YearsSinceLastPromotion) AS MaxAnosSemPromocao
FROM Colaboradores
GROUP BY Gender;

Genero,MediaAnosSemPromocao,MaxAnosSemPromocao
Male,2,15
Female,2,15


### Distribuição de tempo sem promoção

In [12]:
%%sql
SELECT
    Gender AS Genero,
    CASE
        WHEN YearsSinceLastPromotion = 0 THEN 'Promovido este ano'
        WHEN YearsSinceLastPromotion <= 2 THEN '1-2 anos'
        WHEN YearsSinceLastPromotion <= 5 THEN '3-5 anos'
        ELSE '5+ anos sem promoção'
    END AS TempoSemPromocao,
    COUNT(*) AS Total
FROM Colaboradores
GROUP BY Gender,
    CASE
        WHEN YearsSinceLastPromotion = 0 THEN 'Promovido este ano'
        WHEN YearsSinceLastPromotion <= 2 THEN '1-2 anos'
        WHEN YearsSinceLastPromotion <= 5 THEN '3-5 anos'
        ELSE '5+ anos sem promoção'
    END
ORDER BY Gender, MIN(YearsSinceLastPromotion);

Genero,TempoSemPromocao,Total
Female,Promovido este ano,228
Female,1-2 anos,195
Female,3-5 anos,74
Female,5+ anos sem promoção,91
Male,Promovido este ano,353
Male,1-2 anos,321
Male,3-5 anos,84
Male,5+ anos sem promoção,124


## 7. ATTRITION POR GÉNERO

### Taxa de saída por género

In [13]:
%%sql
SELECT
    Gender AS Genero,
    Attrition,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY Gender), 2) AS DECIMAL(5,2)) AS PercNoGenero
FROM Colaboradores
GROUP BY Gender, Attrition
ORDER BY Gender, Attrition;

Genero,Attrition,Total,PercNoGenero
Female,No,501,85.2
Female,Yes,87,14.8
Male,No,732,82.99
Male,Yes,150,17.01


## 8. OVERTIME E BUSINESS TRAVEL POR GÉNERO

### Overtime por género

In [14]:
%%sql
SELECT
    Gender AS Genero,
    OverTime,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY Gender), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Gender, OverTime
ORDER BY Gender, OverTime;

Genero,OverTime,Total,Percentagem
Female,No,408,69.39
Female,Yes,180,30.61
Male,No,646,73.24
Male,Yes,236,26.76


### Business Travel por género

In [15]:
%%sql
SELECT
    Gender AS Genero,
    BusinessTravel AS TipoViagem,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (PARTITION BY Gender), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Gender, BusinessTravel
ORDER BY Gender, BusinessTravel;

Genero,TipoViagem,Total,Percentagem
Female,Non-Travel,49,8.33
Female,Travel_Frequently,117,19.9
Female,Travel_Rarely,422,71.77
Male,Non-Travel,101,11.45
Male,Travel_Frequently,160,18.14
Male,Travel_Rarely,621,70.41


## 9. RESUMO EXECUTIVO - IGUALDADE DE GÉNERO

In [16]:
%%sql
SELECT '=== RESUMO IGUALDADE DE GÉNERO ===' AS Info;

Info
=== RESUMO IGUALDADE DE GÉNERO ===


In [17]:
%%sql
SELECT
    'Percentagem de Mulheres na Empresa' AS Metrica,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0.0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS Valor,
    CAST(50 - ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0.0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '% para meta' AS GapParaMeta
FROM Colaboradores;

Metrica,Valor,GapParaMeta
Percentagem de Mulheres na Empresa,40.000000%,10.000000% para meta


### Departamento com MENOS mulheres

In [18]:
%%sql
SELECT TOP 1
    'Departamento com Menos Mulheres' AS Metrica,
    Department AS Departamento,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS PercMulheres
FROM Colaboradores
GROUP BY Department
ORDER BY SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*);

Metrica,Departamento,PercMulheres
Departamento com Menos Mulheres,Human Resources,31.700000%


### Cargo com MENOS mulheres

In [19]:
%%sql
SELECT TOP 1
    'Cargo com Menos Mulheres' AS Metrica,
    JobRole AS Cargo,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS PercMulheres
FROM Colaboradores
GROUP BY JobRole
ORDER BY SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*);

Metrica,Cargo,PercMulheres
Cargo com Menos Mulheres,Human Resources,30.800000%
