# Analise Adicional - Questoes do Grupo
**Projeto 1: IBM HR Analytics - AiDAPT - Cegid Academy**

Questoes especificas levantadas pelo grupo: stock options, distancia de casa,
performance rating, conflito de geracoes e chefias por genero.

Base de dados: Projeto1_IBM_HR

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from urllib.parse import quote_plus
from sqlalchemy import create_engine
load_dotenv(find_dotenv())

%load_ext sql

host = os.getenv('MSSQL_HOST', 'localhost')
port = os.getenv('MSSQL_PORT', '1433')
user = os.getenv('MSSQL_USER', 'sa')
password = quote_plus(os.getenv('MSSQL_PASSWORD', 'your_password_here'))
engine = create_engine(f"mssql+pymssql://{user}:{password}@{host}:{port}/Projeto1_IBM_HR")
%sql engine --alias Projeto1_IBM_HR

## 1. JOBROLES EM VÁRIOS DEPARTAMENTOS

### Questão: Existem os mesmos JobRoles em vários departamentos?

### Verificar quais JobRoles aparecem em múltiplos departamentos

In [2]:
%%sql
SELECT
    JobRole AS Cargo,
    COUNT(DISTINCT Department) AS NumDepartamentos,
    STRING_AGG(Department, ', ') AS Departamentos
FROM (
    SELECT DISTINCT JobRole, Department
    FROM Colaboradores
) AS sub
GROUP BY JobRole
ORDER BY NumDepartamentos DESC;

Cargo,NumDepartamentos,Departamentos
Manager,3,"Human Resources, Research & Development, Sales"
Manufacturing Director,1,Research & Development
Research Director,1,Research & Development
Research Scientist,1,Research & Development
Sales Executive,1,Sales
Sales Representative,1,Sales
Healthcare Representative,1,Research & Development
Human Resources,1,Human Resources
Laboratory Technician,1,Research & Development


### Detalhe por cargo e departamento

In [3]:
%%sql
SELECT
    Department AS Departamento,
    JobRole AS Cargo,
    COUNT(*) AS Total
FROM Colaboradores
GROUP BY Department, JobRole
ORDER BY JobRole, Department;

Departamento,Cargo,Total
Research & Development,Healthcare Representative,131
Human Resources,Human Resources,52
Research & Development,Laboratory Technician,259
Human Resources,Manager,11
Research & Development,Manager,54
Sales,Manager,37
Research & Development,Manufacturing Director,145
Research & Development,Research Director,80
Research & Development,Research Scientist,292
Sales,Sales Executive,326


## 2. STOCK OPTIONS

### Questão: Só há stock options para certo tipo de colaborador?

### Distribuição de Stock Options

In [4]:
%%sql
SELECT
    StockOptionLevel AS NivelStockOptions,
    CASE StockOptionLevel
        WHEN 0 THEN 'Sem Stock Options'
        WHEN 1 THEN 'Básico'
        WHEN 2 THEN 'Médio'
        WHEN 3 THEN 'Alto'
    END AS Descricao,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 1) AS DECIMAL(5,1)) AS Percentagem
FROM Colaboradores
GROUP BY StockOptionLevel
ORDER BY StockOptionLevel;

NivelStockOptions,Descricao,Total,Percentagem
0,Sem Stock Options,631,42.9
1,Básico,596,40.5
2,Médio,158,10.7
3,Alto,85,5.8


### Stock Options por JobLevel

In [5]:
%%sql
SELECT
    JobLevel AS Nivel,
    SUM(CASE WHEN StockOptionLevel = 0 THEN 1 ELSE 0 END) AS [Sem_Stock],
    SUM(CASE WHEN StockOptionLevel = 1 THEN 1 ELSE 0 END) AS [Nivel_1],
    SUM(CASE WHEN StockOptionLevel = 2 THEN 1 ELSE 0 END) AS [Nivel_2],
    SUM(CASE WHEN StockOptionLevel = 3 THEN 1 ELSE 0 END) AS [Nivel_3],
    ROUND(AVG(CAST(StockOptionLevel AS FLOAT)), 2) AS MediaStockOptions
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

Nivel,Sem_Stock,Nivel_1,Nivel_2,Nivel_3,MediaStockOptions
1,257,206,41,39,0.75
2,219,207,83,25,0.84
3,86,97,22,13,0.83
4,43,49,8,6,0.78
5,26,37,4,2,0.74


### Stock Options por JobRole

In [6]:
%%sql
SELECT
    JobRole AS Cargo,
    COUNT(*) AS Total,
    SUM(CASE WHEN StockOptionLevel > 0 THEN 1 ELSE 0 END) AS ComStockOptions,
    CAST(ROUND(SUM(CASE WHEN StockOptionLevel > 0 THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercComStock,
    ROUND(AVG(CAST(StockOptionLevel AS FLOAT)), 2) AS MediaStockOptions
FROM Colaboradores
GROUP BY JobRole
ORDER BY PercComStock DESC;

Cargo,Total,ComStockOptions,PercComStock,MediaStockOptions
Manager,102,65,63.7,0.75
Healthcare Representative,131,80,61.1,0.83
Manufacturing Director,145,86,59.3,0.81
Research Director,80,47,58.8,0.85
Laboratory Technician,259,151,58.3,0.82
Sales Executive,326,187,57.4,0.82
Human Resources,52,28,53.8,0.75
Research Scientist,292,156,53.4,0.77
Sales Representative,83,39,47.0,0.63


### Stock Options por Department

In [7]:
%%sql
SELECT
    Department AS Departamento,
    COUNT(*) AS Total,
    SUM(CASE WHEN StockOptionLevel > 0 THEN 1 ELSE 0 END) AS ComStockOptions,
    CAST(ROUND(SUM(CASE WHEN StockOptionLevel > 0 THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercComStock
FROM Colaboradores
GROUP BY Department
ORDER BY PercComStock DESC;

Departamento,Total,ComStockOptions,PercComStock
Research & Development,961,554,57.6
Human Resources,63,36,57.1
Sales,446,249,55.8


## 3. DISTÂNCIA DE CASA

### Observação: Existe imensa gente a trabalhar a 1 unidade da empresa

### Distribuição de DistanceFromHome

In [8]:
%%sql
SELECT
    DistanceFromHome,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 1) AS DECIMAL(5,1)) AS Percentagem
FROM Colaboradores
GROUP BY DistanceFromHome
ORDER BY DistanceFromHome;

DistanceFromHome,Total,Percentagem
1,208,14.1
2,211,14.4
3,84,5.7
4,64,4.4
5,65,4.4
6,59,4.0
7,84,5.7
8,80,5.4
9,85,5.8
10,86,5.9


### Distribuição por categorias

In [9]:
%%sql
SELECT
    CASE
        WHEN DistanceFromHome = 1 THEN '1 (Muito Perto)'
        WHEN DistanceFromHome <= 5 THEN '2-5 (Perto)'
        WHEN DistanceFromHome <= 10 THEN '6-10'
        WHEN DistanceFromHome <= 15 THEN '11-15'
        WHEN DistanceFromHome <= 20 THEN '16-20'
        ELSE '21+ (Longe)'
    END AS DistanciaCategoria,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 1) AS DECIMAL(5,1)) AS Percentagem
FROM Colaboradores
GROUP BY
    CASE
        WHEN DistanceFromHome = 1 THEN '1 (Muito Perto)'
        WHEN DistanceFromHome <= 5 THEN '2-5 (Perto)'
        WHEN DistanceFromHome <= 10 THEN '6-10'
        WHEN DistanceFromHome <= 15 THEN '11-15'
        WHEN DistanceFromHome <= 20 THEN '16-20'
        ELSE '21+ (Longe)'
    END
ORDER BY MIN(DistanceFromHome);

DistanciaCategoria,Total,Percentagem
1 (Muito Perto),208,14.1
2-5 (Perto),424,28.8
6-10,394,26.8
11-15,115,7.8
16-20,125,8.5
21+ (Longe),204,13.9


### Quantos vivem a 1 unidade?

In [10]:
%%sql
SELECT
    'Colaboradores a 1 unidade de distância' AS Metrica,
    COUNT(*) AS Total,
    CAST(CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 1) AS DECIMAL(5,1)) AS VARCHAR(10)) + '%' AS Percentagem
FROM Colaboradores
WHERE DistanceFromHome = 1;

Metrica,Total,Percentagem
Colaboradores a 1 unidade de distância,208,14.1%


## 4. PERFORMANCE RATING

### Observação: O PerformanceRating só tem 3 e 4 (vai de 1 a 4)

### Verificar valores existentes

In [11]:
%%sql
SELECT DISTINCT PerformanceRating
FROM Colaboradores
ORDER BY PerformanceRating;

PerformanceRating
3
4


### Distribuição de Performance Rating

In [12]:
%%sql
SELECT
    PerformanceRating,
    CASE PerformanceRating
        WHEN 1 THEN 'Low'
        WHEN 2 THEN 'Good'
        WHEN 3 THEN 'Excellent'
        WHEN 4 THEN 'Outstanding'
    END AS Descricao,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 1) AS DECIMAL(5,1)) AS Percentagem
FROM Colaboradores
GROUP BY PerformanceRating
ORDER BY PerformanceRating;

PerformanceRating,Descricao,Total,Percentagem
3,Excellent,1244,84.6
4,Outstanding,226,15.4


### Performance por Department

In [13]:
%%sql
SELECT
    Department AS Departamento,
    SUM(CASE WHEN PerformanceRating = 3 THEN 1 ELSE 0 END) AS Excellent,
    SUM(CASE WHEN PerformanceRating = 4 THEN 1 ELSE 0 END) AS Outstanding,
    ROUND(AVG(CAST(PerformanceRating AS FLOAT)), 2) AS MediaPerformance
FROM Colaboradores
GROUP BY Department;

Departamento,Excellent,Outstanding,MediaPerformance
Human Resources,54,9,3.14
Sales,385,61,3.14
Research & Development,805,156,3.16


## 5. CONFLITO DE GERAÇÕES

### Questão: Será que haverá conflito de gerações?

### Satisfação por geração

In [14]:
%%sql
SELECT
    CASE
        WHEN Age < 28 THEN 'Gen Z (< 28)'
        WHEN Age < 44 THEN 'Millennials (28-43)'
        WHEN Age < 60 THEN 'Gen X (44-59)'
        ELSE 'Baby Boomers (60+)'
    END AS Geracao,
    COUNT(*) AS Total,
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2) AS SatisfacaoTrabalho,
    ROUND(AVG(CAST(WorkLifeBalance AS FLOAT)), 2) AS WorkLifeBalance,
    ROUND(AVG(CAST(EnvironmentSatisfaction AS FLOAT)), 2) AS SatisfacaoAmbiente,
    ROUND(AVG(CAST(RelationshipSatisfaction AS FLOAT)), 2) AS SatisfacaoRelacoes
FROM Colaboradores
GROUP BY
    CASE
        WHEN Age < 28 THEN 'Gen Z (< 28)'
        WHEN Age < 44 THEN 'Millennials (28-43)'
        WHEN Age < 60 THEN 'Gen X (44-59)'
        ELSE 'Baby Boomers (60+)'
    END
ORDER BY MIN(Age);

Geracao,Total,SatisfacaoTrabalho,WorkLifeBalance,SatisfacaoAmbiente,SatisfacaoRelacoes
Gen Z (< 28),210,2.71,2.77,2.68,2.69
Millennials (28-43),913,2.74,2.77,2.73,2.67
Gen X (44-59),342,2.71,2.73,2.74,2.83
Baby Boomers (60+),5,2.2,3.0,2.0,3.4


### Attrition por geração

In [15]:
%%sql
SELECT
    CASE
        WHEN Age < 28 THEN 'Gen Z (< 28)'
        WHEN Age < 44 THEN 'Millennials (28-43)'
        WHEN Age < 60 THEN 'Gen X (44-59)'
        ELSE 'Baby Boomers (60+)'
    END AS Geracao,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY
    CASE
        WHEN Age < 28 THEN 'Gen Z (< 28)'
        WHEN Age < 44 THEN 'Millennials (28-43)'
        WHEN Age < 60 THEN 'Gen X (44-59)'
        ELSE 'Baby Boomers (60+)'
    END
ORDER BY MIN(Age);

Geracao,Saidas,Total,TaxaAttrition
Gen Z (< 28),59,210,28.1
Millennials (28-43),136,913,14.9
Gen X (44-59),42,342,12.3
Baby Boomers (60+),0,5,0.0


## 6. CHEFIAS POR GÉNERO

### Observação: Chefias são mais do sexo masculino

### Análise de níveis de chefia por género

In [16]:
%%sql
SELECT
    JobLevel AS Nivel,
    CASE JobLevel
        WHEN 1 THEN 'Entry Level'
        WHEN 2 THEN 'Junior'
        WHEN 3 THEN 'Mid-Level'
        WHEN 4 THEN 'Senior'
        WHEN 5 THEN 'Executive'
    END AS DescricaoNivel,
    SUM(CASE WHEN Gender = 'Female' THEN 1 ELSE 0 END) AS Mulheres,
    SUM(CASE WHEN Gender = 'Male' THEN 1 ELSE 0 END) AS Homens,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercMulheres
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

Nivel,DescricaoNivel,Mulheres,Homens,PercMulheres
1,Entry Level,199,344,36.6
2,Junior,220,314,41.2
3,Mid-Level,94,124,43.1
4,Senior,51,55,48.1
5,Executive,24,45,34.8


### Cargos de gestão por género

In [17]:
%%sql
SELECT
    JobRole AS Cargo,
    SUM(CASE WHEN Gender = 'Female' THEN 1 ELSE 0 END) AS Mulheres,
    SUM(CASE WHEN Gender = 'Male' THEN 1 ELSE 0 END) AS Homens,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS PercMulheres
FROM Colaboradores
WHERE JobRole LIKE '%Manager%' OR JobRole LIKE '%Director%'
GROUP BY JobRole
ORDER BY PercMulheres;

Cargo,Mulheres,Homens,PercMulheres
Research Director,33,47,41.3
Manager,47,55,46.1
Manufacturing Director,72,73,49.7


## 7. RELAÇÃO ANOS EXPERIÊNCIA VS FELICIDADE

### Questão: Como TotalWorkingYears e YearsAtCompany estão relacionados com felicidade?

### Satisfação por anos na empresa

In [18]:
%%sql
SELECT
    CASE
        WHEN YearsAtCompany = 0 THEN '0 (Novo)'
        WHEN YearsAtCompany <= 2 THEN '1-2 anos'
        WHEN YearsAtCompany <= 5 THEN '3-5 anos'
        WHEN YearsAtCompany <= 10 THEN '6-10 anos'
        ELSE '10+ anos'
    END AS AnosNaEmpresa,
    COUNT(*) AS Total,
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2) AS SatisfacaoTrabalho,
    ROUND(AVG(CAST(WorkLifeBalance AS FLOAT)), 2) AS WorkLifeBalance
FROM Colaboradores
GROUP BY
    CASE
        WHEN YearsAtCompany = 0 THEN '0 (Novo)'
        WHEN YearsAtCompany <= 2 THEN '1-2 anos'
        WHEN YearsAtCompany <= 5 THEN '3-5 anos'
        WHEN YearsAtCompany <= 10 THEN '6-10 anos'
        ELSE '10+ anos'
    END
ORDER BY MIN(YearsAtCompany);

AnosNaEmpresa,Total,SatisfacaoTrabalho,WorkLifeBalance
0 (Novo),44,2.59,2.75
1-2 anos,298,2.79,2.76
3-5 anos,434,2.66,2.76
6-10 anos,448,2.77,2.75
10+ anos,246,2.72,2.78


### Satisfação por total de experiência

In [19]:
%%sql
SELECT
    CASE
        WHEN TotalWorkingYears <= 5 THEN '0-5 anos'
        WHEN TotalWorkingYears <= 10 THEN '6-10 anos'
        WHEN TotalWorkingYears <= 20 THEN '11-20 anos'
        ELSE '20+ anos'
    END AS ExperienciaTotal,
    COUNT(*) AS Total,
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2) AS SatisfacaoTrabalho,
    ROUND(AVG(CAST(WorkLifeBalance AS FLOAT)), 2) AS WorkLifeBalance
FROM Colaboradores
GROUP BY
    CASE
        WHEN TotalWorkingYears <= 5 THEN '0-5 anos'
        WHEN TotalWorkingYears <= 10 THEN '6-10 anos'
        WHEN TotalWorkingYears <= 20 THEN '11-20 anos'
        ELSE '20+ anos'
    END
ORDER BY MIN(TotalWorkingYears);

ExperienciaTotal,Total,SatisfacaoTrabalho,WorkLifeBalance
0-5 anos,316,2.76,2.76
6-10 anos,607,2.73,2.77
11-20 anos,340,2.73,2.74
20+ anos,207,2.7,2.77


## 8. RATES E INCOME

### Questões sobre DailyRate, MonthlyRate, MonthlyIncome

### Estatísticas dos diferentes rates

In [20]:
%%sql
SELECT
    'MonthlyIncome' AS Metrica,
    MIN(MonthlyIncome) AS Minimo,
    MAX(MonthlyIncome) AS Maximo,
    AVG(MonthlyIncome) AS Media,
    STDEV(MonthlyIncome) AS DesvioPadrao
FROM Colaboradores
UNION ALL
SELECT
    'MonthlyRate',
    MIN(MonthlyRate),
    MAX(MonthlyRate),
    AVG(MonthlyRate),
    STDEV(MonthlyRate)
FROM Colaboradores
UNION ALL
SELECT
    'DailyRate',
    MIN(DailyRate),
    MAX(DailyRate),
    AVG(DailyRate),
    STDEV(DailyRate)
FROM Colaboradores
UNION ALL
SELECT
    'HourlyRate',
    MIN(HourlyRate),
    MAX(HourlyRate),
    AVG(HourlyRate),
    STDEV(HourlyRate)
FROM Colaboradores;

Metrica,Minimo,Maximo,Media,DesvioPadrao
MonthlyIncome,1009,19999,6502,4707.956783097994
MonthlyRate,2094,26999,14313,7117.786044059978
DailyRate,102,1499,802,403.5090999435282
HourlyRate,30,100,65,20.32942759399617


### Correlação entre MonthlyIncome e outros rates

In [21]:
%%sql
SELECT TOP 20
    EmployeeNumber,
    MonthlyIncome,
    MonthlyRate,
    DailyRate,
    HourlyRate,
    JobLevel
FROM Colaboradores
ORDER BY MonthlyIncome DESC;

EmployeeNumber,MonthlyIncome,MonthlyRate,DailyRate,HourlyRate,JobLevel
259,19999,5678,699,65,5
1035,19973,20284,247,55,5
1191,19943,18575,718,92,5
226,19926,17053,1452,53,5
787,19859,21199,725,78,5
1282,19847,19196,237,83,5
1038,19845,25846,266,57,5
1740,19833,4349,611,88,5
1255,19740,18625,920,96,5
1338,19717,4022,206,99,5


## 9. COLUNAS CONSTANTES (VERIFICAÇÃO)

### Over18, EmployeeCount, StandardHours são constantes?

In [22]:
%%sql
SELECT
    'Over18' AS Coluna,
    COUNT(DISTINCT Over18) AS ValoresDistintos,
    MIN(Over18) AS ValorMinimo,
    MAX(Over18) AS ValorMaximo
FROM Colaboradores
UNION ALL
SELECT
    'EmployeeCount',
    COUNT(DISTINCT EmployeeCount),
    CAST(MIN(EmployeeCount) AS VARCHAR(10)),
    CAST(MAX(EmployeeCount) AS VARCHAR(10))
FROM Colaboradores
UNION ALL
SELECT
    'StandardHours',
    COUNT(DISTINCT StandardHours),
    CAST(MIN(StandardHours) AS VARCHAR(10)),
    CAST(MAX(StandardHours) AS VARCHAR(10))
FROM Colaboradores;

Coluna,ValoresDistintos,ValorMinimo,ValorMaximo
Over18,1,Y,Y
EmployeeCount,1,1,1
StandardHours,1,80,80


## 10. RESUMO DAS QUESTÕES DO GRUPO

In [23]:
%%sql
SELECT '=== RESPOSTAS ÀS QUESTÕES DO GRUPO ===' AS Info;

Info
=== RESPOSTAS ÀS QUESTÕES DO GRUPO ===


### Q1: JobRoles em vários departamentos?

In [24]:
%%sql
SELECT
    'Q1: JobRoles em múltiplos departamentos' AS Questao,
    COUNT(*) AS Resposta
FROM (
    SELECT JobRole
    FROM Colaboradores
    GROUP BY JobRole
    HAVING COUNT(DISTINCT Department) > 1
) AS multi;

Questao,Resposta
Q1: JobRoles em múltiplos departamentos,1


### Q2: Stock Options apenas para certos colaboradores?

In [25]:
%%sql
SELECT
    'Q2: Colaboradores SEM Stock Options' AS Questao,
    CAST(ROUND(SUM(CASE WHEN StockOptionLevel = 0 THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS Resposta
FROM Colaboradores;

Questao,Resposta
Q2: Colaboradores SEM Stock Options,42.900000%


### Q3: Muita gente a 1 unidade de distância?

In [26]:
%%sql
SELECT
    'Q3: Colaboradores a distância = 1' AS Questao,
    CAST(ROUND(SUM(CASE WHEN DistanceFromHome = 1 THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS Resposta
FROM Colaboradores;

Questao,Resposta
Q3: Colaboradores a distância = 1,14.100000%


### Q4: Performance Rating só 3 e 4?

In [27]:
%%sql
SELECT
    'Q4: Performance Rating apenas 3 e 4' AS Questao,
    CASE WHEN MIN(PerformanceRating) >= 3 THEN 'CONFIRMADO' ELSE 'Existem outros valores' END AS Resposta
FROM Colaboradores;

Questao,Resposta
Q4: Performance Rating apenas 3 e 4,CONFIRMADO


### Q5: Chefias mais masculinas?

In [28]:
%%sql
SELECT
    'Q5: % Mulheres em níveis 4-5 (Senior/Executive)' AS Questao,
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS Resposta
FROM Colaboradores
WHERE JobLevel >= 4;

Questao,Resposta
Q5: % Mulheres em níveis 4-5 (Senior/Executive),42.900000%
