# Analise de Attrition - Perfil de Quem Sai
**Projeto 1: IBM HR Analytics - AiDAPT - Cegid Academy**

Attrition (saidas) e factores de risco.
Perfil comparativo entre quem sai e quem fica, com identificacao de colaboradores em risco.

Base de dados: Projeto1_IBM_HR

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from urllib.parse import quote_plus
from sqlalchemy import create_engine
load_dotenv(find_dotenv())

%load_ext sql

host = os.getenv('MSSQL_HOST', 'localhost')
port = os.getenv('MSSQL_PORT', '1433')
user = os.getenv('MSSQL_USER', 'sa')
password = quote_plus(os.getenv('MSSQL_PASSWORD', 'your_password_here'))
engine = create_engine(f"mssql+pymssql://{user}:{password}@{host}:{port}/Projeto1_IBM_HR")
%sql engine --alias Projeto1_IBM_HR

## 1. VISÃO GERAL DO ATTRITION

### Taxa global de attrition

In [2]:
%%sql
SELECT
    Attrition,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Attrition;

Attrition,Total,Percentagem
No,1233,83.88
Yes,237,16.12


## 2. PERFIL DE QUEM SAI VS QUEM FICA

### Comparação de médias

In [3]:
%%sql
SELECT
    Attrition,
    COUNT(*) AS Total,
    ROUND(AVG(CAST(Age AS FLOAT)), 1) AS IdadeMedia,
    ROUND(AVG(CAST(MonthlyIncome AS FLOAT)), 0) AS SalarioMedio,
    ROUND(AVG(CAST(YearsAtCompany AS FLOAT)), 1) AS AnosEmpresaMedia,
    ROUND(AVG(CAST(TotalWorkingYears AS FLOAT)), 1) AS ExperienciaMedia,
    ROUND(AVG(CAST(DistanceFromHome AS FLOAT)), 1) AS DistanciaMedia,
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2) AS SatisfacaoMedia,
    ROUND(AVG(CAST(WorkLifeBalance AS FLOAT)), 2) AS WorkLifeMedia,
    ROUND(AVG(CAST(YearsSinceLastPromotion AS FLOAT)), 1) AS AnosSemPromocao
FROM Colaboradores
GROUP BY Attrition;

Attrition,Total,IdadeMedia,SalarioMedio,AnosEmpresaMedia,ExperienciaMedia,DistanciaMedia,SatisfacaoMedia,WorkLifeMedia,AnosSemPromocao
Yes,237,33.6,4787.0,5.1,8.2,10.6,2.47,2.66,1.9
No,1233,37.6,6833.0,7.4,11.9,8.9,2.78,2.78,2.2


## 3. ATTRITION POR DEPARTAMENTO

In [4]:
%%sql
SELECT
    Department AS Departamento,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    SUM(CASE WHEN Attrition = 'No' THEN 1 ELSE 0 END) AS Permaneceram,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY Department
ORDER BY TaxaAttrition DESC;

Departamento,Saidas,Permaneceram,Total,TaxaAttrition
Sales,92,354,446,20.6
Human Resources,12,51,63,19.0
Research & Development,133,828,961,13.8


## 4. ATTRITION POR CARGO

In [5]:
%%sql
SELECT
    JobRole AS Cargo,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY JobRole
ORDER BY TaxaAttrition DESC;

Cargo,Saidas,Total,TaxaAttrition
Sales Representative,33,83,39.8
Laboratory Technician,62,259,23.9
Human Resources,12,52,23.1
Sales Executive,57,326,17.5
Research Scientist,47,292,16.1
Healthcare Representative,9,131,6.9
Manufacturing Director,10,145,6.9
Manager,5,102,4.9
Research Director,2,80,2.5


## 5. ATTRITION POR CARACTERÍSTICAS DEMOGRÁFICAS

### Por género

In [6]:
%%sql
SELECT
    Gender AS Genero,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY Gender;

Genero,Saidas,Total,TaxaAttrition
Male,150,882,17.0
Female,87,588,14.8


### Por estado civil

In [7]:
%%sql
SELECT
    MaritalStatus AS EstadoCivil,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY MaritalStatus
ORDER BY TaxaAttrition DESC;

EstadoCivil,Saidas,Total,TaxaAttrition
Single,120,470,25.5
Married,84,673,12.5
Divorced,33,327,10.1


### Por faixa etária

In [8]:
%%sql
SELECT
    CASE
        WHEN Age < 25 THEN '18-24'
        WHEN Age < 30 THEN '25-29'
        WHEN Age < 35 THEN '30-34'
        WHEN Age < 40 THEN '35-39'
        WHEN Age < 50 THEN '40-49'
        ELSE '50+'
    END AS FaixaEtaria,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY
    CASE
        WHEN Age < 25 THEN '18-24'
        WHEN Age < 30 THEN '25-29'
        WHEN Age < 35 THEN '30-34'
        WHEN Age < 40 THEN '35-39'
        WHEN Age < 50 THEN '40-49'
        ELSE '50+'
    END
ORDER BY TaxaAttrition DESC;

FaixaEtaria,Saidas,Total,TaxaAttrition
18-24,38,97,39.2
25-29,53,229,23.1
30-34,59,325,18.2
50+,23,173,13.3
35-39,30,297,10.1
40-49,34,349,9.7


## 6. FACTORES DE RISCO PARA ATTRITION

### 6.1 Overtime

In [9]:
%%sql
SELECT
    OverTime,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY OverTime;

OverTime,Saidas,Total,TaxaAttrition
Yes,127,416,30.5
No,110,1054,10.4


### 6.2 Business Travel

In [10]:
%%sql
SELECT
    BusinessTravel AS TipoViagem,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY BusinessTravel
ORDER BY TaxaAttrition DESC;

TipoViagem,Saidas,Total,TaxaAttrition
Travel_Frequently,69,277,24.9
Travel_Rarely,156,1043,15.0
Non-Travel,12,150,8.0


### 6.3 Distância de Casa

In [11]:
%%sql
SELECT
    CASE
        WHEN DistanceFromHome <= 5 THEN '0-5 (Muito Perto)'
        WHEN DistanceFromHome <= 10 THEN '6-10 (Perto)'
        WHEN DistanceFromHome <= 20 THEN '11-20 (Médio)'
        ELSE '20+ (Longe)'
    END AS Distancia,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY
    CASE
        WHEN DistanceFromHome <= 5 THEN '0-5 (Muito Perto)'
        WHEN DistanceFromHome <= 10 THEN '6-10 (Perto)'
        WHEN DistanceFromHome <= 20 THEN '11-20 (Médio)'
        ELSE '20+ (Longe)'
    END
ORDER BY TaxaAttrition DESC;

Distancia,Saidas,Total,TaxaAttrition
20+ (Longe),45,204,22.1
11-20 (Médio),48,240,20.0
6-10 (Perto),57,394,14.5
0-5 (Muito Perto),87,632,13.8


### 6.4 Anos sem Promoção

In [12]:
%%sql
SELECT
    CASE
        WHEN YearsSinceLastPromotion = 0 THEN '0 anos'
        WHEN YearsSinceLastPromotion <= 2 THEN '1-2 anos'
        WHEN YearsSinceLastPromotion <= 5 THEN '3-5 anos'
        ELSE '5+ anos'
    END AS AnosSemPromocao,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY
    CASE
        WHEN YearsSinceLastPromotion = 0 THEN '0 anos'
        WHEN YearsSinceLastPromotion <= 2 THEN '1-2 anos'
        WHEN YearsSinceLastPromotion <= 5 THEN '3-5 anos'
        ELSE '5+ anos'
    END
ORDER BY TaxaAttrition DESC;

AnosSemPromocao,Saidas,Total,TaxaAttrition
0 anos,110,581,18.9
5+ anos,35,215,16.3
1-2 anos,76,516,14.7
3-5 anos,16,158,10.1


### 6.5 Nível de Satisfação

In [13]:
%%sql
SELECT
    CASE
        WHEN JobSatisfaction = 1 THEN '1-Low'
        WHEN JobSatisfaction = 2 THEN '2-Medium'
        WHEN JobSatisfaction = 3 THEN '3-High'
        WHEN JobSatisfaction = 4 THEN '4-Very High'
    END AS SatisfacaoTrabalho,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY JobSatisfaction
ORDER BY JobSatisfaction;

SatisfacaoTrabalho,Saidas,Total,TaxaAttrition
1-Low,66,289,22.8
2-Medium,46,280,16.4
3-High,73,442,16.5
4-Very High,52,459,11.3


### 6.6 Work-Life Balance

In [14]:
%%sql
SELECT
    CASE
        WHEN WorkLifeBalance = 1 THEN '1-Bad'
        WHEN WorkLifeBalance = 2 THEN '2-Good'
        WHEN WorkLifeBalance = 3 THEN '3-Better'
        WHEN WorkLifeBalance = 4 THEN '4-Best'
    END AS WorkLifeBalance,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY WorkLifeBalance
ORDER BY WorkLifeBalance;

WorkLifeBalance,Saidas,Total,TaxaAttrition
1-Bad,25,80,31.3
2-Good,58,344,16.9
3-Better,127,893,14.2
4-Best,27,153,17.6


### 6.7 Faixa Salarial

In [15]:
%%sql
SELECT
    CASE
        WHEN MonthlyIncome < 3000 THEN 'Baixo (<3000)'
        WHEN MonthlyIncome < 5000 THEN 'Médio-Baixo (3000-5000)'
        WHEN MonthlyIncome < 8000 THEN 'Médio (5000-8000)'
        WHEN MonthlyIncome < 12000 THEN 'Alto (8000-12000)'
        ELSE 'Muito Alto (>12000)'
    END AS FaixaSalarial,
    SUM(CASE WHEN Attrition = 'Yes' THEN 1 ELSE 0 END) AS Saidas,
    COUNT(*) AS Total,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS DECIMAL(5,1)) AS TaxaAttrition
FROM Colaboradores
GROUP BY
    CASE
        WHEN MonthlyIncome < 3000 THEN 'Baixo (<3000)'
        WHEN MonthlyIncome < 5000 THEN 'Médio-Baixo (3000-5000)'
        WHEN MonthlyIncome < 8000 THEN 'Médio (5000-8000)'
        WHEN MonthlyIncome < 12000 THEN 'Alto (8000-12000)'
        ELSE 'Muito Alto (>12000)'
    END
ORDER BY TaxaAttrition DESC;

FaixaSalarial,Saidas,Total,TaxaAttrition
Baixo (<3000),113,395,28.6
Alto (8000-12000),29,186,15.6
Médio-Baixo (3000-5000),50,354,14.1
Médio (5000-8000),34,340,10.0
Muito Alto (>12000),11,195,5.6


## 7. ANÁLISE COMBINADA DE FACTORES DE RISCO

### Perfil completo de quem saiu

In [16]:
%%sql
SELECT
    'Perfil de quem SAIU' AS Categoria,
    ROUND(AVG(CAST(Age AS FLOAT)), 1) AS IdadeMedia,
    CAST(ROUND(SUM(CASE WHEN OverTime = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS PercOvertime,
    CAST(ROUND(SUM(CASE WHEN BusinessTravel = 'Travel_Frequently' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS PercTravelFreq,
    CAST(ROUND(SUM(CASE WHEN MaritalStatus = 'Single' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS PercSolteiros,
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2) AS SatisfacaoMedia,
    ROUND(AVG(CAST(MonthlyIncome AS FLOAT)), 0) AS SalarioMedio,
    ROUND(AVG(CAST(YearsAtCompany AS FLOAT)), 1) AS AnosEmpresaMedia
FROM Colaboradores
WHERE Attrition = 'Yes'
UNION ALL
SELECT
    'Perfil de quem FICOU',
    ROUND(AVG(CAST(Age AS FLOAT)), 1),
    CAST(ROUND(SUM(CASE WHEN OverTime = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%',
    CAST(ROUND(SUM(CASE WHEN BusinessTravel = 'Travel_Frequently' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%',
    CAST(ROUND(SUM(CASE WHEN MaritalStatus = 'Single' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%',
    ROUND(AVG(CAST(JobSatisfaction AS FLOAT)), 2),
    ROUND(AVG(CAST(MonthlyIncome AS FLOAT)), 0),
    ROUND(AVG(CAST(YearsAtCompany AS FLOAT)), 1)
FROM Colaboradores
WHERE Attrition = 'No';

Categoria,IdadeMedia,PercOvertime,PercTravelFreq,PercSolteiros,SatisfacaoMedia,SalarioMedio,AnosEmpresaMedia
Perfil de quem SAIU,33.6,53.600000%,29.100000%,50.600000%,2.47,4787.0,5.1
Perfil de quem FICOU,37.6,23.400000%,16.900000%,28.400000%,2.78,6833.0,7.4


### Colaboradores em maior risco (múltiplos factores)

In [17]:
%%sql
SELECT
    EmployeeNumber,
    Department,
    JobRole,
    Age,
    MonthlyIncome,
    OverTime,
    BusinessTravel,
    JobSatisfaction,
    WorkLifeBalance,
    YearsSinceLastPromotion,
    -- Contagem de factores de risco
    (CASE WHEN OverTime = 'Yes' THEN 1 ELSE 0 END +
     CASE WHEN BusinessTravel = 'Travel_Frequently' THEN 1 ELSE 0 END +
     CASE WHEN JobSatisfaction <= 2 THEN 1 ELSE 0 END +
     CASE WHEN WorkLifeBalance <= 2 THEN 1 ELSE 0 END +
     CASE WHEN YearsSinceLastPromotion >= 5 THEN 1 ELSE 0 END +
     CASE WHEN MonthlyIncome < 3000 THEN 1 ELSE 0 END) AS NumFactoresRisco
FROM Colaboradores
WHERE Attrition = 'No'  -- Colaboradores que ainda não saíram
ORDER BY NumFactoresRisco DESC, JobSatisfaction ASC;

EmployeeNumber,Department,JobRole,Age,MonthlyIncome,OverTime,BusinessTravel,JobSatisfaction,WorkLifeBalance,YearsSinceLastPromotion,NumFactoresRisco
124,Research & Development,Healthcare Representative,46,10673,Yes,Travel_Frequently,1,2,9,5
1244,Research & Development,Research Scientist,27,2235,Yes,Travel_Rarely,1,2,6,5
1282,Sales,Manager,51,19847,Yes,Travel_Frequently,2,2,11,5
238,Research & Development,Laboratory Technician,36,2088,No,Travel_Frequently,2,2,7,5
351,Research & Development,Laboratory Technician,42,2593,Yes,Travel_Rarely,1,3,7,4
145,Research & Development,Healthcare Representative,51,7484,No,Travel_Frequently,1,2,12,4
199,Research & Development,Manager,41,17181,No,Travel_Frequently,1,2,7,4
10,Research & Development,Laboratory Technician,59,2670,Yes,Travel_Rarely,1,2,0,4
88,Research & Development,Research Scientist,35,2194,No,Travel_Frequently,1,2,1,4
532,Sales,Sales Executive,56,13212,No,Travel_Frequently,1,2,7,4


## 8. RESUMO EXECUTIVO - ATTRITION

In [18]:
%%sql
SELECT '=== RESUMO ATTRITION ===' AS Info;

Info
=== RESUMO ATTRITION ===


### Taxa global

In [19]:
%%sql
SELECT
    'Taxa de Attrition Global' AS Metrica,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS Valor
FROM Colaboradores;

Metrica,Valor
Taxa de Attrition Global,16.100000%


### Top 3 factores de risco

In [20]:
%%sql
SELECT
    'TOP FACTORES DE RISCO' AS Categoria,
    'Overtime' AS Factor1,
    'Viagens Frequentes' AS Factor2,
    'Salário Baixo' AS Factor3;

Categoria,Factor1,Factor2,Factor3
TOP FACTORES DE RISCO,Overtime,Viagens Frequentes,Salário Baixo


### Cargo com maior attrition

In [21]:
%%sql
SELECT TOP 1
    'Cargo com Maior Attrition' AS Metrica,
    JobRole AS Cargo,
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*), 1) AS VARCHAR(10)) + '%' AS TaxaAttrition
FROM Colaboradores
GROUP BY JobRole
ORDER BY SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0 END) * 100 / COUNT(*) DESC;

Metrica,Cargo,TaxaAttrition
Cargo com Maior Attrition,Sales Representative,39.800000%


### Número de colaboradores em risco (3+ factores)

In [22]:
%%sql
SELECT
    'Colaboradores em Risco (3+ factores)' AS Metrica,
    COUNT(*) AS Total
FROM Colaboradores
WHERE Attrition = 'No'
  AND (CASE WHEN OverTime = 'Yes' THEN 1 ELSE 0 END +
       CASE WHEN BusinessTravel = 'Travel_Frequently' THEN 1 ELSE 0 END +
       CASE WHEN JobSatisfaction <= 2 THEN 1 ELSE 0 END +
       CASE WHEN WorkLifeBalance <= 2 THEN 1 ELSE 0 END +
       CASE WHEN YearsSinceLastPromotion >= 5 THEN 1 ELSE 0 END +
       CASE WHEN MonthlyIncome < 3000 THEN 1 ELSE 0 END) >= 3;

Metrica,Total
Colaboradores em Risco (3+ factores),164
