# Analise Exploratoria - "Quem Somos"
**Projeto 1: IBM HR Analytics - AiDAPT - Cegid Academy**

Visao geral da empresa: demografia, estrutura organizacional, salarios e antiguidade.
9 seccoes cobrindo todos os aspectos da populacao.

Base de dados: Projeto1_IBM_HR

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
from urllib.parse import quote_plus
from sqlalchemy import create_engine
load_dotenv(find_dotenv())

%load_ext sql

host = os.getenv('MSSQL_HOST', 'localhost')
port = os.getenv('MSSQL_PORT', '1433')
user = os.getenv('MSSQL_USER', 'sa')
password = quote_plus(os.getenv('MSSQL_PASSWORD', 'your_password_here'))
engine = create_engine(f"mssql+pymssql://{user}:{password}@{host}:{port}/Projeto1_IBM_HR")
%sql engine --alias Projeto1_IBM_HR

## 1. VISÃO GERAL DA EMPRESA

### Total de colaboradores

In [2]:
%%sql
SELECT COUNT(*) AS TotalColaboradores FROM Colaboradores;

TotalColaboradores
1470


### Distribuição por Attrition (saídas)

In [3]:
%%sql
SELECT
    Attrition,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Attrition;

Attrition,Total,Percentagem
No,1233,83.88
Yes,237,16.12


## 2. DEMOGRAFIA - IDADE

### Estatísticas de idade

In [4]:
%%sql
SELECT
    MIN(Age) AS IdadeMinima,
    MAX(Age) AS IdadeMaxima,
    AVG(Age) AS IdadeMedia,
    STDEV(Age) AS DesvioPadrao
FROM Colaboradores;

IdadeMinima,IdadeMaxima,IdadeMedia,DesvioPadrao
18,60,36,9.135373489136729


### Distribuição por faixa etária

In [5]:
%%sql
SELECT
    CASE
        WHEN Age < 25 THEN '18-24 (Entrada)'
        WHEN Age < 30 THEN '25-29 (Jovem)'
        WHEN Age < 35 THEN '30-34'
        WHEN Age < 40 THEN '35-39'
        WHEN Age < 45 THEN '40-44'
        WHEN Age < 50 THEN '45-49'
        WHEN Age < 55 THEN '50-54'
        WHEN Age < 60 THEN '55-59 (Pré-reforma)'
        ELSE '60+ (Reforma próxima)'
    END AS FaixaEtaria,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY
    CASE
        WHEN Age < 25 THEN '18-24 (Entrada)'
        WHEN Age < 30 THEN '25-29 (Jovem)'
        WHEN Age < 35 THEN '30-34'
        WHEN Age < 40 THEN '35-39'
        WHEN Age < 45 THEN '40-44'
        WHEN Age < 50 THEN '45-49'
        WHEN Age < 55 THEN '50-54'
        WHEN Age < 60 THEN '55-59 (Pré-reforma)'
        ELSE '60+ (Reforma próxima)'
    END
ORDER BY MIN(Age);

FaixaEtaria,Total,Percentagem
18-24 (Entrada),97,6.6
25-29 (Jovem),229,15.58
30-34,325,22.11
35-39,297,20.2
40-44,208,14.15
45-49,141,9.59
50-54,104,7.07
55-59 (Pré-reforma),64,4.35
60+ (Reforma próxima),5,0.34


## 3. DEMOGRAFIA - GÉNERO

### Distribuição por género

In [6]:
%%sql
SELECT
    Gender AS Genero,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Gender;

Genero,Total,Percentagem
Female,588,40.0
Male,882,60.0


## 4. DEMOGRAFIA - ESTADO CIVIL

In [7]:
%%sql
SELECT
    MaritalStatus AS EstadoCivil,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY MaritalStatus
ORDER BY Total DESC;

EstadoCivil,Total,Percentagem
Married,673,45.78
Single,470,31.97
Divorced,327,22.24


## 5. EDUCAÇÃO

### Nível de educação

In [8]:
%%sql
SELECT
    Education,
    CASE Education
        WHEN 1 THEN 'Below College'
        WHEN 2 THEN 'College'
        WHEN 3 THEN 'Bachelor'
        WHEN 4 THEN 'Master'
        WHEN 5 THEN 'Doctor'
    END AS NivelEducacao,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Education
ORDER BY Education;

Education,NivelEducacao,Total,Percentagem
1,Below College,170,11.56
2,College,282,19.18
3,Bachelor,572,38.91
4,Master,398,27.07
5,Doctor,48,3.27


### Área de educação

In [9]:
%%sql
SELECT
    EducationField AS AreaEducacao,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY EducationField
ORDER BY Total DESC;

AreaEducacao,Total,Percentagem
Life Sciences,606,41.22
Medical,464,31.56
Marketing,159,10.82
Technical Degree,132,8.98
Other,82,5.58
Human Resources,27,1.84


## 6. ESTRUTURA ORGANIZACIONAL

### Distribuição por departamento

In [10]:
%%sql
SELECT
    Department AS Departamento,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY Department
ORDER BY Total DESC;

Departamento,Total,Percentagem
Research & Development,961,65.37
Sales,446,30.34
Human Resources,63,4.29


### Distribuição por cargo (JobRole)

In [11]:
%%sql
SELECT
    JobRole AS Cargo,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY JobRole
ORDER BY Total DESC;

Cargo,Total,Percentagem
Sales Executive,326,22.18
Research Scientist,292,19.86
Laboratory Technician,259,17.62
Manufacturing Director,145,9.86
Healthcare Representative,131,8.91
Manager,102,6.94
Sales Representative,83,5.65
Research Director,80,5.44
Human Resources,52,3.54


### Distribuição por nível hierárquico

In [12]:
%%sql
SELECT
    JobLevel AS NivelHierarquico,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

NivelHierarquico,Total,Percentagem
1,543,36.94
2,534,36.33
3,218,14.83
4,106,7.21
5,69,4.69


### Cargos por departamento

In [13]:
%%sql
SELECT
    Department AS Departamento,
    JobRole AS Cargo,
    COUNT(*) AS Total
FROM Colaboradores
GROUP BY Department, JobRole
ORDER BY Department, Total DESC;

Departamento,Cargo,Total
Human Resources,Human Resources,52
Human Resources,Manager,11
Research & Development,Research Scientist,292
Research & Development,Laboratory Technician,259
Research & Development,Manufacturing Director,145
Research & Development,Healthcare Representative,131
Research & Development,Research Director,80
Research & Development,Manager,54
Sales,Sales Executive,326
Sales,Sales Representative,83


## 7. ANTIGUIDADE

### Estatísticas de antiguidade na empresa

In [14]:
%%sql
SELECT
    MIN(YearsAtCompany) AS MinAnosEmpresa,
    MAX(YearsAtCompany) AS MaxAnosEmpresa,
    AVG(YearsAtCompany) AS MediaAnosEmpresa,
    AVG(TotalWorkingYears) AS MediaExperienciaTotal
FROM Colaboradores;

MinAnosEmpresa,MaxAnosEmpresa,MediaAnosEmpresa,MediaExperienciaTotal
0,40,7,11


### Distribuição por anos na empresa

In [15]:
%%sql
SELECT
    CASE
        WHEN YearsAtCompany = 0 THEN '0 (Recém-chegado)'
        WHEN YearsAtCompany <= 2 THEN '1-2 anos'
        WHEN YearsAtCompany <= 5 THEN '3-5 anos'
        WHEN YearsAtCompany <= 10 THEN '6-10 anos'
        WHEN YearsAtCompany <= 20 THEN '11-20 anos'
        ELSE '20+ anos'
    END AS AnosNaEmpresa,
    COUNT(*) AS Total,
    CAST(ROUND(COUNT(*) * 100.0 / (SELECT COUNT(*) FROM Colaboradores), 2) AS DECIMAL(5,2)) AS Percentagem
FROM Colaboradores
GROUP BY
    CASE
        WHEN YearsAtCompany = 0 THEN '0 (Recém-chegado)'
        WHEN YearsAtCompany <= 2 THEN '1-2 anos'
        WHEN YearsAtCompany <= 5 THEN '3-5 anos'
        WHEN YearsAtCompany <= 10 THEN '6-10 anos'
        WHEN YearsAtCompany <= 20 THEN '11-20 anos'
        ELSE '20+ anos'
    END
ORDER BY MIN(YearsAtCompany);

AnosNaEmpresa,Total,Percentagem
0 (Recém-chegado),44,2.99
1-2 anos,298,20.27
3-5 anos,434,29.52
6-10 anos,448,30.48
11-20 anos,180,12.24
20+ anos,66,4.49


## 8. SALÁRIOS

### Estatísticas salariais gerais

In [16]:
%%sql
SELECT
    MIN(MonthlyIncome) AS SalarioMinimo,
    MAX(MonthlyIncome) AS SalarioMaximo,
    AVG(MonthlyIncome) AS SalarioMedio,
    STDEV(MonthlyIncome) AS DesvioPadrao
FROM Colaboradores;

SalarioMinimo,SalarioMaximo,SalarioMedio,DesvioPadrao
1009,19999,6502,4707.956783097994


### Salário médio por departamento

In [17]:
%%sql
SELECT
    Department AS Departamento,
    COUNT(*) AS NumColaboradores,
    MIN(MonthlyIncome) AS SalarioMin,
    AVG(MonthlyIncome) AS SalarioMedio,
    MAX(MonthlyIncome) AS SalarioMax
FROM Colaboradores
GROUP BY Department
ORDER BY SalarioMedio DESC;

Departamento,NumColaboradores,SalarioMin,SalarioMedio,SalarioMax
Sales,446,1052,6959,19847
Human Resources,63,1555,6654,19717
Research & Development,961,1009,6281,19999


### Salário médio por nível hierárquico

In [18]:
%%sql
SELECT
    JobLevel AS Nivel,
    COUNT(*) AS NumColaboradores,
    AVG(MonthlyIncome) AS SalarioMedio
FROM Colaboradores
GROUP BY JobLevel
ORDER BY JobLevel;

Nivel,NumColaboradores,SalarioMedio
1,543,2786
2,534,5502
3,218,9817
4,106,15503
5,69,19191


## 9. RESUMO EXECUTIVO - "QUEM SOMOS"

In [19]:
%%sql
SELECT
    'Total de Colaboradores' AS Metrica,
    CAST(COUNT(*) AS VARCHAR(50)) AS Valor
FROM Colaboradores
UNION ALL
SELECT
    'Média de Idade',
    CAST(ROUND(AVG(CAST(Age AS FLOAT)), 1) AS VARCHAR(50)) + ' anos'
FROM Colaboradores
UNION ALL
SELECT
    '% Mulheres',
    CAST(ROUND(SUM(CASE WHEN Gender = 'Female' THEN 1.0 ELSE 0.0 END) * 100 / COUNT(*), 1) AS VARCHAR(50)) + '%'
FROM Colaboradores
UNION ALL
SELECT
    '% Attrition (Saídas)',
    CAST(ROUND(SUM(CASE WHEN Attrition = 'Yes' THEN 1.0 ELSE 0.0 END) * 100 / COUNT(*), 1) AS VARCHAR(50)) + '%'
FROM Colaboradores
UNION ALL
SELECT
    'Salário Médio',
    CAST(ROUND(AVG(CAST(MonthlyIncome AS FLOAT)), 0) AS VARCHAR(50))
FROM Colaboradores
UNION ALL
SELECT
    'Média Anos na Empresa',
    CAST(ROUND(AVG(CAST(YearsAtCompany AS FLOAT)), 1) AS VARCHAR(50)) + ' anos'
FROM Colaboradores;

Metrica,Valor
Total de Colaboradores,1470
Média de Idade,36.9 anos
% Mulheres,40.000000%
% Attrition (Saídas),16.100000%
Salário Médio,6503
Média Anos na Empresa,7 anos
