# Manipulación y transformación de datos

In [1]:
import pandas as pd

## Carga de datos: Leer el archivo CSV y cargarlo en un DataFrame de pandas.

In [2]:
# Load the employee salary CSV into a DataFrame; the path is relative to the
# notebook's working directory.
empleados = pd.read_csv(filepath_or_buffer="data/Employee_Salaries.csv")

## Exploración inicial
- Verificar la cantidad de registros y columnas en el DataFrame.

In [3]:
# Row count: the first entry of `shape` is the number of records.
print("Cantidad de registros:", empleados.shape[0])

Cantidad de registros: 10291


In [4]:
# Print the DataFrame's column labels (shown as a pandas Index).
print("Columnas:", empleados.columns)

Columnas: Index(['Department', 'Department_Name', 'Division', 'Gender', 'Base_Salary',
       'Overtime_Pay', 'Longevity_Pay', 'Grade'],
      dtype='object')


## Mostrar las primeras filas del DataFrame para entender la estructura de los datos.

In [5]:
# Emit a label line (preceded by a blank line) for the preview shown by the next cell.
print("\nPrimeras filas:")


Primeras filas:


In [12]:
# Preview the first five rows to get a feel for the columns and their values.
empleados.head(n=5)

Unnamed: 0,Department,Department_Name,Division,Gender,Base_Salary,Overtime_Pay,Longevity_Pay,Grade
0,ABS,Alcohol Beverage Services,ABS 85 Administration,M,175873.0,0.0,0.0,M2
1,ABS,Alcohol Beverage Services,ABS 85 Administration,M,145613.36,0.0,0.0,M3
2,ABS,Alcohol Beverage Services,ABS 85 Administration,F,136970.0,0.0,0.0,M3
3,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,89432.694,0.0,2490.0,21
4,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,78947.0,456.68,6257.7,16


## Comprobar si hay valores nulos o faltantes en alguna columna.

In [7]:
# Emit a label line (preceded by a blank line) for the null counts printed by the next cell.
print("\nValores nulos:")


Valores nulos:


In [8]:
# Count missing values per column: `isna` flags each missing cell, `sum` totals the flags by column.
print(empleados.isna().sum())

Department          0
Department_Name     0
Division            0
Gender              0
Base_Salary         0
Overtime_Pay        0
Longevity_Pay       0
Grade              33
dtype: int64


# Transformación de datos
- Crear una nueva columna llamada "Salario_mensual" que contenga el salario mensual de cada empleado (dividiendo el salario base anual, `Base_Salary`, entre 12).

In [28]:
# Derive the monthly salary column by dividing the base salary by 12.
empleados["Salario_mensual"] = empleados["Base_Salary"].div(12)

In [29]:
# Display the DataFrame to confirm the new column is present.
# NOTE(review): the saved output also lists a "Salario_anual" column that none of the
# cells visible here create — confirm where it comes from before relying on a clean re-run.
empleados

Unnamed: 0,Department,Department_Name,Division,Gender,Base_Salary,Overtime_Pay,Longevity_Pay,Grade,Salario_anual,Salario_mensual
0,ABS,Alcohol Beverage Services,ABS 85 Administration,M,175873.0000,0.00,0.00,M2,14656.083333,14656.083333
1,ABS,Alcohol Beverage Services,ABS 85 Administration,M,145613.3600,0.00,0.00,M3,12134.446667,12134.446667
2,ABS,Alcohol Beverage Services,ABS 85 Administration,F,136970.0000,0.00,0.00,M3,11414.166667,11414.166667
3,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,89432.6940,0.00,2490.00,21,7452.724500,7452.724500
4,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,78947.0000,456.68,6257.70,16,6578.916667,6578.916667
...,...,...,...,...,...,...,...,...,...,...
10286,TBS,Department of Technology and Enterprise Busine...,TBS 34 OSP Low Code Governance and Administration,M,134500.0000,0.00,0.00,N28,11208.333333,11208.333333
10287,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Director,F,215032.1000,0.00,0.00,,17919.341667,17919.341667
10288,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Office of Zoning and Administrative Hea...,F,84365.5529,0.00,0.00,N18,7030.462742,7030.462742
10289,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Zoning and Administrative Hearings,F,98228.0000,0.00,1596.27,N21,8185.666667,8185.666667


## Calcular el promedio de salario mensual por departamento.

In [30]:
# Mean monthly salary per department: group on Department_Name and average Salario_mensual.
promedio_salario_por_departamento = (
    empleados
    .groupby("Department_Name")["Salario_mensual"]
    .mean()
)
# The original name says "horas" (hours) although the values are salaries; it is kept
# as an alias so cells that still reference it continue to work.
promedio_horas_por_departamento = promedio_salario_por_departamento

In [31]:
# Show the per-department mean of Salario_mensual computed in the previous cell.
promedio_horas_por_departamento

Department_Name
Alcohol Beverage Services                                      5404.434987
Board of Appeals Department                                    8706.877244
Board of Elections                                             7126.428071
Community Engagement Cluster                                   6480.344856
Community Use of Public Facilities                             7625.927152
Correction and Rehabilitation                                  7080.015017
County Attorney's Office                                      10806.739197
County Council                                                 8812.880424
Department of Environmental Protection                         8720.372235
Department of Finance                                          9594.153443
Department of General Services                                 7668.492105
Department of Health and Human Services                        7422.027654
Department of Housing and Community Affairs                    8708.815398
Departmen

In [32]:
# Keep the rows whose Salario_mensual is greater than 160.
# NOTE(review): the variable name refers to hours ("horas") but the comparison is on
# monthly salary — confirm the intended column and threshold.
empleados_mas_de_160_horas = empleados.loc[empleados["Salario_mensual"].gt(160)]


In [33]:
# Display the filtered rows.
empleados_mas_de_160_horas

Unnamed: 0,Department,Department_Name,Division,Gender,Base_Salary,Overtime_Pay,Longevity_Pay,Grade,Salario_anual,Salario_mensual
0,ABS,Alcohol Beverage Services,ABS 85 Administration,M,175873.0000,0.00,0.00,M2,14656.083333,14656.083333
1,ABS,Alcohol Beverage Services,ABS 85 Administration,M,145613.3600,0.00,0.00,M3,12134.446667,12134.446667
2,ABS,Alcohol Beverage Services,ABS 85 Administration,F,136970.0000,0.00,0.00,M3,11414.166667,11414.166667
3,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,89432.6940,0.00,2490.00,21,7452.724500,7452.724500
4,ABS,Alcohol Beverage Services,ABS 85 Administrative Services,F,78947.0000,456.68,6257.70,16,6578.916667,6578.916667
...,...,...,...,...,...,...,...,...,...,...
10286,TBS,Department of Technology and Enterprise Busine...,TBS 34 OSP Low Code Governance and Administration,M,134500.0000,0.00,0.00,N28,11208.333333,11208.333333
10287,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Director,F,215032.1000,0.00,0.00,,17919.341667,17919.341667
10288,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Office of Zoning and Administrative Hea...,F,84365.5529,0.00,0.00,N18,7030.462742,7030.462742
10289,ZAH,Office of Zoning and Administrative Hearings,ZAH 05 Zoning and Administrative Hearings,F,98228.0000,0.00,1596.27,N21,8185.666667,8185.666667


## Agregación de datos
- Calcular el salario total pagado por la empresa en el último mes.

In [51]:
# Gross monthly pay: monthly salary plus one twelfth of overtime pay and one twelfth
# of longevity pay (terms are added in the same left-to-right order as before).
empleados["Salario_bruto"] = (
    empleados["Salario_mensual"]
    .add(empleados["Overtime_Pay"].div(12))
    .add(empleados["Longevity_Pay"].div(12))
)

In [57]:
# Total payroll: sum of Salario_bruto across all rows.
nomina = empleados.loc[:, "Salario_bruto"].sum()

In [58]:
# Display the payroll total.
nomina

85696019.19696666

In [59]:
# Print the payroll total inside a sentence (the value is interpolated without number formatting).
print(f'La nomina mensual es de {nomina}')

La nomina mensual es de 85696019.19696666


- Calcular el total de empleados en cada departamento.

In [63]:
# Headcount per department: count non-null Department values within each
# Department_Name group, largest group first.
(
    empleados
    .groupby("Department_Name")["Department"]
    .count()
    .sort_values(ascending=False)
)

Department_Name
Department of Health and Human Services                       1877
Department of Police                                          1794
Fire and Rescue Services                                      1440
Department of Transportation                                  1289
Correction and Rehabilitation                                  513
Alcohol Beverage Services                                      462
Department of Public Libraries                                 427
Department of General Services                                 424
Department of Permitting Services                              243
Department of Recreation                                       193
Sheriff's Office                                               188
Department of Environmental Protection                         185
Department of Technology and Enterprise Business Solutions     159
County Council                                                 131
Department of Finance                         

In [62]:
# Number of rows per department name, most frequent first (defaults spelled out).
empleados["Department_Name"].value_counts(sort=True, ascending=False)

Department_Name
Department of Health and Human Services                       1877
Department of Police                                          1794
Fire and Rescue Services                                      1440
Department of Transportation                                  1289
Correction and Rehabilitation                                  513
Alcohol Beverage Services                                      462
Department of Public Libraries                                 427
Department of General Services                                 424
Department of Permitting Services                              243
Department of Recreation                                       193
Sheriff's Office                                               188
Department of Environmental Protection                         185
Department of Technology and Enterprise Business Solutions     159
County Council                                                 131
Department of Finance                         