In [1]:
import pandas as pd

df = pd.read_csv('aggregation_data.csv')

print(df.head())

      Name Department  Salary
0    Alice         HR   50000
1      Bob         IT   60000
2  Charlie    Finance   70000
3    David         IT   55000
4      Eve         HR   80000


In [2]:
import pandas as pd

data = {
    'Department': ['HR', 'Engineering', 'Marketing', 'HR', 'Engineering', 'Marketing', 'Engineering'],
    'Salary': [60000, 85000, 70000, 62000, 90000, 75000, 92000]
}
df = pd.DataFrame(data)

total_salary = df['Salary'].sum()
print(f"Total salary paid across all departments: {total_salary}")

average_salary = df['Salary'].mean()
print(f"Average salary in the dataset: {average_salary}")

min_salary = df['Salary'].min()
print(f"Minimum salary: {min_salary}")

max_salary = df['Salary'].max()
print(f"Maximum salary: {max_salary}")

num_employees = df['Salary'].count() # or len(df)
print(f"Total number of employees: {num_employees}")

Total salary paid across all departments: 534000
Average salary in the dataset: 76285.71428571429
Minimum salary: 60000
Maximum salary: 92000
Total number of employees: 7


In [3]:
import pandas as pd

data = {'Department': ['HR', 'IT', 'HR', 'IT', 'Finance', 'Finance'],
        'Salary': [60000, 75000, 65000, 80000, 70000, 72000]}
df = pd.DataFrame(data)


department_summary = df.groupby('Department')['Salary'].agg(['sum', 'mean', 'count'])

print(department_summary)

               sum     mean  count
Department                        
Finance     142000  71000.0      2
HR          125000  62500.0      2
IT          155000  77500.0      2


In [4]:
import pandas as pd


data = {'Department': ['HR', 'IT', 'HR', 'Finance', 'IT', 'Finance', 'HR'],
        'Salary': [50000, 70000, 55000, 60000, 75000, 62000, 52000]}
df = pd.DataFrame(data)

result = df.groupby('Department')['Salary'].agg(['sum', 'mean', 'min', 'max', 'count'])

print(result)

               sum          mean    min    max  count
Department                                           
Finance     122000  61000.000000  60000  62000      2
HR          157000  52333.333333  50000  55000      3
IT          145000  72500.000000  70000  75000      2


In [5]:
import pandas as pd

data = {
    'Department': ['Sales', 'Marketing', 'Sales', 'IT', 'Marketing', 'IT', 'Sales', 'HR', 'IT', 'HR'],
    'Salary': [50000, 60000, 55000, 70000, 62000, 75000, 53000, 48000, 80000, 52000]
}

df = pd.DataFrame(data)

average_salaries_by_department = df.groupby('Department')['Salary'].mean()

highest_average_salary_department = average_salaries_by_department.idxmax()
highest_average_salary_value = average_salaries_by_department.max()

print("Average salary by department:")
print(average_salaries_by_department)
print(f"\nDepartment with the highest average salary: {highest_average_salary_department}")
print(f"Highest average salary: {highest_average_salary_value}")

Average salary by department:
Department
HR           50000.000000
IT           75000.000000
Marketing    61000.000000
Sales        52666.666667
Name: Salary, dtype: float64

Department with the highest average salary: IT
Highest average salary: 75000.0


In [6]:
import pandas as pd
import numpy as np

data = {'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
        'salary': [75000, 60000, 45000, 55000, 80000]}
df = pd.DataFrame(data)

conditions = [
    (df['salary'] >= 70000),
    (df['salary'] >= 55000) & (df['salary'] < 70000),
    (df['salary'] < 55000)
]
choices = ['High', 'Medium', 'Low']

df['Salary_Level'] = np.select(conditions, choices, default='Unknown')

print(df)

      Name  salary Salary_Level
0    Alice   75000         High
1      Bob   60000       Medium
2  Charlie   45000          Low
3    David   55000       Medium
4      Eva   80000         High


In [7]:
import pandas as pd

data = {
    'Employee_ID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    'Salary': [45000, 70000, 30000, 90000, 55000, 120000, 40000, 80000, 60000, 110000]
}
df = pd.DataFrame(data)

bins = [0, 49999, 99999, float('inf')]
labels = ['Entry-level', 'Mid-level', 'Senior-level']
df['Salary Level'] = pd.cut(df['Salary'], bins=bins, labels=labels, right=True)

salary_level_agg = df.groupby('Salary Level').agg(
    Employee_Count=('Employee_ID', 'count'),  # Count employees in each level
    Average_Salary=('Salary', 'mean')         # Calculate average salary in each level
).reset_index()

print(salary_level_agg)

   Salary Level  Employee_Count  Average_Salary
0   Entry-level               3    38333.333333
1     Mid-level               5    71000.000000
2  Senior-level               2   115000.000000


  salary_level_agg = df.groupby('Salary Level').agg(


In [9]:
import pandas as pd

data = {'Department': ['HR', 'IT', 'HR', 'IT', 'Finance', 'HR'],
        'Salary': [50000, 70000, 60000, 80000, 75000, 55000],
        'Years_Experience': [5, 8, 7, 10, 9, 6]}
df = pd.DataFrame(data)

department_aggregates = df.groupby('Department').agg(
    Total_Salary=('Salary', 'sum'),
    Average_Salary=('Salary', 'mean'),
    Min_Salary=('Salary', 'min'),
    Max_Salary=('Salary', 'max'),
    Employee_Count=('Department', 'count'),
    Average_Years_Experience=('Years_Experience', 'mean')
)

department_aggregates.to_csv('department_aggregates.csv', index=False)

print("Aggregated report saved to 'department_aggregates.csv'")

Aggregated report saved to 'department_aggregates.csv'
