In [1]:
import pandas as pd

# Step 1: Create a DataFrame with sample data
data = {
    'EmployeeID': [101, 102, 103, 104, 105],
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Edward'],
    'Department': ['HR', 'IT', 'Finance', 'IT', 'HR'],
    'Salary': [60000, 80000, 75000, 82000, 58000],
    'JoinDate': ['2015-03-10', '2016-07-24', '2018-11-15', '2019-08-10', '2017-05-30'],
    'PerformanceRating': [4, 5, 3, 5, 4]
}

df = pd.DataFrame(data)

# Converting 'JoinDate' to datetime
df['JoinDate'] = pd.to_datetime(df['JoinDate'])

# Step 3: Add a new column for years of experience
current_year = pd.Timestamp.now().year
df['ExperienceYears'] = current_year - df['JoinDate'].dt.year

# Step 4: Filter employees with Salary > 60,000 and in the IT department
high_salary_it = df[(df['Salary'] > 60000) & (df['Department'] == 'IT')]

# Step 5: Group by Department and calculate average Salary
avg_salary_by_dept = df.groupby('Department')['Salary'].mean()

# Step 6: Sort employees by Performance Rating in descending order
sorted_by_rating = df.sort_values(by='PerformanceRating', ascending=False)

# Step 7: Handle missing data
df.loc[2, 'Salary'] = None
df['Salary'].fillna(df['Salary'].mean(), inplace=True)  # Filling with mean salary

# Step 8: Save the processed DataFrame to a CSV file
df.to_csv('processed_employees.csv', index=False)

# Step 9: Display results
print("Original DataFrame:")
print(df)
print("\nFiltered (High Salary in IT):")
print(high_salary_it)
print("\nAverage Salary by Department:")
print(avg_salary_by_dept)
print("\nSorted by Performance Rating:")
print(sorted_by_rating)

Original DataFrame:
   EmployeeID     Name Department   Salary   JoinDate  PerformanceRating  \
0         101    Alice         HR  60000.0 2015-03-10                  4   
1         102      Bob         IT  80000.0 2016-07-24                  5   
2         103  Charlie    Finance  70000.0 2018-11-15                  3   
3         104    Diana         IT  82000.0 2019-08-10                  5   
4         105   Edward         HR  58000.0 2017-05-30                  4   

   ExperienceYears  
0                9  
1                8  
2                6  
3                5  
4                7  

Filtered (High Salary in IT):
   EmployeeID   Name Department  Salary   JoinDate  PerformanceRating  \
1         102    Bob         IT   80000 2016-07-24                  5   
3         104  Diana         IT   82000 2019-08-10                  5   

   ExperienceYears  
1                8  
3                5  

Average Salary by Department:
Department
Finance    75000.0
HR         59000.0
IT 

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Salary'].fillna(df['Salary'].mean(), inplace=True)  # Filling with mean salary
