In [1]:
import pandas as pd
import os

def load_data(file_path):
    """Load the employee CSV at ``file_path`` into a DataFrame.

    The file must contain the columns ``Name``, ``Age``, ``Department`` and
    ``Salary``; any additional columns are dropped.  ``Age`` and ``Salary``
    are converted to numeric dtype, with unparseable entries becoming NaN.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A DataFrame holding exactly the four required columns (in that
        order), or ``None`` if the file is missing, cannot be parsed, or
        lacks a required column.  In the failure case a one-line message
        describing the problem is printed.
    """
    required_columns = ['Name', 'Age', 'Department', 'Salary']
    numeric_columns = ('Age', 'Salary')
    try:
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} not found.")

        # Read every column so that a schema mismatch can be reported
        # explicitly instead of surfacing as an opaque parser error.
        df = pd.read_csv(file_path)

        # A list (not a set) keeps the report in a stable, predictable order.
        missing_cols = [col for col in required_columns if col not in df.columns]
        if missing_cols:
            # ValueError rather than KeyError: str(KeyError) wraps the
            # message in an extra pair of quotes when it is printed below.
            raise ValueError(f"Missing required columns: {missing_cols}")

        # Work on an independent copy of just the required columns so the
        # assignments below never touch a view of the full frame.
        df = df[required_columns].copy()
        for col in numeric_columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')

        return df

    except Exception as e:
        # Top-level boundary for the loader: report and signal failure with
        # None, which callers check for.
        print(f"Error loading data: {e}")
        return None

def handle_missing_values(df, fill_value=0):
    """Return ``df`` with every missing entry replaced by ``fill_value``.

    The input frame is left untouched; the filled result is a new object.
    """
    filled = df.fillna(value=fill_value)
    return filled

def rename_columns(df, columns_map=None):
    """Return ``df`` with columns renamed according to ``columns_map``.

    Args:
        df: DataFrame whose columns should be renamed.
        columns_map: Mapping of existing column name to new column name.
            When omitted (``None``), ``Salary`` is renamed to
            ``Annual_Salary``.

    Returns:
        A new DataFrame with the renamed columns; ``df`` is not modified.
    """
    if columns_map is None:
        # Build the default per call rather than sharing one mutable dict
        # object across every invocation via the signature.
        columns_map = {'Salary': 'Annual_Salary'}
    return df.rename(columns=columns_map)

def drop_duplicates(df):
    """Return ``df`` without repeated rows, keeping each row's first occurrence."""
    unique_rows = df.drop_duplicates(subset=None, keep="first")
    return unique_rows

def main(file_path='sample_data.csv', output_path='cleaned_data.csv'):
    """Run the load / inspect / clean / aggregate / export pipeline.

    Args:
        file_path: CSV file to read.  Defaults to ``'sample_data.csv'``.
        output_path: Destination of the cleaned CSV.  Defaults to
            ``'cleaned_data.csv'``.

    Returns:
        None.  Progress and results are printed; if loading fails the
        function returns early without exporting anything.
    """
    # Step 1: Load data
    df = load_data(file_path)
    if df is None:
        return  # Exit if data loading failed

    # Step 2: Display first 5 rows
    print("First 5 rows:")
    print(df.head())

    # Step 3: Show basic info
    print("\nData Info:")
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None" line).
    df.info()

    # Step 4: Display data types
    print("\nData Types:")
    print(df.dtypes)

    # Step 5: Select and show 'Name' column
    try:
        print("\nNames Column:")
        print(df['Name'])
    except KeyError as e:
        print(f"Error selecting column: {e}")

    # Step 6: Slice rows at positions 2 through 4 (end bound is exclusive)
    print("\nSliced Rows (Index 2 to 4):")
    print(df.iloc[2:5])

    # Step 7-9: Data cleaning
    df = handle_missing_values(df)
    df = rename_columns(df)
    df = drop_duplicates(df)

    # Step 10: Group and aggregate
    try:
        grouped = df.groupby('Department').agg({
            'Age': ['mean', 'max', 'sum'],
            'Annual_Salary': ['mean']
        })
        print("\nGrouped Aggregation:")
        print(grouped)
    except KeyError as e:
        print(f"Error during grouping: {e}")

    # Step 11: Export data
    try:
        df.to_csv(output_path, index=False)
        print(f"\nData exported to '{output_path}'")
    except Exception as e:
        print(f"Export error: {e}")

# Run the pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()

First 5 rows:
      Name  Age Department  Salary
0    Alice   25         HR   50000
1      Bob   30         IT   60000
2  Charlie   35    Finance   70000
3    David   40         IT   80000
4      Eve   22         HR   52000

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        7 non-null      object
 1   Age         7 non-null      int64 
 2   Department  7 non-null      object
 3   Salary      7 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 352.0+ bytes
None

Data Types:
Name          object
Age            int64
Department    object
Salary         int64
dtype: object

Names Column:
0      Alice
1        Bob
2    Charlie
3      David
4        Eve
5      Frank
6      Alice
Name: Name, dtype: object

Sliced Rows (Index 2 to 4):
      Name  Age Department  Salary
2  Charlie   35    Finance   70000
3    David   40       