In [1]:
# Getting Started with Pandas
# Objective: Introduce students to using Pandas for data analysis by loading data into Pandas
# DataFrames.

# Question 1: Importing Pandas and Loading a CSV File
# 1. Open your Jupyter Notebook or a Python environment.
# 2. Import the pandas library.
# 3. Load a CSV file into a DataFrame.





# Question 2: Displaying the First Few Rows
# 4. Use the head() method to display the first five rows of the DataFrame.





# Question 3: Basic Data Information
# 5. Use the info() method to get a concise summary of the DataFrame.

import pandas as pd
import os

# File path
file_path = 'sample_data.csv'  # Change if your file is named differently

# Pre-set the result names so that a failed load leaves them defined.
# Without this, a missing or unreadable file printed a message and then
# crashed with NameError on the first use of `df` below.
df = None
grouped = None

# Step 1: Check if file exists and read it
if os.path.exists(file_path):
    try:
        df = pd.read_csv(file_path, usecols=['Name', 'Age', 'Department', 'Salary'])
    except Exception as e:
        # Best-effort: report the problem and continue with df left as None.
        print(f"Error reading CSV: {e}")
    else:
        print("File loaded successfully.\n")
else:
    print(f"File {file_path} not found.")

# Every remaining step needs a loaded DataFrame, so skip them all if the
# load above did not succeed.
if df is not None:
    # Step 2: Display first 5 rows
    print("First 5 rows:")
    print(df.head())

    # Step 3: Basic info
    # info() prints the summary itself and returns None, so it is called
    # directly; wrapping it in print() emitted a stray "None" line.
    print("\nData Info:")
    df.info()

    # Step 4: Inspect data types
    print("\nData Types:")
    print(df.dtypes)

    # Step 5: Select a single column
    print("\nNames Column:")
    print(df['Name'])

    # Step 6: Slice specific rows
    # iloc makes it explicit that rows are picked by position (2, 3 and 4).
    print("\nSliced Rows (Index 2 to 4):")
    print(df.iloc[2:5])

    # Steps 7-9: Handle missing values, rename column, drop duplicates.
    # Written as one chain that returns a new frame instead of three
    # inplace=True mutations; `df` is rebound to the cleaned result.
    df = (
        df.fillna(0)                                    # Step 7: missing -> 0
        .rename(columns={'Salary': 'Annual_Salary'})    # Step 8: rename column
        .drop_duplicates()                              # Step 9: drop duplicates
    )

    # Step 10: Group and aggregate
    grouped = df.groupby('Department').agg({
        'Age': ['mean', 'max', 'sum'],
        'Annual_Salary': ['mean']
    })
    print("\nGrouped Aggregation:")
    print(grouped)

    # Step 11: Export to CSV
    try:
        df.to_csv('cleaned_data.csv', index=False)
        print("\nData exported to 'cleaned_data.csv'")
    except Exception as e:
        print(f"Error exporting CSV: {e}")




File loaded successfully.

First 5 rows:
      Name  Age Department  Salary
0    Alice   25         HR   50000
1      Bob   30         IT   60000
2  Charlie   35    Finance   70000
3    David   40         IT   80000
4      Eve   22         HR   52000

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        7 non-null      object
 1   Age         7 non-null      int64 
 2   Department  7 non-null      object
 3   Salary      7 non-null      int64 
dtypes: int64(2), object(2)
memory usage: 352.0+ bytes
None

Data Types:
Name          object
Age            int64
Department    object
Salary         int64
dtype: object

Names Column:
0      Alice
1        Bob
2    Charlie
3      David
4        Eve
5      Frank
6      Alice
Name: Name, dtype: object

Sliced Rows (Index 2 to 4):
      Name  Age Department  Salary
2  Charlie   35    Finance   7

In [None]:
# Data Inspection & Selection
# Objective: Learn how to inspect data and select specific data points.

# Question 1: Inspecting Column Data Types
# 6. Use the dtypes attribute to inspect the data types of each column.




# Question 2: Selecting Columns
# 7. Select a single column from the DataFrame.





# Question 3: Slicing Rows
# 8. Select specific rows using slicing.






In [None]:
# Data Cleaning & Manipulation
# Objective: Practice cleaning data and manipulating DataFrames.

# Question 1: Handling Missing Values
# 9. Use the fillna() method to fill missing values with a specific value.




# Question 2: Renaming Columns
# 10. Change the names of specific columns using rename().




# Question 3: Dropping Duplicates
# 11. Remove duplicate rows from the DataFrame.







In [None]:
# Data Aggregation & Exporting
# Objective: Aggregate data and export the results.

# Question 1: Grouping and Aggregating Data
# 12. Group data by a specific column and calculate the mean for each group.





# Question 2: Exporting Data to CSV
# 13. Export the DataFrame to a new CSV file.





# Question 3: Aggregating with Multiple Functions
# 14. Apply several aggregate functions to the grouped data.




