# Cell 1: Importing Pandas and Basic DataFrame Operations
In this cell, we import the pandas library, create a simple DataFrame, and demonstrate some basic operations such as displaying the DataFrame, getting information about it, and generating descriptive statistics.

In [None]:
import pandas as pd

# Build a small example DataFrame: one row per person, one key per column.
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [24, 27, 22, 32, 29],
    'City': ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix'],
}

df = pd.DataFrame(data)

# Display the DataFrame
print("DataFrame:")
print(df)

# Basic Operations
# info() writes its report (columns, non-null counts, dtypes) straight to
# stdout and returns None, so it must not be wrapped in print() -- doing so
# would emit a stray "None" line after the report.
print("\nDataFrame Info:")
df.info()

# describe() returns a DataFrame of summary statistics, so it is printed.
print("\nDataFrame Description:")
print(df.describe())

# Cell 2: Data Selection and Filtering
This cell demonstrates how to select specific columns and rows in a DataFrame. We select a single column, multiple columns, and filter rows based on a condition.

In [None]:
# Pull out one column by label (all rows, the 'Age' column)
ages = df.loc[:, 'Age']
print("Ages:", ages, sep="\n")

# Pull out several columns at once by passing a list of labels
name_city = df.loc[:, ['Name', 'City']]
print("\nName and City:", name_city, sep="\n")

# Keep only the rows whose Age exceeds 25, using a boolean mask
age_filter = df.loc[df['Age'] > 25]
print("\nRows where Age > 25:", age_filter, sep="\n")

# Cell 3: Handling Missing Data
This cell covers how to handle missing data in a DataFrame. We add a column with missing values, then demonstrate how to fill and drop missing values.

In [None]:
# Attach a Salary column in which two of the five entries are missing
df['Salary'] = [50000, None, 70000, None, 60000]
print("DataFrame with NaN values:", df, sep="\n")

# Substitute 0 for every missing entry; the result is a new DataFrame
df_filled = df.fillna(value=0)
print("\nDataFrame with NaN filled with 0:", df_filled, sep="\n")

# Discard every row that contains at least one missing entry
df_dropped = df.dropna(axis=0, how='any')
print("\nDataFrame with NaN rows dropped:", df_dropped, sep="\n")

# Cell 4: GroupBy Operations
In this cell, we demonstrate the use of the groupby function to group data by a specific column. We calculate summary statistics for each group and apply multiple aggregation functions.

In [None]:
# Mean Age within each City
grouped = df.groupby(by='City')['Age'].agg('mean')
print("Average Age by City:", grouped, sep="\n")

# Several statistics of Age per City, requested through one agg() call
grouped_multi = df.groupby(by='City').agg({'Age': ['mean', 'min', 'max']})
print("\nMultiple Aggregations:", grouped_multi, sep="\n")

# Cell 5: Merging and Joining DataFrames
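This cell demonstrates how to merge two DataFrames based on a common column. We create a second DataFrame and merge it with the original one using the merge function. If Cell 3 has been run, the original DataFrame already has a `Salary` column, so the `suffixes` argument is used to tell the two overlapping `Salary` columns apart in the result.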

In [None]:
# Second table: one salary figure per person, keyed by Name
data2 = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Salary': [50000, 48000, 70000, 55000, 60000],
}
df2 = pd.DataFrame(data2)

# Inner-join the two tables on Name; any non-key column present in both
# tables receives the matching suffix so the two versions stay distinct
merged_df = df.merge(df2, how='inner', on='Name', suffixes=('_df', '_df2'))
print("Merged DataFrame:", merged_df, sep="\n")

# Cell 6: Pivot Tables
In this cell, we demonstrate how to create pivot tables to summarize data. We create a simple pivot table to calculate the average age by city and a more complex pivot table with multiple aggregations.

In [None]:
# Simple pivot: one row per City holding the mean Age
pivot = pd.pivot_table(df, index='City', values='Age', aggfunc='mean')
print("Pivot Table - Average Age by City:", pivot, sep="\n")

# Richer pivot: mean, sum and count of Age for each City
pivot_multi = pd.pivot_table(
    df,
    index='City',
    values='Age',
    aggfunc={'Age': ['mean', 'sum', 'count']},
)
print("\nPivot Table with Multiple Aggregations:", pivot_multi, sep="\n")

# Cell 7: Custom Functions and Lambdas
This cell demonstrates how to apply custom functions and lambda functions to DataFrame columns. We define a custom function to categorize ages, use a lambda function to calculate the length of names, and another lambda function to create a conditional column.

In [None]:
# Custom function that turns a numeric age into a coarse label
def age_category(age):
    """Return the age-bracket label for *age*.

    Ages below 25 map to 'Youth', ages from 25 up to (but not including)
    30 map to 'Young Adult', and everything else maps to 'Adult'.
    """
    if age < 25:
        return 'Youth'
    if 25 <= age < 30:
        return 'Young Adult'
    return 'Adult'

# Label every row by running the custom function over each Age value
df['Age Category'] = df['Age'].map(age_category)
print("DataFrame with Age Category:", df, sep="\n")

# Lambda that measures the number of characters in each name
df['Name Length'] = df['Name'].map(lambda name: len(name))
print("\nDataFrame with Name Length:", df, sep="\n")

# Lambda with a conditional expression: 'Yes' when Age is above 30
df['Is Senior'] = df['Age'].map(lambda age: 'Yes' if age > 30 else 'No')
print("\nDataFrame with Is Senior:", df, sep="\n")