# This code covers various data manipulation operations using the Pandas library, including selecting columns, filtering rows, sorting, adding and removing columns, grouping and aggregation, handling missing values, merging DataFrames, and creating pivot tables. Each section is accompanied by comments explaining the purpose and functionality of the code.

In [1]:
import pandas as pd

# Creating a DataFrame

In [None]:
# Build the sample DataFrame from a column-oriented mapping:
# each key becomes a column and each list supplies that column's rows.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, 30, 28, 32],
    'City': ['New York', 'London', 'Paris', 'Sydney'],
}
df = pd.DataFrame.from_dict(data)
# One print call; sep="\n" puts the table on the line after the heading.
print("DataFrame:", df, sep="\n")

# Selecting columns

In [None]:
# Pull out one column by label; selecting a single label yields a Series.
name_column = df.loc[:, 'Name']
print("\nName Column:", name_column, sep="\n")

In [None]:
# Pull out several columns at once; a list of labels yields a DataFrame
# containing just those columns, in the order listed.
name_age_columns = df.loc[:, ['Name', 'Age']]
print("\nName and Age Columns:", name_age_columns, sep="\n")

# Filtering rows

In [None]:
# Keep only the rows whose Age is strictly greater than 25.
# The comparison produces a boolean mask that selects the matching rows.
filtered_df = df.loc[df['Age'].gt(25)]
print("\nFiltered DataFrame:", filtered_df, sep="\n")

# Sorting DataFrame

In [None]:
# Order the rows by Age, smallest first. sort_values returns a new
# DataFrame; df itself is left in its original order.
sorted_df = df.sort_values(by='Age', ascending=True)
print("\nSorted DataFrame:", sorted_df, sep="\n")

# Adding and removing columns


In [None]:
# Attach a Gender column in place; the list supplies one value per row
# (the Male/Female pair repeated twice covers the four rows).
df['Gender'] = ['Male', 'Female'] * 2
print("\nDataFrame with new column:", df, sep="\n")

In [None]:
# Drop the City column. drop() returns a new DataFrame, so rebind df
# to keep the result.
df = df.drop(columns='City')
print("\nDataFrame with column removed:", df, sep="\n")

# Grouping and aggregation

In [None]:
# Group DataFrame by a column and calculate the mean of each numeric column.
# numeric_only=True restricts the aggregation to numeric columns (here, Age).
# Without it, pandas >= 2.0 also tries to average the string 'Name' column
# and raises TypeError; older versions silently dropped such columns.
grouped_df = df.groupby('Gender').mean(numeric_only=True)
print("\nGrouped DataFrame:")
print(grouped_df)

# Handling missing values

In [None]:
# Rebuild df with gaps in it: one missing Age and one missing City,
# each written as None in the input lists.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, None, 28, 32],
    'City': ['New York', 'London', None, 'Sydney'],
}
df = pd.DataFrame(data=data)
print("\nDataFrame with missing values:", df, sep="\n")

In [None]:
# Count the missing entries per column: isna() marks each missing cell
# as True, and sum() adds those flags up column by column.
print("\nMissing Values:", df.isna().sum(), sep="\n")

In [None]:
# Replace every missing entry with the constant 0. fillna returns a new
# DataFrame, so df keeps its gaps.
# NOTE: the same 0 is used for all columns, so the missing City (a text
# column) also becomes 0.
df_filled = df.fillna(value=0)
print("\nDataFrame with filled missing values:", df_filled, sep="\n")

# Merging DataFrames

In [None]:
# Build two DataFrames that share a 'Name' column: the first carries
# each person's Age, the second their City.
data1 = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'Age': [25, 30, 28, 32],
}
data2 = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'],
    'City': ['New York', 'London', 'Paris', 'Sydney'],
}
df1, df2 = pd.DataFrame(data1), pd.DataFrame(data2)

In [None]:
# Join the two DataFrames on their shared 'Name' column. An inner join
# (the default for merge) keeps only names present in both inputs.
merged_df = df1.merge(df2, on='Name', how='inner')
print("\nMerged DataFrame:", merged_df, sep="\n")

# Pivot Tables


In [None]:
# Build a long-format score table: one row per (Name, Subject) pair.
# The four names repeat once per subject; the first four rows are Math
# and the last four are Science.
data = {
    'Name': ['John', 'Jane', 'Mike', 'Emily'] * 2,
    'Subject': ['Math'] * 4 + ['Science'] * 4,
    'Score': [85, 90, 92, 88, 78, 82, 80, 85],
}
df = pd.DataFrame(data=data)

In [None]:
# Reshape the long score table into a wide one: one row per Name, one
# column per Subject, each cell holding the mean Score for that pair.
pivot_table = pd.pivot_table(
    df,
    values='Score',
    index='Name',
    columns='Subject',
    aggfunc='mean',
)
print("\nPivot Table:", pivot_table, sep="\n")