# Mastering the Pandas Library
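This notebook walks through the core features of the pandas library in Python, one topic per section, each with a short runnable example. The examples share a single DataFrame, `df`, which later sections modify, so run the cells in order from top to bottom.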

## 1. Basics and Fundamentals
Learn about Pandas' core data structures and basic operations.

In [None]:
# Importing pandas
import pandas as pd

# Creating a Series: a one-dimensional labelled array
series = pd.Series([10, 20, 30], name='Sample Series')
print(series)

# Creating a DataFrame from a dict that maps column name -> column values
data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 30, 35]}
df = pd.DataFrame(data)
print(df)

# Inspecting a DataFrame
print(df.head())  # First 5 rows (all 3 rows here)
# info() writes its report to stdout itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
df.info()  # Overview: index, columns, dtypes, non-null counts
print(df.describe())  # Summary statistics of the numeric columns

## 2. Data Input and Output (I/O)
Read and write data from various file formats.

In [None]:
# CSV round trip: write df without its index, then load the file back
csv_path = 'example.csv'
df.to_csv(csv_path, index=False)
csv_data = pd.read_csv(csv_path)
print(csv_data)

# JSON round trip: write df as a list of row records, then load the file back
json_path = 'example.json'
df.to_json(json_path, orient='records')
json_data = pd.read_json(json_path)
print(json_data)

## 3. Indexing and Selection
Access and manipulate subsets of data efficiently.

In [None]:
# Indexing with .iloc (position-based) and .loc (label-based)
print(df.iloc[0])  # First row, selected by integer position
print(df.loc[0, 'Name'])  # Single cell: row label 0, column 'Name'

# Boolean indexing: keep only the rows where the condition is True
adults = df[df['Age'] > 28]
print(adults)

# Setting and resetting an index.
# set_index / reset_index return new frames, so the shared `df` is never left
# indexed by 'Name' if this cell is interrupted or only partly run.
df_by_name = df.set_index('Name')
print(df_by_name)
df_restored = df_by_name.reset_index()
print(df_restored)

## 4. Data Cleaning and Preprocessing
Clean and preprocess raw data for analysis.

In [None]:
# Handling missing values
df_with_na = pd.DataFrame({'A': [1, None, 3], 'B': [4, 5, None]})
na_mask = df_with_na.isna()  # True wherever a value is missing
print(na_mask)
df_with_na = df_with_na.fillna(0)  # Replace every missing value with 0
print(df_with_na)

# Converting data types: store Age as floating point
age_as_float = df['Age'].astype(float)
df['Age'] = age_as_float
print(df.dtypes)

# String operations: upper-case every name through the .str accessor
names_upper = df['Name'].str.upper()
df['Name'] = names_upper
print(df)

## 5. Data Transformation
Perform transformations like renaming, reshaping, and applying functions.

In [None]:
# Adding and renaming columns.
# Guarded so that re-running this cell does not add a second 'Income' column:
# duplicate column labels would make df['Income'] return a DataFrame and break
# the Income_after_tax assignment below.
if 'Income' not in df.columns:
    df['Salary'] = [50000, 60000, 70000]
    df = df.rename(columns={'Salary': 'Income'})
print(df)

# Applying functions: .apply calls the function once per element of the column
AFTER_TAX_RATE = 0.7  # fraction of income kept after tax
df['Income_after_tax'] = df['Income'].apply(lambda income: income * AFTER_TAX_RATE)
print(df)

# Pivot tables: mean Income for each distinct Age
pivot = pd.pivot_table(df, values='Income', index='Age', aggfunc='mean')
print(pivot)

## 6. Aggregation and Grouping
Aggregate and summarize data effectively.

In [None]:
# Grouping data: mean and total Income within each Age group
income_stats = {'Income': ['mean', 'sum']}
grouped = df.groupby('Age').agg(income_stats)
print(grouped)

# Rolling window: mean over each pair of consecutive rows
income_window = df['Income'].rolling(window=2)
rolling_mean = income_window.mean()
print(rolling_mean)

## 7. Merging and Combining Data
Combine data from multiple sources.

In [None]:
# Merging two DataFrames
df2 = pd.DataFrame({'Name': ['Alice', 'Bob'], 'City': ['NYC', 'LA']})
# df['Name'] was upper-cased in the cleaning section, and merge keys are
# compared exactly, so put the lookup key in the same case; otherwise the
# left join matches nothing and City is NaN for every row.
df2['Name'] = df2['Name'].str.upper()
# Left join: keep every row of df; people without a City entry get NaN
merged = pd.merge(df, df2, on='Name', how='left')
print(merged)

# Concatenating DataFrames: append one extra row and renumber the index
df3 = pd.DataFrame([{'Name': 'Eve', 'Age': 40, 'Income': 80000}])
frames = [df, df3]
concat = pd.concat(frames, ignore_index=True)
print(concat)

## 8. Time-Series Data
Handle and analyze temporal data efficiently.

In [None]:
# Build a small daily time series and index it by date
time_data = pd.date_range(start='2023-01-01', periods=5, freq='D')
time_df = pd.DataFrame({'Date': time_data, 'Value': range(5)}).set_index('Date')
print(time_df)

# Resample into 2-day bins, summing the values that fall in each bin
two_day_bins = time_df.resample('2D')
resampled = two_day_bins.sum()
print(resampled)

## 9. Performance Optimization
Optimize memory and computational performance.

In [None]:
# Reducing memory usage: downcast Income to the smallest integer dtype that
# can hold its values, working on a copy so df itself keeps its dtypes
downcast_income = pd.to_numeric(df['Income'], downcast='integer')
optimized_df = df.copy()
optimized_df['Income'] = downcast_income
print(optimized_df.dtypes)

# Using vectorized operations: scale the whole column in one expression
adjusted_income = df['Income'] * 0.9
df['Adjusted_Income'] = adjusted_income
print(df)

## 10. Visualization
Quickly visualize data trends and distributions.

In [None]:
# Basic plotting: one bar per person.
# Plotting from the DataFrame with x='Name' labels each bar with the person's
# name instead of the positional index (0, 1, 2); the title and axis labels
# let the figure stand on its own.
ax = df.plot(kind='bar', x='Name', y='Income', legend=False, title='Income by person')
ax.set_xlabel('Name')
ax.set_ylabel('Income');  # trailing ';' hides the text repr in the cell output

## 11. Advanced Topics
Master advanced operations like MultiIndex, custom aggregations, and handling hierarchical data.

In [None]:
# MultiIndex DataFrame: rows are keyed by (Category, Subcategory)
multi_index_df = pd.DataFrame(
    {
        'Category': ['A', 'A', 'B'],
        'Subcategory': ['X', 'Y', 'Z'],
        'Values': [1, 2, 3],
    }
).set_index(['Category', 'Subcategory'])
print(multi_index_df)

# Custom aggregations: sum and mean of Income, maximum of Age
agg_spec = {'Income': ['sum', 'mean'], 'Age': 'max'}
custom_agg = df.agg(agg_spec)
print(custom_agg)