 
# 📌Topic 1: Introduction to Pandas

## What is Pandas?
Pandas is a powerful Python library for data manipulation and analysis. It provides flexible data structures such as Series and DataFrame that allow for efficient data handling.

## Installing Pandas
Before using Pandas, you need to install it. Run the following command in your terminal or command prompt:
```sh
pip install pandas
```

# Importing pandas

import pandas as pd

 
## 📍 Creating a Simple Series
A Series is a one-dimensional labeled array capable of holding any data type.
 

In [None]:
# Creating a Series

import pandas as pd

# Source values: the multiples of ten from 10 through 50.
data = list(range(10, 60, 10))

# No index is supplied, so pandas labels the entries 0..n-1.
series = pd.Series(data=data)
series

 
## 📍 Creating a Simple DataFrame
A DataFrame is a two-dimensional labeled data structure with columns of potentially different types.
 

In [None]:
# Creating a DataFrame from a dictionary

import pandas as pd 

# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    Name=['Prasun', 'Neha', 'Rahul', 'Priya'],
    Age=[25, 30, 22, 28],
    City=['Mumbai', 'Delhi', 'Bangalore', 'Chennai'],
)

df = pd.DataFrame(data)

# Alias the finished frame as `a`.
a = df

 
# Topic 2: Pandas Data Structures

## 📍 Pandas Series
A Series is a one-dimensional array-like structure with labels (index). It can hold any data type.
 

In [None]:
import pandas as pd

# Build a Series whose entries carry explicit string labels instead of 0..n-1.

labels = list('abcde')
data = [100 * k for k in range(1, 6)]
series = pd.Series(data=data, index=labels)

series

 
## 📍 Pandas DataFrame
A DataFrame is a two-dimensional labeled data structure with columns of potentially different types.
 

In [None]:
# Creating a DataFrame from a dictionary

import pandas as pd 

# Assemble the table one column at a time, then hand the mapping to pandas.
employees = ['Prasun', 'Krish', 'Amit', 'Priya']
salaries = [50000, 60000, 55000, 52000]
departments = ['IT', 'HR', 'Finance', 'Marketing']

data = {'Employee': employees, 'Salary': salaries, 'Department': departments}
df = pd.DataFrame(data)

# Alias the finished frame as `a`.
a = df

 
# Topic 3: DataFrame Indexing and Selection

## 📍 Selecting Columns in a DataFrame
We can select a single column or multiple columns from a DataFrame.
 

In [None]:
import pandas as pd

# Sample employee table, one list per column.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, 60000, 55000, 52000],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
)
df = pd.DataFrame(data)

# Passing a list of names (even a one-item list) keeps the result a DataFrame.
single_column = ['Employee']
a = df.loc[:, single_column]


In [None]:
import pandas as pd

# Sample employee table, written row by row and transposed into columns.
column_names = ('Employee', 'Salary', 'Department')
records = [
    ('Prasun', 50000, 'IT'),
    ('Krish', 60000, 'HR'),
    ('Amit', 55000, 'Finance'),
    ('Priya', 52000, 'Marketing'),
]
data = {name: list(values) for name, values in zip(column_names, zip(*records))}
df = pd.DataFrame(data)

# Keep only the listed columns, in the listed order.
wanted = ['Employee', 'Salary']
a = df[wanted]

 
## 📍 Selecting Rows in a DataFrame
We can select rows with `.loc[]`, which looks rows up by index label, or with `.iloc[]`, which looks rows up by integer position.
 

In [None]:
# Creating a sample DataFrame

import pandas as pd 

data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, 60000, 55000, 52000],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
)
df = pd.DataFrame(data)

# .loc looks rows up by index label; a list of labels returns a DataFrame.
row_labels = [1]
a = df.loc[row_labels, :]

In [None]:
# Creating a sample DataFrame

import pandas as pd

employees = ['Prasun', 'Krish', 'Amit', 'Priya']
salaries = [50000, 60000, 55000, 52000]
departments = ['IT', 'HR', 'Finance', 'Marketing']
data = {'Employee': employees, 'Salary': salaries, 'Department': departments}

df = pd.DataFrame(data)

# .iloc looks rows up by integer position; a list of positions returns a DataFrame.
row_positions = [2]
a = df.iloc[row_positions, :]

 
## 📍 Filtering Data based on Conditions
We can filter data based on conditions applied to DataFrame columns.
 

In [None]:
import pandas as pd

# Sample employee table.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, 60000, 55000, 52000],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
)
df = pd.DataFrame(data)

# Boolean mask: True for rows whose Salary is strictly above 55000.
earns_above_55k = df['Salary'] > 55000

# Keep only the rows where the mask is True.
filtered_df = df.loc[earns_above_55k]

a = filtered_df

 
# Topic 4: DataFrame Operations and Manipulations

## 📍 Adding a New Column
We can add a new column to a DataFrame by assigning values to it.
 

In [None]:
import pandas as pd

# Starting table, before any column is added.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, 60000, 55000, 52000],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
)
df = pd.DataFrame.from_dict(data)

a = df

In [None]:
import pandas as pd

# Sample employee table.
employees = ['Prasun', 'Krish', 'Amit', 'Priya']
salaries = [50000, 60000, 55000, 52000]
departments = ['IT', 'HR', 'Finance', 'Marketing']
data = {'Employee': employees, 'Salary': salaries, 'Department': departments}

df = pd.DataFrame(data)

# Assigning to a column name that does not exist yet appends a new column.
# Bonus is Salary scaled by 0.1.
bonus = df['Salary'].mul(0.1)
df['Bonus'] = bonus

a = df


 
## 📍 Modifying an Existing Column
We can modify an existing column by reassigning values.
 

In [None]:
import pandas as pd

# Sample employee table.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, 60000, 55000, 52000],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
)
df = pd.DataFrame(data)

# Overwrite Salary with the same values scaled by 1.05 (a 5% increase).
raised_salary = df['Salary'].mul(1.05)
df['Salary'] = raised_salary

a = df

 
## 📍  Deleting a Column
We can remove a column using `drop()`.
 

In [None]:
import pandas as pd

# Creating a sample DataFrame

data = {
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya'],
    'Salary': [50000, 60000, 55000, 52000],
    'Department': ['IT', 'HR', 'Finance', 'Marketing']
}

df = pd.DataFrame(data)

# Dropping the Salary column.
# drop() returns a new DataFrame, so the result is assigned back to `df`.

df = df.drop(columns=['Salary'])
a = df

 
## 📍 Renaming Columns
We can rename columns using `rename()`.
 

In [None]:
import pandas as pd

# Creating a sample DataFrame

data = {
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya'],
    'Salary': [50000, 60000, 55000, 52000],
    'Department': ['IT', 'HR', 'Finance', 'Marketing']
}

# Build the frame from `data` so this cell does not depend on a `df`
# left over from an earlier cell.
df = pd.DataFrame(data)

# Renaming columns: keys are the current names, values the new names.
# Columns not listed (Department) keep their name.

df = df.rename(columns={'Employee': 'Emp Name', 'Salary': 'Monthly Salary'})
a = df

 
# Topic 5: Handling Missing Data in Pandas

## 📍  Identifying Missing Data
We can check for missing values in a DataFrame using `isna()` or `isnull()`.
 

In [None]:
import pandas as pd
import numpy as np

# Sample table with one missing Salary and one missing Department.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, np.nan, 55000, 52000],
    Department=['IT', 'HR', np.nan, 'Marketing'],
)
df = pd.DataFrame(data)

# Boolean frame of the same shape: True wherever a value is missing.
missing_mask = df.isna()
a = missing_mask

# Summing the mask column-wise counts the missing entries per column.
print("Count of missing values:\n", missing_mask.sum())

In [None]:
import pandas as pd
import numpy as np

# Sample table with one missing Salary and one missing Department.
employees = ['Prasun', 'Krish', 'Amit', 'Priya']
salaries = [50000, np.nan, 55000, 52000]
departments = ['IT', 'HR', np.nan, 'Marketing']
data = {'Employee': employees, 'Salary': salaries, 'Department': departments}
df = pd.DataFrame(data)

# Per-column count of missing values: flag them, then add up the flags.
missing_mask = df.isna()
a = missing_mask.sum()

 
## 📍  Filling Missing Data
We can fill missing values using `fillna()`.
 

In [None]:
import pandas as pd
import numpy as np

# Sample table with one missing Salary and one missing Department.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Salary=[50000, np.nan, 55000, 52000],
    Department=['IT', 'HR', np.nan, 'Marketing'],
)
df = pd.DataFrame(data)

# Per-column replacements: the column mean for Salary, a placeholder
# string for Department.
replacements = {
    'Salary': df['Salary'].mean(),
    'Department': 'Unknown',
}

# fillna returns a new frame; `df` itself still contains the gaps.
df_filled = df.fillna(value=replacements)

a = df_filled

 
## 📍 Dropping Missing Data
We can drop rows or columns containing missing values using `dropna()`.
 

In [None]:
import pandas as pd
import numpy as np

# Sample table with one missing Salary and one missing Department.
employees = ['Prasun', 'Krish', 'Amit', 'Priya']
salaries = [50000, np.nan, 55000, 52000]
departments = ['IT', 'HR', np.nan, 'Marketing']
data = {'Employee': employees, 'Salary': salaries, 'Department': departments}
df = pd.DataFrame(data)

# Discard every row that has at least one missing value.
# axis=0 / how='any' are the defaults, spelled out here for clarity.
df_dropped = df.dropna(axis=0, how='any')
a = df_dropped

 
# Topic 6 : Merging, Joining, and Concatenation in Pandas

## 📍 Merging DataFrames
We can merge DataFrames using `merge()`.
 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Two tables that share the 'Employee' column.
employee_names = ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali']

df1 = pd.DataFrame({
    'Employee': list(employee_names),
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000],
})

df2 = pd.DataFrame({
    'Employee': list(employee_names),
    'Experience': [5, 7, 3, 4, 2],
})

# Combine the rows of both tables wherever the 'Employee' values match.
merged_df = df1.merge(df2, on='Employee')

a = merged_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Creating sample DataFrames
df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000]
})

df2 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Experience': [5, 7, 3, 4, 2]
})

# Merging on 'Employee' so this cell builds its own merged_df instead of
# relying on one left behind by an earlier cell.

merged_df = pd.merge(df1, df2, on='Employee')

# Adding a calculated column for Salary per Year of Experience

merged_df['Salary_per_Experience'] = merged_df['Salary'] / merged_df['Experience']

a = merged_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Creating sample DataFrames
df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000]
})

df2 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Experience': [5, 7, 3, 4, 2]
})

# Rebuilding the merged table (including the derived column) so this cell
# runs on its own and yields the same columns as when the cells run in order.

merged_df = pd.merge(df1, df2, on='Employee')
merged_df['Salary_per_Experience'] = merged_df['Salary'] / merged_df['Experience']

# Filtering for meaningful data: Employees earning above ₹40,000

filtered_df = merged_df[merged_df['Salary'] > 40000]

a = filtered_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Creating sample DataFrames

df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000]
})

df2 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Experience': [5, 7, 3, 4, 2]
})

# Rebuilding the merged and filtered tables so this cell runs on its own
# and yields the same columns as when the cells run in order.

merged_df = pd.merge(df1, df2, on='Employee')
merged_df['Salary_per_Experience'] = merged_df['Salary'] / merged_df['Experience']
filtered_df = merged_df[merged_df['Salary'] > 40000]

# Sorting by Salary, highest first

sorted_df = filtered_df.sort_values(by='Salary', ascending=False)

a = sorted_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Creating sample DataFrames
df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000]
})

df2 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Experience': [5, 7, 3, 4, 2]
})

# Merging on 'Employee' so this cell builds its own merged_df instead of
# relying on one left behind by an earlier cell.

merged_df = pd.merge(df1, df2, on='Employee')

# Grouping data: Total salary by department.
# reset_index() turns the Department group labels back into a regular column.

grouped_df = merged_df.groupby('Department').agg({'Salary': 'sum'}).reset_index()

a = grouped_df

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Creating sample DataFrames

df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing', 'Sales'],
    'Salary': [50000, 60000, 55000, 52000, 45000]
})

df2 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Anjali'],
    'Experience': [5, 7, 3, 4, 2]
})

# Merging on 'Employee' so this cell builds its own merged_df instead of
# relying on one left behind by an earlier cell.

merged_df = pd.merge(df1, df2, on='Employee')

# Visualizing Salary vs. Experience
# Salary is in the tens of thousands while Experience is single-digit, so
# each series gets its own y-axis; on a shared axis the Experience line
# would be squashed flat against the bottom.

fig, salary_ax = plt.subplots(figsize=(8, 5))
salary_ax.bar(merged_df['Employee'], merged_df['Salary'], color='blue', alpha=0.7, label='Salary')
salary_ax.set_xlabel('Employee')
salary_ax.set_ylabel('Salary')
salary_ax.set_title('Salary and Experience of Employees')

# Secondary y-axis that shares the x-axis with the bar chart.
experience_ax = salary_ax.twinx()
experience_ax.plot(merged_df['Employee'], merged_df['Experience'], marker='o', color='red', label='Experience')
experience_ax.set_ylabel('Experience')

# Each axis tracks its own artists; merge them into a single legend.
bar_handles, bar_labels = salary_ax.get_legend_handles_labels()
line_handles, line_labels = experience_ax.get_legend_handles_labels()
salary_ax.legend(bar_handles + line_handles, bar_labels + line_labels)

plt.show()

 
## 📍 Joining DataFrames
We can join DataFrames using `join()`.
 

In [None]:
import pandas as pd
import numpy as np

# Creating sample DataFrames

# Employee-level table.
df1 = pd.DataFrame({
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya'],
    'Department': ['IT', 'HR', 'Finance', 'Marketing'],
    'Salary': [50000, 60000, 55000, 52000]
})

# Department-level table: one Location per Department.
df3 = pd.DataFrame({
    'Department': ['IT', 'HR', 'Finance', 'Marketing'],
    'Location': ['Bangalore', 'Mumbai', 'Delhi', 'Pune']
})

# Joining df1 with df3.
# join() matches rows on the index, so both frames are re-indexed by
# 'Department' first; the result is therefore indexed by Department.

df4 = df1.set_index('Department').join(df3.set_index('Department'))

a = df4

In [None]:
import pandas as pd
import numpy as np

# df1: the existing employees.
df1 = pd.DataFrame(dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya'],
    Department=['IT', 'HR', 'Finance', 'Marketing'],
    Salary=[50000, 60000, 55000, 52000],
))

# df3: one Location per Department.
df3 = pd.DataFrame(dict(
    Department=['IT', 'HR', 'Finance', 'Marketing'],
    Location=['Bangalore', 'Mumbai', 'Delhi', 'Pune'],
))

# Index-based join of df1 and df3 on Department (df4 is not used by the
# concatenation below).
left_by_department = df1.set_index('Department')
right_by_department = df3.set_index('Department')
df4 = left_by_department.join(right_by_department)

# df5: two more employees with the same columns as df1.
df5 = pd.DataFrame(dict(
    Employee=['Vikas', 'Neha'],
    Department=['IT', 'HR'],
    Salary=[62000, 58000],
))

# Stack df5 underneath df1; ignore_index=True renumbers the rows 0..n-1
# instead of keeping each frame's own labels.
frames = [df1, df5]
concatenated_df = pd.concat(frames, ignore_index=True)

a = concatenated_df

 
# Topic 7 : Pivot Tables and Crosstab in Pandas

## 📍 Creating Pivot Tables
We can summarize data using `pivot_table()`.
 

In [None]:
import pandas as pd

# Sample table: six employees across four departments.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha'],
    Department=['IT', 'HR', 'Finance', 'Marketing', 'IT', 'HR'],
    Salary=[50000, 60000, 55000, 52000, 62000, 58000],
    Experience=[5, 7, 3, 4, 6, 8],
)
df = pd.DataFrame(data)

# Rows are departments, columns are experience values, and each cell holds
# the mean Salary for that pair; pairs with no data are filled with 0.
pivot_table = df.pivot_table(
    values='Salary',
    index='Department',
    columns='Experience',
    aggfunc='mean',
    fill_value=0,
)

a = pivot_table

 
## 📍 Creating a Crosstab
We can use `crosstab()` to compute frequency tables.
 

In [None]:
import pandas as pd 

employees = ['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha']
departments = ['IT', 'HR', 'Finance', 'Marketing', 'IT', 'HR']
salaries = [50000, 60000, 55000, 52000, 62000, 58000]
experience = [5, 7, 3, 4, 6, 8]
data = {
    'Employee': employees,
    'Department': departments,
    'Salary': salaries,
    'Experience': experience,
}

df = pd.DataFrame(data)

# Frequency table: how many rows fall into each (Department, Experience) pair.
crosstab_result = pd.crosstab(index=df['Department'], columns=df['Experience'])

a = crosstab_result

 
# Topic 8 : Applying Functions and Lambda in Pandas

## 📍 Using `apply()` Method
We can apply functions to Series and DataFrames using `apply()`.
 

In [None]:
import pandas as pd

# Sample table with an Experience column to categorise.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha'],
    Salary=[50000, 60000, 55000, 52000, 62000, 58000],
    Experience=[5, 7, 3, 4, 6, 8],
)
df = pd.DataFrame(data)


def experience_category(exp):
    """Map an experience value to a label.

    Below 5 is 'Junior', 5 through 7 is 'Mid-Level', anything else is 'Senior'.
    """
    if exp < 5:
        return 'Junior'
    if exp <= 7:
        return 'Mid-Level'
    return 'Senior'


# apply() calls the function once per value and collects the results.
experience_levels = df['Experience'].apply(experience_category)
df['Experience Level'] = experience_levels

a = df

 
## 📍 Using Lambda Functions
We can use `lambda` inside `apply()` to perform quick operations.
 

In [None]:
import pandas as pd

# Creating a sample DataFrame

data = {
    'Employee': ['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha'],
    'Salary': [50000, 60000, 55000, 52000, 62000, 58000],
    'Experience': [5, 7, 3, 4, 6, 8]
}

df = pd.DataFrame(data)

# Increasing salary by 10% using lambda.
# The lambda runs once per Salary value; the original Salary column is
# left untouched and the result goes into a new 'Updated Salary' column.

df['Updated Salary'] = df['Salary'].apply(lambda x: x * 1.10)

a = df

 
# Topic 9 : GroupBy Operations in Pandas

## 📍 Using `groupby()` to Aggregate Data
The `groupby()` function helps in grouping data based on specific columns.
 

In [None]:
import pandas as pd

# Sample table: eight employees, two per department.
data = dict(
    Employee=['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha', 'Ankit', 'Meera'],
    Department=['IT', 'HR', 'Finance', 'Marketing', 'IT', 'HR', 'Finance', 'Marketing'],
    Salary=[50000, 60000, 55000, 52000, 62000, 58000, 53000, 51000],
    Experience=[5, 7, 3, 4, 6, 8, 2, 5],
)
df = pd.DataFrame(data)

# Split the Salary values by Department, then average each group.
salary_by_department = df.groupby('Department')['Salary']
grouped_salary = salary_by_department.mean()

a = grouped_salary


 
## 📍 Using Multiple Aggregations
We can use multiple aggregation functions at once.
 

In [None]:
import pandas as pd

employees = ['Prasun', 'Krish', 'Amit', 'Priya', 'Vikas', 'Neha', 'Ankit', 'Meera']
departments = ['IT', 'HR', 'Finance', 'Marketing', 'IT', 'HR', 'Finance', 'Marketing']
salaries = [50000, 60000, 55000, 52000, 62000, 58000, 53000, 51000]
experience = [5, 7, 3, 4, 6, 8, 2, 5]
data = {
    'Employee': employees,
    'Department': departments,
    'Salary': salaries,
    'Experience': experience,
}

df = pd.DataFrame(data)

# Per-column aggregation plan: three statistics for Salary, one for Experience.
aggregations = {
    'Salary': ['mean', 'max', 'min'],
    'Experience': 'mean',
}

# Group the rows by Department and apply the plan to each group.
by_department = df.groupby('Department')
grouped_agg = by_department.agg(aggregations)

a = grouped_agg

 
# Topic 10 : Time Series Analysis in Pandas

## 📍 Creating and Handling Time Series Data
Pandas provides powerful tools for working with time series data.
 

In [None]:
import pandas as pd

# Ten consecutive calendar days starting 2024-01-01.
date_rng = pd.date_range(start='2024-01-01', end='2024-01-10', freq='D')

# One Sales figure per day.
daily_sales = [200, 220, 250, 210, 190, 230, 240, 280, 300, 310]

# Build the frame, then move the Date column into the index so the rows
# are addressed by timestamp.
ts_df = pd.DataFrame({'Date': date_rng, 'Sales': daily_sales})
ts_df = ts_df.set_index('Date')

a = ts_df

 
## 📍 Resampling Time Series Data
The `resample()` function helps in aggregating data over different time periods.
 

In [None]:
import pandas as pd

# Ten consecutive calendar days starting 2024-01-01.
date_rng = pd.date_range(start='2024-01-01', end='2024-01-10', freq='D')

# One Sales figure per day, indexed by Date.
daily_sales = [200, 220, 250, 210, 190, 230, 240, 280, 300, 310]
ts_df = pd.DataFrame({'Date': date_rng, 'Sales': daily_sales})
ts_df = ts_df.set_index('Date')

# Bucket the daily rows into weekly bins and total each bin.
weekly_bins = ts_df.resample('W')
weekly_sales = weekly_bins.sum()

a = weekly_sales

 
## 📍 Rolling Window Analysis
Rolling operations help in calculating moving averages and trends.
 

In [None]:
import pandas as pd

# Ten consecutive calendar days starting 2024-01-01.
date_rng = pd.date_range(start='2024-01-01', end='2024-01-10', freq='D')

# One Sales figure per day, indexed by Date.
daily_sales = [200, 220, 250, 210, 190, 230, 240, 280, 300, 310]
ts_df = pd.DataFrame({'Date': date_rng, 'Sales': daily_sales})
ts_df = ts_df.set_index('Date')

# 3-row moving average of Sales. The first two rows have fewer than three
# observations behind them, so their average is NaN.
three_day_window = ts_df['Sales'].rolling(window=3)
ts_df['Moving_Avg'] = three_day_window.mean()

a = ts_df


 
## 📍 Creating a DataFrame from a CSV file

Reading a CSV file into a DataFrame:

```python
df_csv = pd.read_csv("employees.csv")
```

 
## 📍 Creating a DataFrame from an Excel file 

Reading an Excel file into a DataFrame:

```python
df_excel = pd.read_excel("sales.xlsx")
```