# 📌 Pandas Basics - Complete Guide

This notebook covers all fundamental operations and functions in Pandas, step by step.

## 📍 1. Importing Pandas

In [2]:
import pandas as pd

## 📍 2. Creating a Basic DataFrame

In [3]:
# Sample Employee Data
data = {
    'Employee': ['Alice', 'Bob', 'Charlie', 'David', 'Eva'],
    'Salary': [50000, 60000, 55000, 70000, 65000],
    'Department': ['IT', 'HR', 'Finance', 'IT', 'HR'],
    'Joining_Date': ['2015-06-23', '2018-07-18', '2016-08-21', '2019-09-12', '2020-01-10']
}

# Creating DataFrame
employee_data = pd.DataFrame(data)
employee_data['Joining_Date'] = pd.to_datetime(employee_data['Joining_Date']) # Convert to datetime

# Display DataFrame
employee_data

Unnamed: 0,Employee,Salary,Department,Joining_Date
0,Alice,50000,IT,2015-06-23
1,Bob,60000,HR,2018-07-18
2,Charlie,55000,Finance,2016-08-21
3,David,70000,IT,2019-09-12
4,Eva,65000,HR,2020-01-10


## 📍 3. Basic Data Exploration

In [4]:
# Display first 3 rows
employee_data.head(3)

Unnamed: 0,Employee,Salary,Department,Joining_Date
0,Alice,50000,IT,2015-06-23
1,Bob,60000,HR,2018-07-18
2,Charlie,55000,Finance,2016-08-21


In [5]:
# Display last 2 rows
employee_data.tail(2)

Unnamed: 0,Employee,Salary,Department,Joining_Date
3,David,70000,IT,2019-09-12
4,Eva,65000,HR,2020-01-10


In [6]:
# Summary Information
employee_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   Employee      5 non-null      object        
 1   Salary        5 non-null      int64         
 2   Department    5 non-null      object        
 3   Joining_Date  5 non-null      datetime64[ns]
dtypes: datetime64[ns](1), int64(1), object(2)
memory usage: 292.0+ bytes


In [7]:
# Statistical Summary
employee_data.describe()

Unnamed: 0,Salary,Joining_Date
count,5.0,5
mean,60000.0,2018-01-28 00:00:00
min,50000.0,2015-06-23 00:00:00
25%,55000.0,2016-08-21 00:00:00
50%,60000.0,2018-07-18 00:00:00
75%,65000.0,2019-09-12 00:00:00
max,70000.0,2020-01-10 00:00:00
std,7905.69415,


## 📍 4. Selecting Data

In [8]:
# Select single column
employee_data['Salary']

0    50000
1    60000
2    55000
3    70000
4    65000
Name: Salary, dtype: int64

In [9]:
# Select multiple columns
employee_data[['Employee', 'Department']]

Unnamed: 0,Employee,Department
0,Alice,IT
1,Bob,HR
2,Charlie,Finance
3,David,IT
4,Eva,HR


In [10]:
# Select rows using loc (label-based)
employee_data.loc[1:3, ['Employee', 'Salary']]

Unnamed: 0,Employee,Salary
1,Bob,60000
2,Charlie,55000
3,David,70000


In [11]:
# Select rows using iloc (position-based)
employee_data.iloc[0:2, 0:3]

Unnamed: 0,Employee,Salary,Department
0,Alice,50000,IT
1,Bob,60000,HR


## 📍 5. Adding & Removing Columns

In [12]:
# Add a new column
employee_data['Bonus'] = employee_data['Salary'] * 0.1
employee_data

Unnamed: 0,Employee,Salary,Department,Joining_Date,Bonus
0,Alice,50000,IT,2015-06-23,5000.0
1,Bob,60000,HR,2018-07-18,6000.0
2,Charlie,55000,Finance,2016-08-21,5500.0
3,David,70000,IT,2019-09-12,7000.0
4,Eva,65000,HR,2020-01-10,6500.0


In [13]:
# Drop a column
employee_data.drop(columns=['Bonus'], inplace=True)
employee_data

Unnamed: 0,Employee,Salary,Department,Joining_Date
0,Alice,50000,IT,2015-06-23
1,Bob,60000,HR,2018-07-18
2,Charlie,55000,Finance,2016-08-21
3,David,70000,IT,2019-09-12
4,Eva,65000,HR,2020-01-10


## 📍 6. Sorting & Ranking

In [14]:
# Sort by Salary descending
employee_data.sort_values(by='Salary', ascending=False)

Unnamed: 0,Employee,Salary,Department,Joining_Date
3,David,70000,IT,2019-09-12
4,Eva,65000,HR,2020-01-10
1,Bob,60000,HR,2018-07-18
2,Charlie,55000,Finance,2016-08-21
0,Alice,50000,IT,2015-06-23


In [15]:
# Rank salaries
employee_data['Salary_Rank'] = employee_data['Salary'].rank(method='dense', ascending=False)
employee_data

Unnamed: 0,Employee,Salary,Department,Joining_Date,Salary_Rank
0,Alice,50000,IT,2015-06-23,5.0
1,Bob,60000,HR,2018-07-18,3.0
2,Charlie,55000,Finance,2016-08-21,4.0
3,David,70000,IT,2019-09-12,1.0
4,Eva,65000,HR,2020-01-10,2.0


## 📍 7. Aggregations

In [16]:
# Group by Department and calculate average salary
employee_data.groupby('Department')['Salary'].mean()

Department
Finance    55000.0
HR         62500.0
IT         60000.0
Name: Salary, dtype: float64

## 📍 8. Merging DataFrames

In [17]:
# Creating another DataFrame
new_hires = pd.DataFrame({
    'Employee': ['Frank', 'Grace'],
    'Salary': [48000, 52000],
    'Department': ['IT', 'Finance']
})

# Concatenating both DataFrames
combined_data = pd.concat([employee_data, new_hires], ignore_index=True)
combined_data

Unnamed: 0,Employee,Salary,Department,Joining_Date,Salary_Rank
0,Alice,50000,IT,2015-06-23,5.0
1,Bob,60000,HR,2018-07-18,3.0
2,Charlie,55000,Finance,2016-08-21,4.0
3,David,70000,IT,2019-09-12,1.0
4,Eva,65000,HR,2020-01-10,2.0
5,Frank,48000,IT,NaT,
6,Grace,52000,Finance,NaT,


## 📍 9. Handling Missing Data

In [18]:
# Checking for missing values
combined_data.isna().sum()

Employee        0
Salary          0
Department      0
Joining_Date    2
Salary_Rank     2
dtype: int64

In [19]:
# Filling missing values
combined_data.fillna({'Salary': 50000}, inplace=True)
combined_data

Unnamed: 0,Employee,Salary,Department,Joining_Date,Salary_Rank
0,Alice,50000,IT,2015-06-23,5.0
1,Bob,60000,HR,2018-07-18,3.0
2,Charlie,55000,Finance,2016-08-21,4.0
3,David,70000,IT,2019-09-12,1.0
4,Eva,65000,HR,2020-01-10,2.0
5,Frank,48000,IT,NaT,
6,Grace,52000,Finance,NaT,


## 📍 10. Exporting Data

#### Export to CSV (won't work in this browser) - Try it in your machine.
#### combined_data.to_csv('employees.csv', index=False)