#1. Importing Libraries

In [1]:
import pandas as pd
import numpy as np


#2. Sample Datasets

In [2]:
# Dataset 1: Student Information
data1 = {
    'StudentID': [101, 102, 103, 104, 105],
    'Name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'Age': [21, 22, np.nan, 24, 25],
    'Email': ['alice@example.com', None, 'charlie@abc.com', 'david@xyz.com', 'eve@example.com']
}

df1 = pd.DataFrame(data1)

# Dataset 2: Marks Information
data2 = {
    'StudentID': [101, 102, 103, 106],
    'Marks': [88, 76, 90, 85]
}

df2 = pd.DataFrame(data2)


#3. Data Cleaning Examples

3.1 Checking for Null Values

In [3]:
print("Null values in df1:\n", df1.isnull().sum())


Null values in df1:
 StudentID    0
Name         0
Age          1
Email        1
dtype: int64


3.2 Fill Missing Age with Mean

In [4]:
df1['Age'].fillna(df1['Age'].mean(), inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df1['Age'].fillna(df1['Age'].mean(), inplace=True)


3.3 Drop Rows with Missing Email

In [5]:
df1.dropna(subset=['Email'], inplace=True)


 Remove Duplicates (if any)

In [6]:
df1.drop_duplicates(inplace=True)


#4. Pandas Joins

4.1 Inner Join (Only matching StudentIDs)

In [7]:
inner_join = pd.merge(df1, df2, on='StudentID', how='inner')
print("Inner Join:\n", inner_join)


Inner Join:
    StudentID     Name   Age              Email  Marks
0        101    Alice  21.0  alice@example.com     88
1        103  Charlie  23.0    charlie@abc.com     90


4.2 Left Join (All from df1, matching from df2)

In [8]:
left_join = pd.merge(df1, df2, on='StudentID', how='left')
print("Left Join:\n", left_join)


Left Join:
    StudentID     Name   Age              Email  Marks
0        101    Alice  21.0  alice@example.com   88.0
1        103  Charlie  23.0    charlie@abc.com   90.0
2        104    David  24.0      david@xyz.com    NaN
3        105      Eve  25.0    eve@example.com    NaN
