[Pandas Data Cleaning and Preprocessing](https://www.w3resource.com/python-exercises/pandas/pandas-data-cleaning-and-preprocessing.php)

In [1]:
import pandas as pd
import numpy as np

<div class="alert alert-warning">

**1. Handling Missing Data in Pandas**

**Write a Pandas program to fill missing values (NaN) in a DataFrame using fillna().**

</div>

In [2]:
# Create date range
date_range = pd.date_range(start='2023-01-01', periods=35, freq='D')

# Create a dictionary of data
data = {
    'date': date_range,
    'customer_id': np.random.randint(1000, 2000, size=35),
    'product_id': np.random.randint(100, 200, size=35),
    'quantity': np.random.randint(1, 10, size=35),
    'price': np.round(np.random.uniform(10.0, 100.0, size=35), 2),
    'discount': np.round(np.random.uniform(0, 0.3, size=35), 2),
    'revenue': lambda df: df['quantity'] * df['price'] * (1 - df['discount']),
    'store_id': np.random.randint(1, 10, size=35),
    'region': np.random.choice(['North', 'South', 'East', 'West'], size=35),
    'payment_method': np.random.choice(['Credit Card', 'Cash', 'Online'], size=35),
    'is_member': np.random.choice([True, False], size=35),
    'delivery_days': np.random.randint(1, 7, size=35),
    'customer_age': np.random.randint(18, 70, size=35),
    'gender': np.random.choice(['Male', 'Female', 'Other'], size=35),
    'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Food'], size=35),
    'rating': np.round(np.random.uniform(1.0, 5.0, size=35), 1),
    'return_status': np.random.choice(['Returned', 'Not Returned'], size=35),
    'feedback': np.random.choice(['Positive', 'Neutral', 'Negative'], size=35),
}

# Create DataFrame
df = pd.DataFrame(data)

# Compute revenue using the lambda function above
df['revenue'] = df['quantity'] * df['price'] * (1 - df['discount'])

# Add 'shipping_cost' with some missing values
df['shipping_cost'] = np.where(np.random.rand(len(df)) < 0.2, np.nan, np.round(np.random.uniform(5.0, 20.0, size=len(df)), 2))

# Add 'coupon_code' with some missing values
coupon_options = ['SAVE10', 'WELCOME5', 'FREESHIP', np.nan]
df['coupon_code'] = np.random.choice(coupon_options, size=len(df), p=[0.2, 0.2, 0.2, 0.4])

# Add 'delivery_rating' with some missing values
df['delivery_rating'] = np.where(np.random.rand(len(df)) < 0.3, np.nan, np.round(np.random.uniform(1.0, 5.0, size=len(df)), 1))

# Display the updated DataFrame
display(df)

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1226,198,8,64.49,0.05,490.124,7,North,Online,...,2,50,Male,Electronics,3.7,Not Returned,Negative,,WELCOME5,4.2
1,2023-01-02,1104,183,9,18.8,0.11,150.588,2,East,Credit Card,...,5,56,Female,Electronics,2.5,Returned,Positive,13.19,,1.7
2,2023-01-03,1598,110,1,79.65,0.28,57.348,7,West,Credit Card,...,2,65,Female,Food,3.7,Not Returned,Positive,13.75,,3.6
3,2023-01-04,1902,102,3,10.23,0.21,24.2451,1,North,Online,...,1,66,Female,Electronics,1.8,Not Returned,Neutral,12.84,SAVE10,
4,2023-01-05,1647,107,2,53.53,0.2,85.648,7,West,Credit Card,...,5,69,Female,Books,1.7,Returned,Positive,13.13,WELCOME5,
5,2023-01-06,1903,170,9,59.82,0.09,489.9258,6,South,Credit Card,...,1,23,Male,Books,2.6,Not Returned,Positive,12.2,FREESHIP,2.5
6,2023-01-07,1956,155,3,24.37,0.2,58.488,9,South,Online,...,1,62,Female,Food,1.7,Not Returned,Negative,10.02,,1.3
7,2023-01-08,1615,145,1,29.76,0.21,23.5104,9,East,Credit Card,...,2,41,Other,Electronics,3.6,Not Returned,Positive,,FREESHIP,2.4
8,2023-01-09,1357,138,4,64.86,0.17,215.3352,3,West,Cash,...,5,49,Other,Electronics,3.9,Returned,Negative,12.9,,
9,2023-01-10,1493,148,2,74.57,0.24,113.3464,5,North,Online,...,2,40,Male,Electronics,3.4,Returned,Neutral,17.2,SAVE10,2.9


<div class="alert alert-success">

**Solution 01:**
</div>

In [3]:
# 1. Handling Missing Data in Pandas
# Write a Pandas program to fill missing values (NaN) in a DataFrame using fillna().
result_df = df.fillna({
    'shipping_cost': df['shipping_cost'].mean(),
    'coupon_code': 'No Coupon',
    'delivery_rating': df['delivery_rating'].mean()
})

result_df

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1226,198,8,64.49,0.05,490.124,7,North,Online,...,2,50,Male,Electronics,3.7,Not Returned,Negative,13.41,WELCOME5,4.2
1,2023-01-02,1104,183,9,18.8,0.11,150.588,2,East,Credit Card,...,5,56,Female,Electronics,2.5,Returned,Positive,13.19,,1.7
2,2023-01-03,1598,110,1,79.65,0.28,57.348,7,West,Credit Card,...,2,65,Female,Food,3.7,Not Returned,Positive,13.75,,3.6
3,2023-01-04,1902,102,3,10.23,0.21,24.2451,1,North,Online,...,1,66,Female,Electronics,1.8,Not Returned,Neutral,12.84,SAVE10,3.2
4,2023-01-05,1647,107,2,53.53,0.2,85.648,7,West,Credit Card,...,5,69,Female,Books,1.7,Returned,Positive,13.13,WELCOME5,3.2
5,2023-01-06,1903,170,9,59.82,0.09,489.9258,6,South,Credit Card,...,1,23,Male,Books,2.6,Not Returned,Positive,12.2,FREESHIP,2.5
6,2023-01-07,1956,155,3,24.37,0.2,58.488,9,South,Online,...,1,62,Female,Food,1.7,Not Returned,Negative,10.02,,1.3
7,2023-01-08,1615,145,1,29.76,0.21,23.5104,9,East,Credit Card,...,2,41,Other,Electronics,3.6,Not Returned,Positive,13.41,FREESHIP,2.4
8,2023-01-09,1357,138,4,64.86,0.17,215.3352,3,West,Cash,...,5,49,Other,Electronics,3.9,Returned,Negative,12.9,,3.2
9,2023-01-10,1493,148,2,74.57,0.24,113.3464,5,North,Online,...,2,40,Male,Electronics,3.4,Returned,Neutral,17.2,SAVE10,2.9


<div class="alert alert-warning">

**2. Removing Missing Data observations in Pandas**

**Write a Pandas program to drop rows with missing data.**

</div>

In [4]:
# Create date range
date_range = pd.date_range(start='2023-01-01', periods=35, freq='D')

# Create a dictionary of data
data = {
    'date': date_range,
    'customer_id': np.random.randint(1000, 2000, size=35),
    'product_id': np.random.randint(100, 200, size=35),
    'quantity': np.random.randint(1, 10, size=35),
    'price': np.round(np.random.uniform(10.0, 100.0, size=35), 2),
    'discount': np.round(np.random.uniform(0, 0.3, size=35), 2),
    'revenue': lambda df: df['quantity'] * df['price'] * (1 - df['discount']),
    'store_id': np.random.randint(1, 10, size=35),
    'region': np.random.choice(['North', 'South', 'East', 'West'], size=35),
    'payment_method': np.random.choice(['Credit Card', 'Cash', 'Online'], size=35),
    'is_member': np.random.choice([True, False], size=35),
    'delivery_days': np.random.randint(1, 7, size=35),
    'customer_age': np.random.randint(18, 70, size=35),
    'gender': np.random.choice(['Male', 'Female', 'Other'], size=35),
    'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Food'], size=35),
    'rating': np.round(np.random.uniform(1.0, 5.0, size=35), 1),
    'return_status': np.random.choice(['Returned', 'Not Returned'], size=35),
    'feedback': np.random.choice(['Positive', 'Neutral', 'Negative'], size=35),
}

# Create DataFrame
df = pd.DataFrame(data)

# Compute revenue using the lambda function above
df['revenue'] = df['quantity'] * df['price'] * (1 - df['discount'])

# Add 'shipping_cost' with some missing values
df['shipping_cost'] = np.where(np.random.rand(len(df)) < 0.2, np.nan, np.round(np.random.uniform(5.0, 20.0, size=len(df)), 2))

# Add 'coupon_code' with some missing values
coupon_options = ['SAVE10', 'WELCOME5', 'FREESHIP', np.nan]
df['coupon_code'] = np.random.choice(coupon_options, size=len(df), p=[0.2, 0.2, 0.2, 0.4])

# Add 'delivery_rating' with some missing values
df['delivery_rating'] = np.where(np.random.rand(len(df)) < 0.3, np.nan, np.round(np.random.uniform(1.0, 5.0, size=len(df)), 1))

# Display the updated DataFrame
display(df)

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1970,110,4,37.5,0.16,126.0,8,South,Cash,...,3,63,Female,Food,3.9,Returned,Positive,8.0,SAVE10,
1,2023-01-02,1582,125,6,33.94,0.26,150.6936,1,South,Online,...,4,36,Female,Electronics,1.7,Not Returned,Neutral,5.84,WELCOME5,3.8
2,2023-01-03,1481,129,9,48.04,0.11,384.8004,2,East,Cash,...,3,25,Male,Food,2.1,Not Returned,Positive,,FREESHIP,
3,2023-01-04,1135,157,5,98.49,0.03,477.6765,5,North,Credit Card,...,6,41,Other,Food,2.9,Not Returned,Positive,13.89,FREESHIP,
4,2023-01-05,1632,162,1,29.4,0.13,25.578,7,South,Cash,...,2,39,Female,Electronics,2.3,Returned,Negative,19.44,,1.6
5,2023-01-06,1329,126,7,83.63,0.12,515.1608,9,East,Online,...,5,41,Male,Food,2.8,Not Returned,Neutral,,,
6,2023-01-07,1740,169,6,87.73,0.06,494.7972,4,South,Online,...,6,42,Male,Clothing,2.2,Returned,Neutral,14.87,WELCOME5,1.0
7,2023-01-08,1106,154,5,52.01,0.29,184.6355,7,West,Cash,...,6,22,Other,Books,3.9,Returned,Neutral,13.58,WELCOME5,4.9
8,2023-01-09,1034,104,4,12.82,0.08,47.1776,7,South,Credit Card,...,1,63,Female,Clothing,1.9,Not Returned,Negative,14.61,,1.9
9,2023-01-10,1679,192,6,45.77,0.08,252.6504,4,South,Cash,...,5,55,Male,Electronics,2.9,Returned,Negative,10.15,,


<div class="alert alert-success">

**Solution 02:**
</div>

In [5]:
# 2. Handling Duplicates in Pandas
# Write a Pandas program to drop rows with missing data.

result_df = df.dropna()
result_df

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
1,2023-01-02,1582,125,6,33.94,0.26,150.6936,1,South,Online,...,4,36,Female,Electronics,1.7,Not Returned,Neutral,5.84,WELCOME5,3.8
4,2023-01-05,1632,162,1,29.4,0.13,25.578,7,South,Cash,...,2,39,Female,Electronics,2.3,Returned,Negative,19.44,,1.6
6,2023-01-07,1740,169,6,87.73,0.06,494.7972,4,South,Online,...,6,42,Male,Clothing,2.2,Returned,Neutral,14.87,WELCOME5,1.0
7,2023-01-08,1106,154,5,52.01,0.29,184.6355,7,West,Cash,...,6,22,Other,Books,3.9,Returned,Neutral,13.58,WELCOME5,4.9
8,2023-01-09,1034,104,4,12.82,0.08,47.1776,7,South,Credit Card,...,1,63,Female,Clothing,1.9,Not Returned,Negative,14.61,,1.9
10,2023-01-11,1293,127,1,50.05,0.27,36.5365,8,North,Credit Card,...,4,38,Male,Clothing,1.2,Not Returned,Positive,16.97,WELCOME5,3.7
11,2023-01-12,1156,129,9,36.71,0.1,297.351,6,West,Online,...,5,56,Other,Electronics,1.6,Returned,Negative,14.56,,2.4
15,2023-01-16,1304,141,9,26.34,0.04,227.5776,7,West,Cash,...,4,61,Female,Clothing,3.8,Not Returned,Negative,7.22,,1.9
16,2023-01-17,1992,143,9,33.92,0.18,250.3296,1,South,Credit Card,...,2,49,Other,Electronics,3.5,Not Returned,Negative,17.82,,4.3
18,2023-01-19,1018,194,5,69.64,0.12,306.416,2,South,Online,...,4,52,Female,Electronics,1.0,Not Returned,Negative,14.81,FREESHIP,1.4


<div class="alert alert-warning">

**3. Detecting Duplicates in Pandas**

**Write a Pandas program to detect duplicates using duplicated() method.**

</div>

In [6]:
# Create date range
date_range = pd.date_range(start='2023-01-01', periods=35, freq='D')

# Create a dictionary of data
data = {
    'date': date_range,
    'customer_id': np.random.randint(1000, 2000, size=35),
    'product_id': np.random.randint(100, 200, size=35),
    'quantity': np.random.randint(1, 10, size=35),
    'price': np.round(np.random.uniform(10.0, 100.0, size=35), 2),
    'discount': np.round(np.random.uniform(0, 0.3, size=35), 2),
    'revenue': lambda df: df['quantity'] * df['price'] * (1 - df['discount']),
    'store_id': np.random.randint(1, 10, size=35),
    'region': np.random.choice(['North', 'South', 'East', 'West'], size=35),
    'payment_method': np.random.choice(['Credit Card', 'Cash', 'Online'], size=35),
    'is_member': np.random.choice([True, False], size=35),
    'delivery_days': np.random.randint(1, 7, size=35),
    'customer_age': np.random.randint(18, 70, size=35),
    'gender': np.random.choice(['Male', 'Female', 'Other'], size=35),
    'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Food'], size=35),
    'rating': np.round(np.random.uniform(1.0, 5.0, size=35), 1),
    'return_status': np.random.choice(['Returned', 'Not Returned'], size=35),
    'feedback': np.random.choice(['Positive', 'Neutral', 'Negative'], size=35),
}

# Create DataFrame
df = pd.DataFrame(data)

# Compute revenue using the lambda function above
df['revenue'] = df['quantity'] * df['price'] * (1 - df['discount'])

# Add 'shipping_cost' with some missing values
df['shipping_cost'] = np.where(np.random.rand(len(df)) < 0.2, np.nan, np.round(np.random.uniform(5.0, 20.0, size=len(df)), 2))

# Add 'coupon_code' with some missing values
coupon_options = ['SAVE10', 'WELCOME5', 'FREESHIP', np.nan]
df['coupon_code'] = np.random.choice(coupon_options, size=len(df), p=[0.2, 0.2, 0.2, 0.4])

# Add 'delivery_rating' with some missing values
df['delivery_rating'] = np.where(np.random.rand(len(df)) < 0.3, np.nan, np.round(np.random.uniform(1.0, 5.0, size=len(df)), 1))


duplicates = df.sample(n=5, replace=False)  # Select 5 random rows without replacement
df = pd.concat([df, duplicates], ignore_index=True)

# Display the updated DataFrame
display(df)

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1087,171,6,77.82,0.06,438.9048,1,North,Credit Card,...,4,38,Female,Electronics,2.7,Not Returned,Negative,13.15,,
1,2023-01-02,1050,147,3,52.16,0.28,112.6656,3,North,Cash,...,6,48,Male,Electronics,1.8,Returned,Positive,,,2.7
2,2023-01-03,1279,187,4,27.03,0.2,86.496,3,West,Credit Card,...,1,53,Other,Books,2.8,Not Returned,Negative,11.63,WELCOME5,1.0
3,2023-01-04,1511,187,1,57.89,0.29,41.1019,1,West,Credit Card,...,4,21,Female,Food,1.2,Not Returned,Neutral,16.18,WELCOME5,3.8
4,2023-01-05,1476,137,8,78.49,0.18,514.8944,4,East,Cash,...,6,22,Other,Clothing,2.9,Not Returned,Positive,19.71,,2.5
5,2023-01-06,1127,126,4,83.79,0.15,284.886,7,East,Cash,...,3,46,Female,Books,3.4,Returned,Neutral,,SAVE10,
6,2023-01-07,1444,135,5,80.19,0.08,368.874,3,West,Credit Card,...,1,59,Other,Books,3.0,Not Returned,Positive,,,3.0
7,2023-01-08,1509,113,8,31.84,0.24,193.5872,1,South,Cash,...,6,65,Male,Electronics,4.4,Returned,Neutral,12.69,FREESHIP,3.0
8,2023-01-09,1008,139,6,84.23,0.14,434.6268,7,South,Credit Card,...,4,61,Female,Clothing,3.5,Not Returned,Positive,,,2.2
9,2023-01-10,1020,188,3,71.54,0.16,180.2808,9,East,Online,...,1,25,Male,Food,2.7,Returned,Negative,16.46,WELCOME5,2.5


<div class="alert alert-success">

**Solution 03:**
</div>

In [7]:
# 3. Data Cleaning Techniques
# Write a Pandas program to detect duplicates using duplicated() method.
duplicates = df.duplicated()
duplicates_df = df[duplicates]
duplicates_df

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
35,2023-01-28,1151,109,2,87.44,0.28,125.9136,3,West,Online,...,4,31,Female,Clothing,3.3,Not Returned,Neutral,18.12,FREESHIP,1.8
36,2023-01-29,1412,118,6,35.58,0.05,202.806,6,North,Online,...,6,23,Female,Clothing,4.7,Not Returned,Negative,,,4.9
37,2023-01-31,1024,145,5,83.17,0.3,291.095,4,West,Credit Card,...,6,51,Other,Clothing,4.3,Returned,Positive,9.41,FREESHIP,3.1
38,2023-02-02,1748,192,5,89.31,0.26,330.447,8,North,Cash,...,6,23,Female,Food,2.4,Returned,Neutral,5.83,SAVE10,
39,2023-01-17,1703,163,3,99.79,0.09,272.4267,4,West,Cash,...,3,49,Male,Food,2.1,Returned,Positive,10.25,,


<div class="alert alert-warning">

**4. Handling Duplicates in Pandas**

**Write a Pandas program to remove duplicates rows from a DataFrame.**

</div>

In [8]:
# Create date range
date_range = pd.date_range(start='2023-01-01', periods=35, freq='D')

# Create a dictionary of data
data = {
    'date': date_range,
    'customer_id': np.random.randint(1000, 2000, size=35),
    'product_id': np.random.randint(100, 200, size=35),
    'quantity': np.random.randint(1, 10, size=35),
    'price': np.round(np.random.uniform(10.0, 100.0, size=35), 2),
    'discount': np.round(np.random.uniform(0, 0.3, size=35), 2),
    'revenue': lambda df: df['quantity'] * df['price'] * (1 - df['discount']),
    'store_id': np.random.randint(1, 10, size=35),
    'region': np.random.choice(['North', 'South', 'East', 'West'], size=35),
    'payment_method': np.random.choice(['Credit Card', 'Cash', 'Online'], size=35),
    'is_member': np.random.choice([True, False], size=35),
    'delivery_days': np.random.randint(1, 7, size=35),
    'customer_age': np.random.randint(18, 70, size=35),
    'gender': np.random.choice(['Male', 'Female', 'Other'], size=35),
    'product_category': np.random.choice(['Electronics', 'Clothing', 'Books', 'Food'], size=35),
    'rating': np.round(np.random.uniform(1.0, 5.0, size=35), 1),
    'return_status': np.random.choice(['Returned', 'Not Returned'], size=35),
    'feedback': np.random.choice(['Positive', 'Neutral', 'Negative'], size=35),
}

# Create DataFrame
df = pd.DataFrame(data)

# Compute revenue using the lambda function above
df['revenue'] = df['quantity'] * df['price'] * (1 - df['discount'])

# Add 'shipping_cost' with some missing values
df['shipping_cost'] = np.where(np.random.rand(len(df)) < 0.2, np.nan, np.round(np.random.uniform(5.0, 20.0, size=len(df)), 2))

# Add 'coupon_code' with some missing values
coupon_options = ['SAVE10', 'WELCOME5', 'FREESHIP', np.nan]
df['coupon_code'] = np.random.choice(coupon_options, size=len(df), p=[0.2, 0.2, 0.2, 0.4])

# Add 'delivery_rating' with some missing values
df['delivery_rating'] = np.where(np.random.rand(len(df)) < 0.3, np.nan, np.round(np.random.uniform(1.0, 5.0, size=len(df)), 1))


duplicates = df.sample(n=5, replace=False)  # Select 5 random rows without replacement
df = pd.concat([df, duplicates], ignore_index=True)

# Display the updated DataFrame
display(df)

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1318,104,6,71.6,0.18,352.272,2,North,Cash,...,4,67,Female,Books,2.0,Returned,Negative,,,
1,2023-01-02,1770,104,7,65.97,0.08,424.8468,4,South,Cash,...,4,40,Female,Clothing,3.1,Returned,Positive,13.97,,2.3
2,2023-01-03,1463,151,5,77.43,0.11,344.5635,4,East,Credit Card,...,4,59,Other,Books,3.5,Not Returned,Negative,8.1,SAVE10,4.6
3,2023-01-04,1434,159,1,30.51,0.27,22.2723,5,North,Online,...,6,60,Female,Electronics,4.2,Returned,Neutral,12.84,WELCOME5,3.4
4,2023-01-05,1237,146,6,22.36,0.25,100.62,3,South,Cash,...,6,61,Female,Food,3.9,Not Returned,Negative,,,
5,2023-01-06,1905,102,5,32.84,0.08,151.064,6,South,Cash,...,6,27,Female,Electronics,2.9,Returned,Negative,10.46,WELCOME5,5.0
6,2023-01-07,1495,116,9,45.71,0.22,320.8842,9,South,Cash,...,1,44,Other,Clothing,3.7,Returned,Negative,5.42,FREESHIP,3.3
7,2023-01-08,1860,117,4,70.18,0.11,249.8408,4,North,Online,...,2,61,Male,Books,5.0,Not Returned,Neutral,,FREESHIP,3.9
8,2023-01-09,1398,195,8,23.19,0.27,135.4296,8,North,Online,...,2,41,Male,Clothing,3.5,Returned,Positive,18.48,WELCOME5,
9,2023-01-10,1397,177,8,43.17,0.24,262.4736,4,West,Credit Card,...,2,51,Male,Clothing,3.5,Returned,Neutral,7.87,FREESHIP,2.2


<div class="alert alert-success">

**Solution 04:**
</div>

In [9]:
df.drop_duplicates()

Unnamed: 0,date,customer_id,product_id,quantity,price,discount,revenue,store_id,region,payment_method,...,delivery_days,customer_age,gender,product_category,rating,return_status,feedback,shipping_cost,coupon_code,delivery_rating
0,2023-01-01,1318,104,6,71.6,0.18,352.272,2,North,Cash,...,4,67,Female,Books,2.0,Returned,Negative,,,
1,2023-01-02,1770,104,7,65.97,0.08,424.8468,4,South,Cash,...,4,40,Female,Clothing,3.1,Returned,Positive,13.97,,2.3
2,2023-01-03,1463,151,5,77.43,0.11,344.5635,4,East,Credit Card,...,4,59,Other,Books,3.5,Not Returned,Negative,8.1,SAVE10,4.6
3,2023-01-04,1434,159,1,30.51,0.27,22.2723,5,North,Online,...,6,60,Female,Electronics,4.2,Returned,Neutral,12.84,WELCOME5,3.4
4,2023-01-05,1237,146,6,22.36,0.25,100.62,3,South,Cash,...,6,61,Female,Food,3.9,Not Returned,Negative,,,
5,2023-01-06,1905,102,5,32.84,0.08,151.064,6,South,Cash,...,6,27,Female,Electronics,2.9,Returned,Negative,10.46,WELCOME5,5.0
6,2023-01-07,1495,116,9,45.71,0.22,320.8842,9,South,Cash,...,1,44,Other,Clothing,3.7,Returned,Negative,5.42,FREESHIP,3.3
7,2023-01-08,1860,117,4,70.18,0.11,249.8408,4,North,Online,...,2,61,Male,Books,5.0,Not Returned,Neutral,,FREESHIP,3.9
8,2023-01-09,1398,195,8,23.19,0.27,135.4296,8,North,Online,...,2,41,Male,Clothing,3.5,Returned,Positive,18.48,WELCOME5,
9,2023-01-10,1397,177,8,43.17,0.24,262.4736,4,West,Credit Card,...,2,51,Male,Clothing,3.5,Returned,Neutral,7.87,FREESHIP,2.2


In [10]:
# 4. Handling Duplicates in Pandas
# Write a Pandas program to remove duplicates rows from a DataFrame.

df = df.drop_duplicates()
df.reset_index(drop=True, inplace=True)