In [1]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np

# Step 2: Create Sample Customer Feedback Data
data = {
    "Feedback_ID": [101, 102, 103, 104, 105, 106, 107, 107],
    "Customer_Name": ["Alice", "Bob", None, "David", "Eva", "Frank", "Grace", "Grace"],
    "Email": ["alice@mail.com", "bob@mail.com", "charlie@mail.com", None, "eva@mail.com", "frank@mail.com", "grace@mail.com", "grace@mail.com"],
    "Feedback_Text": ["Great service!", None, "Average experience", "Loved the product", "Could be better", "Excellent!", "Good support", "Good support"],
    "Rating": [5, 4, None, 5, 3, None, 4, 4],
    "Feedback_Date": ["2024-01-15", "2024-02-10", "2024-03-05", None, "2024-04-20", "2024-05-30", "2024-06-12", "2024-06-12"]
}

df = pd.DataFrame(data)
print("Original Data:\n", df)

# Step 3: Identify Missing Values
print("\nMissing Values:\n", df.isnull().sum())

# Step 4: Drop Rows with Missing Email (Critical Data)
df = df.dropna(subset=["Email"])
print("\nAfter Dropping Rows with Missing Email:\n", df)

# Step 5: Fill Missing Values
df["Customer_Name"].fillna("Unknown", inplace=True)  # Fill missing customer names
df["Feedback_Text"].fillna("No feedback provided", inplace=True)  # Fill missing feedback text
df["Rating"].fillna(df["Rating"].median(), inplace=True)  # Fill missing ratings with median

print("\nAfter Filling Missing Values:\n", df)

# Step 6: Convert Feedback_Date to Date Format
df["Feedback_Date"] = pd.to_datetime(df["Feedback_Date"])
print("\nData Types After Conversion:\n", df.dtypes)

# Step 7: Remove Duplicate Feedback Records
df = df.drop_duplicates()
print("\nAfter Removing Duplicates:\n", df)

# Step 8: Rename Columns for Clarity
df = df.rename(columns={
    "Feedback_Text": "Comment",
    "Rating": "Score",
    "Feedback_Date": "Date_Submitted"
})
print("\nFinal Cleaned Customer Feedback Data:\n", df.head())


Original Data:
    Feedback_ID Customer_Name             Email       Feedback_Text  Rating  \
0          101         Alice    alice@mail.com      Great service!     5.0   
1          102           Bob      bob@mail.com                None     4.0   
2          103          None  charlie@mail.com  Average experience     NaN   
3          104         David              None   Loved the product     5.0   
4          105           Eva      eva@mail.com     Could be better     3.0   
5          106         Frank    frank@mail.com          Excellent!     NaN   
6          107         Grace    grace@mail.com        Good support     4.0   
7          107         Grace    grace@mail.com        Good support     4.0   

  Feedback_Date  
0    2024-01-15  
1    2024-02-10  
2    2024-03-05  
3          None  
4    2024-04-20  
5    2024-05-30  
6    2024-06-12  
7    2024-06-12  

Missing Values:
 Feedback_ID      0
Customer_Name    1
Email            1
Feedback_Text    1
Rating           2
Feedba

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Customer_Name"].fillna("Unknown", inplace=True)  # Fill missing customer names
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Feedback_Text"].fillna("No feedback provided", inplace=True)  # Fill missing feedback text
The behavior will change in pandas 3.0. This inplace m