In [9]:
import pandas as pd

# 1. Load dataset
df = pd.read_csv("retail_store_sales.csv")

# 2. Clean Item column (fill missing with mode)
df['Item'] = df['Item'].fillna(df['Item'].mode()[0])

# 3. Convert numeric columns
df['Price Per Unit'] = pd.to_numeric(df['Price Per Unit'], errors='coerce')
df['Quantity'] = pd.to_numeric(df['Quantity'], errors='coerce')

# 4. Fill missing numeric values with median
df['Price Per Unit'] = df['Price Per Unit'].fillna(df['Price Per Unit'].median())
df['Quantity'] = df['Quantity'].fillna(df['Quantity'].median())

# 5. Recalculate Total Spent
df['Total Spent'] = df['Price Per Unit'] * df['Quantity']

# 6. Fill Discount Applied with False
# Convert Discount Applied to uppercase strings
df['Discount Applied'] = df['Discount Applied'].astype(str).str.upper()

# Map TRUE/FALSE strings to boolean
df['Discount Applied'] = df['Discount Applied'].map({'TRUE': True, 'FALSE': False})

# Fill any missing values with False
df['Discount Applied'] = df['Discount Applied'].fillna(False)

# 7. Convert Transaction Date to datetime
df['Transaction Date'] = pd.to_datetime(df['Transaction Date'], errors='coerce')

# 8. Extract useful parts of date (optional)
df['Year'] = df['Transaction Date'].dt.year
df['Month'] = df['Transaction Date'].dt.month
df['Day'] = df['Transaction Date'].dt.day

# # 9. Save cleaned data to Excel
# df.to_excel("cleaned_transactions.xlsx", index=False)

# print("✅ Cleaned dataset saved as cleaned_transactions.xlsx")


print(df.dtypes)

Transaction ID              object
Customer ID                 object
Category                    object
Item                        object
Price Per Unit             float64
Quantity                   float64
Total Spent                float64
Payment Method              object
Location                    object
Transaction Date    datetime64[ns]
Discount Applied              bool
Year                         int32
Month                        int32
Day                          int32
dtype: object


  df['Discount Applied'] = df['Discount Applied'].fillna(False)
