In [13]:
import pandas as pd

In [14]:
# Toy dataset that deliberately contains rows violating the checks run below:
# a cost above the price, a 2025 date, and two malformed email addresses.
data = {
    'price': [100, 80, 120, 90],
    'cost': [70, 90, 80, 60],
    'date': ['2024-01-01', '2025-06-01', '2023-12-31', '2024-03-15'],
    'email': ['user@domain.com', 'invalid.email', 'another@domain.com', '@nocontent.com'],
}
# Build the frame and parse the date strings into datetimes in one chain.
df = pd.DataFrame(data).assign(date=lambda frame: pd.to_datetime(frame['date']))
df

Unnamed: 0,price,cost,date,email
0,100,70,2024-01-01,user@domain.com
1,80,90,2025-06-01,invalid.email
2,120,80,2023-12-31,another@domain.com
3,90,60,2024-03-15,@nocontent.com


In [15]:
# Business rule: price > cost.
# NOTE: only strictly negative margins are flagged here; a break-even row
# (price == cost) passes this check.
profit = df['price'] - df['cost']
profit_issues = df[profit < 0].index
if len(profit_issues) > 0:
    # List every offending row label, not just the first one, so that
    # multiple violations are all reported (same format as the other checks).
    print(f"Negative profit at rows: {profit_issues.tolist()}")

Negative profit at rows: 1


In [16]:
# Date validation: flag rows whose date falls in a calendar year after 2024.
# NOTE: the cutoff year is hard-coded; nothing here compares against the
# current date.
is_future = df['date'].dt.year.gt(2024)
future_dates = df.loc[is_future].index
if not future_dates.empty:
    print(f"Future dates at rows: {future_dates.tolist()}")
   

Future dates at rows: [1]


In [17]:
# Email pattern validation.
# The pattern accepts: one or more word characters, dots or hyphens; an '@';
# a domain made of the same character set; then a final '.' followed by
# word characters.
email_pattern = r'^[\w\.-]+@[\w\.-]+\.\w+$'
# fullmatch requires the entire string to match, so a value with a trailing
# newline (which `$` alone would let through) is rejected.
# na=False treats a missing email as "not valid" rather than yielding NaN,
# which would otherwise break the boolean inversion below.
is_valid_email = df['email'].str.fullmatch(email_pattern, na=False)
# Index object holding the labels of the rows with bad email entries.
invalid_emails = df[~is_valid_email].index
if len(invalid_emails) > 0:
    print(f"Invalid emails at rows: {invalid_emails.tolist()}")

Invalid emails at rows: [1, 3]
