In [8]:
import pandas as pd
import numpy as np

# Console display options for printed frames.
pd.set_option("display.width", 140)
pd.set_option("display.max_rows", 40)

# Small demo frame with deliberately missing entries in every column.
df = pd.DataFrame({
    "product": ["Mug", "Shirt", None, "Hat", "Mug", "Shirt"],
    "price": [5.0, 15.0, 12.0, None, 5.0, np.nan],
    "category": ["Home", "Clothes", "Clothes", "Accessories", None, "Clothes"]
})

# None   = Python's built-in "nothing".
# np.nan = NumPy's special "not a number".
# pandas treats both as NA (missing).

# .isna() flags missing cells (True/False); .sum() counts the True values per column.
# The result is stored in a name because a bare expression in the middle of a cell
# is evaluated and then discarded, so nothing would be shown or kept.
na_counts = df.isna().sum()

# .fillna() returns a NEW frame with NA replaced by values you decide; df itself is
# left untouched. A per-column mapping is used so the text columns get a text
# placeholder instead of the integer 1 (which would mix types in those columns).
df_filled = df.fillna({"product": "unknown", "price": 1.0, "category": "unknown"})

# .dropna() returns a NEW frame without the rows that contain NA in any column
# (or only in selected columns when subset= is given).
df_complete = df.dropna()

print(df.dtypes)

# .str.strip() removes leading/trailing whitespace; .str.lower() lowercases.
# Missing product names stay missing in the new column.
df["cleanProduct"] = df["product"].str.strip().str.lower()

# Product catalog with deliberately messy text: stray spaces, mixed case and a
# missing category.
catalog = pd.DataFrame({
    "SKU": ["A1", "B2", "C3", "d4 "],
    "ProductName": [" Mug ", "t-shirt", "Hat", "Jeans"],
    "Category": ["HOME", "clothes ", None, "clothes"]
})

# One row per invoice line; SKU is the key shared with the catalog.
sales = pd.DataFrame({
    "InvoiceNo": [101, 102, 103, 104],
    "SKU": ["A1", "B2", "C3", "D4"],
    "Quantity": [2, 1, 3, 1]
})

# Normalize the catalog text so the SKU keys match the ones used in `sales`:
# names/categories are trimmed and lowercased, SKUs are trimmed and uppercased,
# and a missing category becomes the placeholder "unknown".
catalog["ProductName"] = catalog["ProductName"].str.strip().str.lower()
catalog["Category"] = catalog["Category"].str.strip().str.lower().fillna("unknown")
catalog["SKU"] = catalog["SKU"].str.upper().str.strip()

print(catalog)
print(sales)

# Left join: every sales row is kept and enriched with its catalog attributes.
# validate="many_to_one" makes pandas raise if a SKU appears more than once in the
# catalog (normalization can collapse e.g. "d4 " and "D4" into the same key), which
# would otherwise silently duplicate sales rows and inflate any totals built on them.
dfFull = sales.merge(catalog, on="SKU", how="left", validate="many_to_one")

print(dfFull)

# Unit price per SKU.
prices = {"A1": 5.0, "B2": 15.0, "C3": 10.0, "D4": 20.0}

dfFull["unitPrice"] = dfFull["SKU"].map(prices)

# Series.map yields NaN for any SKU that has no entry in `prices`, and that NaN
# would flow into revenue without any error. Stop here instead of producing
# revenue figures with silent gaps.
unpriced = dfFull.loc[dfFull["unitPrice"].isna(), "SKU"].unique().tolist()
assert not unpriced, f"no unit price for SKU(s): {unpriced}"

dfFull["revenue"] = dfFull["Quantity"] * dfFull["unitPrice"]
print(dfFull)

# Discount rate per SKU, as a fraction of revenue.
discounts = {"A1": 0.0, "B2": 0.1, "C3": 0.15, "D4": 0.05}

# A SKU without a discount entry is treated as "no discount" (0.0).
dfFull["discount"] = dfFull["SKU"].map(discounts).fillna(0.0)
dfFull["net_revenue"] = dfFull["revenue"] * (1 - dfFull["discount"])

print(dfFull)




product      object
price       float64
category     object
dtype: object
  SKU ProductName Category
0  A1         mug     home
1  B2     t-shirt  clothes
2  C3         hat  unknown
3  D4       jeans  clothes
   InvoiceNo SKU  Quantity
0        101  A1         2
1        102  B2         1
2        103  C3         3
3        104  D4         1
   InvoiceNo SKU  Quantity ProductName Category
0        101  A1         2         mug     home
1        102  B2         1     t-shirt  clothes
2        103  C3         3         hat  unknown
3        104  D4         1       jeans  clothes
   InvoiceNo SKU  Quantity ProductName Category  unitPrice  revenue
0        101  A1         2         mug     home        5.0     10.0
1        102  B2         1     t-shirt  clothes       15.0     15.0
2        103  C3         3         hat  unknown       10.0     30.0
3        104  D4         1       jeans  clothes       20.0     20.0
   InvoiceNo SKU  Quantity ProductName Category  unitPrice  revenue  discoun