In [1]:
import pandas as pd

# Toy product catalog in which every field, including numbers and dates,
# arrives as text.
data = dict(
    Name=["Product A", "Product B", "Product C"],
    Price=["100", "250", "300"],
    Stock=["50", "0", "120"],
    ReleaseDate=["2023-01-15", "2022-11-20", "2023-03-05"],
    Availability=["in_stock", "pre_order", "in_stock"],
)

df = pd.DataFrame(data)

# Show the frame and its dtypes before any conversion.
for before in ("Original DataFrame:", df, "\n Original Data Types:", df.dtypes):
    print(before)

# Give each column a proper dtype. errors="coerce" turns unparseable
# entries into NaN/NaT instead of raising.
df = df.assign(
    Price=pd.to_numeric(df["Price"], errors="coerce"),
    Stock=pd.to_numeric(df["Stock"], errors="coerce"),
    ReleaseDate=pd.to_datetime(df["ReleaseDate"], errors="coerce"),
    Availability=df["Availability"].astype("category"),
)

# Show the dtypes and the frame again after conversion.
for after in ("\n Cleaned Data Types:", df.dtypes, "\n Cleaned DataFrame:", df):
    print(after)


Original DataFrame:
        Name Price Stock ReleaseDate Availability
0  Product A   100    50  2023-01-15     in_stock
1  Product B   250     0  2022-11-20    pre_order
2  Product C   300   120  2023-03-05     in_stock

 Original Data Types:
Name            object
Price           object
Stock           object
ReleaseDate     object
Availability    object
dtype: object

 Cleaned Data Types:
Name                    object
Price                    int64
Stock                    int64
ReleaseDate     datetime64[ns]
Availability          category
dtype: object

 Cleaned DataFrame:
        Name  Price  Stock ReleaseDate Availability
0  Product A    100     50  2023-01-15     in_stock
1  Product B    250      0  2022-11-20    pre_order
2  Product C    300    120  2023-03-05     in_stock


In [None]:
import pandas as pd
import numpy as np

# Sample frame with deliberate gaps: "ReleaseDate" has no values at all and
# the last row is missing in every column.
data = {
    "Name": ["Product A", "Product B", None],
    "Price": [100, None, None],
    "Stock": [50, None, None],
    "ReleaseDate": [None, None, None],
    "Notes": [np.nan, "Special offer", np.nan]
}

df = pd.DataFrame(data)

print("Original DataFrame:")
print(df)

# --- 1. Detect entirely empty columns ---
# isna().all() reduces down each column: True only when every value is missing.
empty_cols = df.columns[df.isna().all()].tolist()
print("\nColumns entirely empty:", empty_cols)

# --- 2. Detect entirely empty rows ---
# axis=1 reduces across each row instead of down each column.
empty_rows = df.index[df.isna().all(axis=1)].tolist()
print("Rows entirely empty:", empty_rows)

# --- 3. Drop entirely empty columns & rows ---
# Columns go first, so row emptiness is judged on the remaining columns only.
df_cleaned = df.dropna(axis=1, how="all")  # drop columns
df_cleaned = df_cleaned.dropna(axis=0, how="all")  # drop rows

print("\n✅ Cleaned DataFrame (after dropping empty rows/cols):")
print(df_cleaned)

# --- 4. Detect nearly empty columns (e.g., more than 70% NaN) ---
# isna().mean() is the fraction of missing values per column. This is measured
# on the original df, so fully empty columns are reported here as well.
threshold = 0.7
nearly_empty_cols = df.columns[(df.isna().mean() > threshold)].tolist()
# The percentage in the label is derived from threshold so the two cannot drift.
print(f"\nColumns nearly empty (>{threshold:.0%} missing):", nearly_empty_cols)
