In [1]:
import pandas as pd

# 1. Load customer purchase data (built in memory, one list entry per customer)
data = {
    "Customer_ID": [101, 102, 103, 104, 105, 106, 107, 108],
    "Customer_Name": ["Alice", "Bob", "Charlie", "David", "Eva", "Frank", "Grace", "Helen"],
    "Product_Category": ["Electronics", "Furniture", "Clothing", "Electronics",
                         "Furniture", "Electronics", "Clothing", "Furniture"],
    "Purchase_Amount": [1200, 800, 150, 600, 2000, 300, 450, 1800],
    "Items_Purchased": [1, 2, 5, 1, 3, 2, 4, 5],
    "City": ["New York", "San Francisco", "Los Angeles", "New York",
             "Chicago", "San Francisco", "Los Angeles", "New York"],
    "Purchase_Date": pd.to_datetime(["2025-01-05", "2025-01-10", "2025-01-12", "2025-01-15",
                                     "2025-01-18", "2025-01-20", "2025-01-22", "2025-01-25"])
}

df = pd.DataFrame(data)


def format_section(title, frame):
    """Return ``title`` on its own line followed by the text form of ``frame``.

    The two parts are joined with a bare newline. Passing them to ``print`` as
    separate arguments would insert print's default ``sep=" "`` in front of the
    frame, shifting its header row one column to the right of the data rows.

    Args:
        title: Heading text, including any trailing colon.
        frame: Object to render with ``str()`` (a DataFrame in this cell).

    Returns:
        The combined text, without a trailing newline.
    """
    return f"{title}\n{frame}"


def print_section(title, frame, blank_after=True):
    """Print a titled section built by ``format_section``.

    Args:
        title: Heading text, including any trailing colon.
        frame: Object to render under the heading.
        blank_after: When true, leave one empty line after the section so
            consecutive sections stay visually separated.
    """
    print(format_section(title, frame), end="\n\n" if blank_after else "\n")


print_section("Customer Purchase Dataset:", df)

# 2. Display first and last few rows (head/tail default to 5 rows)
print_section("First 5 rows:", df.head())
print_section("Last 5 rows:", df.tail())

# 3. Select specific columns
print_section("Selected columns:", df[["Customer_ID", "Product_Category", "Purchase_Amount"]])

# 4. Customers who purchased Electronics
electronics_customers = df[df["Product_Category"] == "Electronics"]
print_section("Customers who purchased Electronics:", electronics_customers)

# 5. Customers with Purchase Amount > 500
high_purchase = df[df["Purchase_Amount"] > 500]
print_section("Customers with Purchase Amount > $500:", high_purchase)

# 6. Transactions between two specific dates (between() includes both endpoints)
date_filtered = df[df["Purchase_Date"].between(pd.Timestamp("2025-01-10"),
                                               pd.Timestamp("2025-01-20"))]
print_section("Transactions between 2025-01-10 and 2025-01-20:", date_filtered)

# 7. Customers in New York or San Francisco
cities_filtered = df[df["City"].isin(["New York", "San Francisco"])]
print_section("Customers in New York or San Francisco:", cities_filtered)

# 8. Select specific rows using .loc[] and .iloc[]
loc_example = df.loc[2:4]  # label-based slice: rows 2 to 4, end label included
iloc_example = df.iloc[2:5]  # position-based slice: rows 2 to 4 (end position excluded)
print_section("Rows using loc[]:", loc_example)
print_section("Rows using iloc[]:", iloc_example)

# 9. Change index to Customer_ID
# verify_integrity makes a duplicate Customer_ID fail loudly instead of
# silently producing a non-unique index.
df_indexed = df.set_index("Customer_ID", verify_integrity=True)
print_section("DataFrame with Customer_ID as index:", df_indexed)

# 10. Reset index back to default
df_reset = df_indexed.reset_index()
print_section("DataFrame after resetting index:", df_reset)

# 11. High-value customers: Purchase > $1000 in Furniture
high_value_furniture = df[(df["Purchase_Amount"] > 1000) & (df["Product_Category"] == "Furniture")]
print_section("High-value Furniture Customers:", high_value_furniture)

# 12. Customers with Items Purchased between 3 and 10 (both endpoints included)
items_filtered = df[df["Items_Purchased"].between(3, 10)]
print_section("Customers with 3 to 10 items purchased:", items_filtered)

# 13. Boolean filtering for purchases > $500 AND City == "New York"
filtered_customers = df[(df["Purchase_Amount"] > 500) & (df["City"] == "New York")]

print_section("Customers with Purchase Amount > $500 in New York:", filtered_customers,
              blank_after=False)

Customer Purchase Dataset:
    Customer_ID Customer_Name Product_Category  Purchase_Amount  \
0          101         Alice      Electronics             1200   
1          102           Bob        Furniture              800   
2          103       Charlie         Clothing              150   
3          104         David      Electronics              600   
4          105           Eva        Furniture             2000   
5          106         Frank      Electronics              300   
6          107         Grace         Clothing              450   
7          108         Helen        Furniture             1800   

   Items_Purchased           City Purchase_Date  
0                1       New York    2025-01-05  
1                2  San Francisco    2025-01-10  
2                5    Los Angeles    2025-01-12  
3                1       New York    2025-01-15  
4                3        Chicago    2025-01-18  
5                2  San Francisco    2025-01-20  
6                4    Los A