In [2]:
import pandas as pd

def _columns_from_rows(header, rows):
    """Transpose row tuples into a ``{column name: list of values}`` mapping.

    Keys follow the order of ``header``; each list holds that column's
    values in row order, which is the layout ``pd.DataFrame`` accepts.
    """
    return {column: list(values) for column, values in zip(header, zip(*rows))}


# Customer master data: one record per customer, keyed by CustomerID.
customers_data = _columns_from_rows(
    ("CustomerID", "Name", "Email", "Location"),
    [
        (101, "Alice", "alice@example.com", "New York"),
        (102, "Bob", "bob@example.com", "Los Angeles"),
        (103, "Charlie", "charlie@example.com", "Chicago"),
        (104, "David", "david@example.com", "Houston"),
        (105, "Eva", "eva@example.com", "Phoenix"),
    ],
)
df_customers = pd.DataFrame(customers_data)

# Purchase history: one record per order; customers 101 and 103 appear twice.
purchase_data = _columns_from_rows(
    ("CustomerID", "OrderID", "Product", "Amount"),
    [
        (101, 201, "Laptop", 1200),
        (102, 202, "Smartphone", 800),
        (103, 203, "Headphones", 150),
        (104, 204, "Monitor", 300),
        (105, 205, "Keyboard", 100),
        (101, 206, "Mouse", 50),
        (103, 207, "Tablet", 400),
    ],
)
df_purchase = pd.DataFrame(purchase_data)

# Product catalogue: one record per product name.
products_data = _columns_from_rows(
    ("ProductID", "Product", "Category", "Price"),
    [
        (1, "Laptop", "Electronics", 1200),
        (2, "Smartphone", "Electronics", 800),
        (3, "Headphones", "Electronics", 150),
        (4, "Monitor", "Electronics", 300),
        (5, "Keyboard", "Electronics", 100),
        (6, "Mouse", "Electronics", 50),
        (7, "Tablet", "Electronics", 400),
    ],
)
df_products = pd.DataFrame(products_data)

# Persist each frame to its CSV file in the working directory, without the index column.
for csv_name, frame in (
    ("customers.csv", df_customers),
    ("purchase_history.csv", df_purchase),
    ("products.csv", df_products),
):
    frame.to_csv(csv_name, index=False)
print("✅ Sample data CSV files created")


✅ Sample data CSV files created


In [3]:

# 1. Read all datasets
# Load the three CSV files written by the sample-data cell above.
df_customers = pd.read_csv("customers.csv")
df_purchase = pd.read_csv("purchase_history.csv")
df_products = pd.read_csv("products.csv")

# 2. Merge sales and customer data
# Left join keeps every purchase row. validate="many_to_one" makes pandas raise
# MergeError if a CustomerID appears more than once in df_customers, which
# would otherwise silently duplicate purchase rows.
df_customer_sales = pd.merge(
    df_purchase,
    df_customers,
    on="CustomerID",
    how="left",
    validate="many_to_one",
)
print("\nCustomer + Purchase History:\n", df_customer_sales)


# 3. Join product details with purchase history
# The join key is the product name. `suffixes` only applies to non-key columns
# that exist in BOTH frames; with the sample data the only shared column is the
# key "Product", so no column is actually suffixed (the purchase column stays
# "Amount" and the catalogue column stays "Price").
df_full = pd.merge(
    df_customer_sales,
    df_products,
    on="Product",
    how="left",
    suffixes=('_purchase','_product'),
    validate="many_to_one",
)
print("\nFull Integrated Dataset:\n", df_full)


# 4. Concatenate new customer records
new_customers = pd.DataFrame({
    "CustomerID":[106,107],
    "Name":["Frank","Grace"],
    "Email":["frank@example.com","grace@example.com"],
    "Location":["Seattle","Boston"]
})
# ignore_index=True renumbers the rows 0..n-1 instead of repeating 0 and 1.
df_customers_all = pd.concat([df_customers, new_customers], ignore_index=True)
print("\nAll Customers after concatenation:\n", df_customers_all)


# 5. Give the purchase amount an explicit name
# Reassign instead of mutating in place, and use errors="raise" so the rename
# fails with KeyError rather than silently doing nothing if "Amount" is ever
# missing (e.g. because a future overlap caused it to be suffixed in step 3).
df_full = df_full.rename(columns={"Amount":"PurchaseAmount"}, errors="raise")

# 6. Export final integrated dataset
df_full.to_csv("final_integrated_sales_data.csv", index=False)
print("✅ Final integrated dataset exported to final_integrated_sales_data.csv")



Customer + Purchase History:
    CustomerID  OrderID     Product  Amount     Name                Email  \
0         101      201      Laptop    1200    Alice    alice@example.com   
1         102      202  Smartphone     800      Bob      bob@example.com   
2         103      203  Headphones     150  Charlie  charlie@example.com   
3         104      204     Monitor     300    David    david@example.com   
4         105      205    Keyboard     100      Eva      eva@example.com   
5         101      206       Mouse      50    Alice    alice@example.com   
6         103      207      Tablet     400  Charlie  charlie@example.com   

      Location  
0     New York  
1  Los Angeles  
2      Chicago  
3      Houston  
4      Phoenix  
5     New York  
6      Chicago  

Full Integrated Dataset:
    CustomerID  OrderID     Product  Amount     Name                Email  \
0         101      201      Laptop    1200    Alice    alice@example.com   
1         102      202  Smartphone     800   