In [4]:
import pandas as pd

# -------------------------------
# 1. Load the Sales Data (CSV)
# -------------------------------
# Parse the raw CSV (path is relative to the working directory) into `df`.
df = pd.read_csv(filepath_or_buffer="Sales Data.csv")

# Quick look at the leading rows to confirm the columns parsed as expected.
print("First 5 Rows:")
print(df.head(n=5), "\n")

# -------------------------------
# 2. Explore Data Structure
# -------------------------------
print("Dataset Info:")
# DataFrame.info() writes its summary directly to stdout and returns None,
# so it must not be wrapped in print() -- doing so emits a stray "None" line.
df.info()
print()  # blank separator line, matching the spacing of the other sections

# Count of null entries in each column.
print("Missing Values per Column:")
print(df.isnull().sum(), "\n")

# -------------------------------
# 3. Data Cleaning
# -------------------------------
# Discard the 'Unnamed: 0' column when the CSV carries one;
# errors='ignore' turns the drop into a no-op if the column is absent.
df = df.drop(columns='Unnamed: 0', errors='ignore')

# Keep only the first occurrence of each fully identical row.
df = df.drop_duplicates(keep='first')

# Parse the 'Order Date' column into pandas datetime values.
df['Order Date'] = pd.to_datetime(df['Order Date'])

# Retain only the rows whose 'Sales' value is strictly positive
# (zero and negative amounts are filtered out, not just reported).
df = df.loc[df['Sales'].gt(0)]

print("Cleaned Data:")
print(df.head(n=5), "\n")

# -------------------------------
# 4. Transformation & Analysis
# -------------------------------
# Example 1: sum of 'Sales' for every 'City', largest total first
city_sales = (
    df.groupby('City', as_index=False)['Sales']
    .sum()
    .sort_values(by='Sales', ascending=False)
)
print("Total Sales by City:")
print(city_sales, "\n")

# Example 2: sum of 'Quantity Ordered' for every 'Product', largest first
top_products = (
    df.groupby('Product', as_index=False)['Quantity Ordered']
    .sum()
    .sort_values(by='Quantity Ordered', ascending=False)
)
print("Top 5 Products by Quantity Ordered:")
print(top_products.head(n=5), "\n")

# Example 3: mean of the 'Sales' column for every 'Month'
# (a per-row mean; rows are not first combined per 'Order ID')
monthly_avg = df.groupby('Month', as_index=False)['Sales'].mean()
print("Average Sales per Month:")
print(monthly_avg, "\n")

# -------------------------------
# 5. Save Cleaned Unified Data
# -------------------------------
# Write the cleaned frame to disk, leaving the row index out of the file.
df.to_csv(path_or_buf="Cleaned_Sales_Data.csv", index=False)
print("Cleaned data saved as 'Cleaned_Sales_Data.csv'")

# Grand total of the 'Sales' column after cleaning.
print(df.loc[:, 'Sales'].sum())

# Illustrative only (left disabled): several aggregates over one column at once
# print(df['A'].agg(['sum', 'mean', 'min', 'max', 'std']))



First 5 Rows:
   Unnamed: 0  Order ID               Product  Quantity Ordered  Price Each  \
0           0    295665    Macbook Pro Laptop                 1     1700.00   
1           1    295666    LG Washing Machine                 1      600.00   
2           2    295667  USB-C Charging Cable                 1       11.95   
3           3    295668      27in FHD Monitor                 1      149.99   
4           4    295669  USB-C Charging Cable                 1       11.95   

            Order Date                        Purchase Address  Month  \
0  2019-12-30 00:01:00  136 Church St, New York City, NY 10001     12   
1  2019-12-29 07:03:00     562 2nd St, New York City, NY 10001     12   
2  2019-12-12 18:21:00    277 Main St, New York City, NY 10001     12   
3  2019-12-22 15:13:00     410 6th St, San Francisco, CA 94016     12   
4  2019-12-18 12:38:00           43 Hill St, Atlanta, GA 30301     12   

     Sales            City  Hour  
0  1700.00   New York City     0  
1 