## Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import plotly.express as px

## Load the Dataset

In [None]:
# Read the raw order data (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer="ecommerce_sales.csv")
# Preview the first rows as a quick sanity check of the loaded columns.
df.head()

## Clean the Data

In [None]:
# Drop every row that has a missing value in any column.
df.dropna(inplace=True)

# Convert Order Date to datetime; values that cannot be parsed become NaT.
df['Order Date'] = pd.to_datetime(df['Order Date'], errors='coerce')
# Keep only the rows whose date parsed. The explicit .copy() makes the result
# an independent frame, so the column assignments below write to `df` itself
# rather than to a slice of the old frame (avoids SettingWithCopyWarning).
df = df[df['Order Date'].notna()].copy()

# Create Month column (month number taken from the order timestamp).
df['Month'] = df['Order Date'].dt.month

# Add Sales column: revenue per row = units ordered * unit price.
# NOTE(review): assumes both columns were read as numeric dtypes — confirm
# against the CSV; string columns would make this multiplication fail.
df['Sales'] = df['Quantity Ordered'] * df['Price Each']

# Extract city from address
def get_city(address: str) -> str:
    """Return a ``"City (ST)"`` label built from a comma-separated address.

    The address is expected to look like ``"street, city, state zip"``: the
    city is the text between the first and second comma, and the state is the
    first whitespace-delimited token after the second comma.

    Args:
        address: Full purchase address containing at least two commas.

    Returns:
        The stripped city name followed by the state token in parentheses,
        e.g. ``"Dallas (TX)"``. The parentheses are empty when nothing
        follows the second comma.

    Raises:
        IndexError: If the address contains fewer than two commas.
    """
    # Split once and reuse the pieces instead of re-splitting per field.
    parts = address.split(',')
    city = parts[1].strip()
    # Whitespace-agnostic split: the state is found whether the comma is
    # followed by zero, one, or several spaces (splitting on a single ' '
    # only worked for exactly one leading space).
    state_tokens = parts[2].split()
    state = state_tokens[0] if state_tokens else ''
    return f'{city} ({state})'

# Build the City column by running get_city over every purchase address.
df['City'] = df['Purchase Address'].map(get_city)

# Preview the frame with the newly added column.
df.head()

## Monthly Sales Analysis

In [None]:
# Total sales per month. Selecting 'Sales' before aggregating avoids summing
# every other numeric column only to throw the result away.
# NOTE(review): grouping is by the 'Month' column alone, so if the data spans
# several years the same month of different years is pooled — confirm that
# this is intended.
monthly_sales = df.groupby('Month')['Sales'].sum()

# Line chart of month (x) against summed sales (y). The data is passed as
# plain arrays, so the 'x'/'y' keys in `labels` supply the axis titles.
fig = px.line(x=monthly_sales.index, y=monthly_sales.values,
              labels={'x': 'Month', 'y': 'Sales in USD'},
              title='📈 Monthly Sales Trend')
fig.show()

## Sales by City

In [None]:
# Total sales per city label. Selecting 'Sales' before aggregating avoids
# summing every other numeric column only to throw the result away.
city_sales = df.groupby('City')['Sales'].sum()

# Bar chart with one bar per city. The data is passed as plain arrays, so the
# 'x'/'y' keys in `labels` supply the axis titles.
fig = px.bar(x=city_sales.index, y=city_sales.values,
             labels={'x': 'City', 'y': 'Sales in USD'},
             title='🏙️ Sales by City')
# Rotate the x tick labels by -45 degrees.
fig.update_layout(xaxis_tickangle=-45)
fig.show()

## Best Selling Products

In [None]:
# Sum the numeric columns per product, then pull out the product names and
# the total units ordered for each.
product_group = df.groupby('Product').sum(numeric_only=True)
products = product_group.index
quantity_ordered = product_group['Quantity Ordered']

# Bar chart: one bar per product, height = total quantity ordered.
fig = px.bar(
    x=products,
    y=quantity_ordered,
    title='🔥 Best Selling Products',
    labels={'x': 'Product', 'y': 'Quantity Sold'},
)
# Rotate the x tick labels by 45 degrees.
fig.update_layout(xaxis_tickangle=45)
fig.show()

## Correlation - Price vs Quantity Ordered

In [None]:
# Scatter plot with one marker per row of df: unit price on x, quantity
# ordered on y, drawn at 60% opacity. `labels` renames both axes.
fig = px.scatter(
    df,
    x='Price Each',
    y='Quantity Ordered',
    opacity=0.6,
    labels={
        'Price Each': 'Product Price',
        'Quantity Ordered': 'Qty Sold',
    },
    title='💸 Price vs Quantity Ordered',
)
fig.show()

## Hourly Order Trends

In [None]:
# Hour component of each order timestamp, stored as a new column on df.
df['Hour'] = df['Order Date'].dt.hour
# Number of non-null order IDs per hour. Selecting 'Order ID' before counting
# avoids counting every other column only to throw the result away.
hourly_orders = df.groupby('Hour')['Order ID'].count()

# Line chart of hour (x) against order count (y). The data is passed as plain
# arrays, so the 'x'/'y' keys in `labels` supply the axis titles.
fig = px.line(x=hourly_orders.index, y=hourly_orders.values,
              labels={'x': 'Hour of the Day', 'y': 'Number of Orders'},
              title='⏰ Peak Order Hours')
fig.show()

## Save Cleaned Dataset

In [None]:
# Write the current state of df (including the columns added above) to a CSV
# in the working directory; index=False leaves the row index out of the file.
df.to_csv(path_or_buf="cleaned_ecommerce_sales.csv", index=False)