# Shopify Sales — Exploratory Data Analysis

This notebook explores a synthetically generated Shopify orders dataset for a
fictional **Beauty & Skincare** online store (~65,000 orders spanning 18 months).


In [None]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Apply the white plotly theme to every figure created after this point.
pio.templates.default = 'plotly_white'

# Load the orders export, parsing the order timestamp while reading.
df = pd.read_csv('data/shopify_orders.csv', parse_dates=['order_date'])

# Collapse each order date to its calendar month and store it as a
# timestamp column so the monthly aggregations can group on it directly.
month_period = df['order_date'].dt.to_period('M')
df['order_month'] = month_period.dt.to_timestamp()

# Preview the first rows (rendered as the cell output).
df.head()


## Dataset Overview


In [None]:
# Gather the headline figures first, then print them in one place.
n_rows, n_cols = df.shape
first_day = df['order_date'].min().date()
last_day = df['order_date'].max().date()

print(f'Rows : {n_rows:,}')
print(f'Columns: {n_cols}')
print(f'Date range: {first_day} → {last_day}')

# Summary statistics (rendered as the cell output).
df.describe()


## Revenue Over Time


In [None]:
# Sum order totals within each month, then flatten the result back into a
# two-column frame (order_month, total_price) for plotting.
monthly_totals = df.groupby('order_month')['total_price'].sum()
rev = monthly_totals.reset_index()

fig = px.area(
    rev,
    x='order_month',
    y='total_price',
    title='Monthly Revenue',
    labels={'order_month': 'Month', 'total_price': 'Revenue ($)'},
)
# Draw a smoothed curve between the monthly points.
fig.update_traces(line_shape='spline')
fig.show()


## Orders Per Month


In [None]:
# Count the non-null order ids that fall in each month.
monthly_counts = df.groupby('order_month')['order_id'].count()
orders = monthly_counts.reset_index()

fig = px.bar(
    orders,
    x='order_month',
    y='order_id',
    title='Orders Per Month',
    labels={'order_month': 'Month', 'order_id': 'Orders'},
)
fig.show()


## Top 10 Products by Revenue


In [None]:
# Revenue per product, keeping only the ten highest earners.
product_revenue = df.groupby('product_name')['total_price'].sum()
best_sellers = product_revenue.nlargest(10)

# Ascending order so the horizontal bar chart is drawn from the smallest
# value up to the largest.
top10 = best_sellers.sort_values().reset_index()

fig = px.bar(
    top10,
    x='total_price',
    y='product_name',
    orientation='h',
    title='Top 10 Products by Revenue',
    labels={'total_price': 'Revenue ($)', 'product_name': 'Product'},
)
fig.show()


## Revenue by Category


In [None]:
# Total revenue for each product category.
category_totals = df.groupby('product_category')['total_price'].sum()
cat_rev = category_totals.reset_index()

# hole=0.4 turns the pie into a donut chart.
fig = px.pie(
    cat_rev,
    names='product_category',
    values='total_price',
    hole=0.4,
    title='Revenue by Category',
)
fig.show()


## Revenue by Country (Top 10)


In [None]:
# Revenue per customer country, limited to the ten largest markets.
country_revenue = df.groupby('customer_country')['total_price'].sum()
leading_markets = country_revenue.nlargest(10)

# Ascending order so the horizontal bar chart is drawn from the smallest
# value up to the largest.
country = leading_markets.sort_values().reset_index()

fig = px.bar(
    country,
    x='total_price',
    y='customer_country',
    orientation='h',
    title='Top 10 Countries by Revenue',
    labels={'total_price': 'Revenue ($)', 'customer_country': 'Country'},
)
fig.show()


## Order Status Distribution


In [None]:
# Count how many orders carry each status.
#
# The index and the count column are named explicitly so the resulting frame
# always has 'order_status' and 'count' columns. A bare
# value_counts().reset_index() only produces those names on pandas >= 2.0;
# older releases yield 'index' / 'order_status' instead, which makes the
# x='count' lookup below fail.
status = (
    df['order_status']
    .value_counts()
    .rename_axis('order_status')
    .reset_index(name='count')
)

fig = px.bar(status, x='count', y='order_status', orientation='h',
             labels={'count': 'Orders', 'order_status': 'Status'},
             title='Order Status Distribution')
fig.show()


## Discount Code Popularity


In [None]:
# Keep only orders that actually used a discount code.
#
# read_csv loads empty CSV fields as NaN by default, so orders without a code
# are typically NaN rather than ''. Both representations are excluded
# explicitly here instead of relying on value_counts() silently dropping NaN.
used_codes = df['discount_code'].dropna()
used_codes = used_codes[used_codes != '']

# Name the index and the count column explicitly so the frame has
# 'discount_code' and 'count' columns on every pandas version (a bare
# value_counts().reset_index() only does so on pandas >= 2.0).
disc = (
    used_codes
    .value_counts()
    .rename_axis('discount_code')
    .reset_index(name='count')
)

fig = px.bar(disc, x='count', y='discount_code', orientation='h',
             labels={'count': 'Times Used', 'discount_code': 'Code'},
             title='Most Popular Discount Codes')
fig.show()
