In [26]:
import dash
from dash import dcc, html, callback, Input, Output
import pandas as pd
import plotly.express as px

In [27]:
# !pip install openpyxl

In [28]:
# Load sales data
file_path = "../data/processed/sample.csv"  # Update this path
df = pd.read_csv(file_path)

___
SALES PER COUNTRY
___

In [29]:
# Aggregate sales by country
df['Revenue'] = df['Quantity'] * df['UnitPrice']
country_sales = df.groupby('Country', as_index=False).agg({'Revenue': 'sum', 'Quantity': 'sum'})

# Load country coordinates for mapping
geo_data = px.data.gapminder()[['country', 'iso_alpha']].drop_duplicates()
all_countries = pd.DataFrame({'Country': geo_data['country'], 'iso_alpha': geo_data['iso_alpha']})
country_sales = country_sales.merge(all_countries, on='Country', how='left').fillna({'Revenue': 0, 'Quantity': 0})
country_sales = country_sales[country_sales['Country'] != 'Antarctica']

country_sales.head()

Unnamed: 0,Country,Revenue,Quantity,iso_alpha
0,Australia,1524.9,870,AUS
1,Austria,15.3,6,AUT
2,Belgium,97.48,68,BEL
3,Channel Islands,15.9,2,
4,EIRE,468.17,291,


___
REVENUE OVER TIME
___

In [30]:
# Aggregate revenue over time
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
time_series = df.resample('M', on='InvoiceDate').agg({'Revenue': 'sum'}).reset_index()
time_series.head()

Unnamed: 0,InvoiceDate,Revenue
0,2010-12-31,1200.07
1,2011-01-31,1019.46
2,2011-02-28,1097.83
3,2011-03-31,1907.35
4,2011-04-30,1040.44


___
MONTHLY CUSTOMER RETENTION
___

In [31]:


# df_last_6_months = df[df['InvoiceDate'] >= six_months_ago].copy()
# df_last_6_months['Month'] = df_last_6_months['InvoiceDate'].dt.to_period('M')
# df_last_6_months.head()

In [32]:
# customer_months = df_last_6_months.groupby(['CustomerID', 'Month']).size().reset_index(name='Purchases')
# customer_months['PreviousMonth'] = customer_months['Month'] - 1

# customer_months.head()

In [33]:
# returning_customers = customer_months.merge(
#     customer_months, 
#     left_on=['CustomerID', 'Month'], 
#     right_on=['CustomerID', 'PreviousMonth']
# )

# returning_customers.head()

In [34]:
# monthly_retention = returning_customers.groupby('Month_x').agg({'CustomerID': 'nunique'}).reset_index()
# monthly_retention.head()

In [35]:
import plotly.express as px

def get_product_revenue_quantity(df):
    df['Revenue'] = df['Quantity'] * df['UnitPrice']
    df_grouped = df.groupby('Description', as_index=False)['Revenue'].sum()
    df_grouped = df_grouped.sort_values(by='Revenue', ascending=False)
    
    return df_grouped

# Get product revenue
product_revenue = get_product_revenue_quantity(df)

# Select the top 10 products (already sorted)
top_product_revenue = product_revenue.head(10)

top_product_revenue['Description'] = pd.Categorical(
    top_product_revenue['Description'], 
    categories=top_product_revenue.sort_values('Revenue', ascending=False)['Description'], 
    ordered=True
)

# Create horizontal bar chart
fig = px.bar(
    top_product_revenue,
    x='Revenue',
    y='Description',
    orientation='h',
    title='Revenue by Product',
    labels={'Revenue': 'Revenue ($)', 'Description': 'Product'}
)

# Ensure bars are ordered from highest to lowest
fig.update_layout(yaxis={'categoryorder': 'total ascending'})  # 'total ascending' for highest on top

# Show figure
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
product_revenue = get_product_revenue_quantity(df)

num_months = 8
product_revenue.head()

Unnamed: 0,Description,Revenue
190,FELTCRAFT CHRISTMAS FAIRY,1260.0
415,PARTY BUNTING,675.3
374,NO SINGING METAL SIGN,475.4
135,COLOUR GLASS. STAR T-LIGHT HOLDER,396.0
325,LETTER HOLDER HOME SWEET HOME,390.0
