In [200]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Market Share Distribution by Brand - brands with less than 1% market share are grouped together into a single 'Other' slice

In [201]:
# Load the per-brand figures from the 'Segment Brands' sheet (column names sit on the second spreadsheet row)
brand_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Brands', header=1)

# Total revenue per brand
brand_revenue = brand_data.groupby('Brand')['Revenue'].sum().reset_index()

# Market share of each brand, as a percentage of total revenue
brand_revenue['Market Share'] = (brand_revenue['Revenue'] / brand_revenue['Revenue'].sum()) * 100

# Brands whose share is below this percentage are folded into a single 'Other' slice
threshold = 1

# Flag the small brands once and reuse the mask for both 'Other' totals
is_minor = brand_revenue['Market Share'] < threshold
other_revenue = brand_revenue.loc[is_minor, 'Revenue'].sum()
other_market_share = brand_revenue.loc[is_minor, 'Market Share'].sum()

# Keep only the brands that are large enough to be shown individually
brand_revenue = brand_revenue[brand_revenue['Market Share'] >= threshold]

# Build the 'Other' row, but only add it when at least one brand was folded into it;
# otherwise an empty, zero-revenue category would be added to the chart data
other_row = pd.DataFrame(data={'Brand': ['Other'], 'Revenue': [other_revenue], 'Market Share': [other_market_share]})
frames = [brand_revenue]
if is_minor.any():
    frames.append(other_row)
brand_revenue = pd.concat(frames, ignore_index=True)

# Order the rows by market share (ascending) so the table reads from smallest to largest
brand_revenue = brand_revenue.sort_values('Market Share')

# Create a pie chart of the market shares
fig = px.pie(brand_revenue, values='Market Share', names='Brand', title='Market Share Distribution by Brand')

# Show the pie chart
fig.show()

Brand Performance Comparison

Grouping the data by 'Brand' to calculate the sum of revenues for each.
Sorting the results to ensure that the bar chart shows brands in order of their revenue performance.
Plotting the data using Plotly's bar chart function.

In [209]:
# Read the 'Segment Brands' sheet; its column names sit on the second spreadsheet row
brand_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Brands', header=1)

# Revenue total per brand
brand_performance = brand_data.groupby('Brand')['Revenue'].sum().reset_index()

# Clip every brand label to its first 20 characters so the axis stays readable
brand_performance['Brand'] = brand_performance['Brand'].map(lambda name: name[:20])

# Highest-earning brands first
brand_performance = brand_performance.sort_values('Revenue', ascending=False)

# Axis captions shared by the chart constructor and the layout tweak below
axis_labels = {'Revenue': 'Total Revenue ($)', 'Brand': 'Brand (max 20 chars)'}

# One bar per brand, each in its own colour
fig = px.bar(
    brand_performance,
    x='Brand',
    y='Revenue',
    title='Brand Performance Comparison',
    labels=axis_labels,
    color="Brand",
)

# Layout: explicit axis titles, bars ordered by total, logarithmic revenue axis
# (the log scale keeps small brands visible next to very large ones)
fig.update_layout(
    xaxis_title=axis_labels['Brand'],
    yaxis_title=axis_labels['Revenue'],
    xaxis={'categoryorder': 'total descending'},
    yaxis=dict(type='log'),
)

# Render the chart
fig.show()

Unit Sales Over Time for Top Brands

In [211]:
# Read the 'Weekly' sheet (expected to carry 'Week', 'Brand' and 'Units' columns)
sales_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Weekly', header=1)

# 'Week' is treated as a "<start> - <end>" range: keep the part before ' - ' and parse it as a date
week_start = sales_data['Week'].str.split(' - ').str[0]
sales_data['Week'] = pd.to_datetime(week_start)

# The five brands with the highest total unit sales
units_by_brand = sales_data.groupby('Brand')['Units'].sum()
top_brands = units_by_brand.nlargest(5).index

# Restrict the weekly rows to those brands
top_brands_data = sales_data[sales_data['Brand'].isin(top_brands)]

# Units per week and brand
unit_sales_over_time = top_brands_data.groupby(['Week', 'Brand'])['Units'].sum().reset_index()

# Wide table: one row per week, one column per brand ('Week' restored as a regular column).
# Going wide and back to long gives every brand a row for every week, with NaN where
# a brand has no sales recorded for that week.
unit_sales_pivot = unit_sales_over_time.pivot(index='Week', columns='Brand', values='Units').reset_index()

# Back to long format: one row per (Week, Brand) with a single 'Units' column
unit_sales_melted = unit_sales_pivot.melt(id_vars='Week', var_name='Brand', value_name='Units')

# One line per brand
fig = px.line(
    unit_sales_melted,
    x='Week',
    y='Units',
    color='Brand',
    title='Unit Sales Over Time for Top Brands',
)

# Axis and legend captions
fig.update_layout(xaxis_title='Date', yaxis_title='Unit Sales', legend_title='Brand')

# Render the chart
fig.show()

Shows the revenue share of each subcategory. A product listed under several subcategories is counted once in each of them.

In [207]:


# Load the product-level data from the 'Segment Products' sheet (column names sit on the second spreadsheet row)
products_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Products', header=1)

# 'Subcategories' holds a ', '-separated list per product: split it into a Python list
# and explode so that each (product, subcategory) pair gets its own row.
products_data['Subcategories'] = products_data['Subcategories'].str.split(', ')
products_data_exploded = products_data.explode('Subcategories')

# Revenue per subcategory, largest first.
# NOTE: exploding copies a product's full revenue onto every one of its subcategories,
# so the tiles can add up to more than the total revenue of the sheet.
category_performance = (
    products_data_exploded.groupby('Subcategories')['Revenue'].sum()
    .reset_index()
    .sort_values('Revenue', ascending=False)
)

# Treemap: one tile per subcategory, sized by revenue.
# A treemap has no cartesian x/y axes, so no axis titles or category ordering are set here.
fig = px.treemap(category_performance, path=['Subcategories'], values='Revenue', title='Sales Performance by Subcategories')

# Show the figure
fig.show()


In [213]:
# Read the 'Weekly' sheet; column names sit on the second spreadsheet row
weekly_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Weekly', header=1)

# 'Week' is treated as a "<start> - <end>" range: keep the part before ' - ' and parse it as a date
week_start = weekly_data['Week'].str.split(' - ').str[0]
weekly_data['Week'] = pd.to_datetime(week_start)

# Per week: unweighted mean of the row prices and total units
weekly_summary = (
    weekly_data.groupby('Week')
    .agg(Price=('Price', 'mean'), Units=('Units', 'sum'))
    .reset_index()
)

# Single plot with two y-axes, since price and units live on very different scales
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (source column, legend name, drawn on the secondary axis?)
series_specs = [
    ('Price', 'Average Price', False),
    ('Units', 'Total Units Sold', True),
]
for column, trace_name, on_secondary in series_specs:
    fig.add_trace(
        go.Scatter(x=weekly_summary['Week'], y=weekly_summary[column], name=trace_name, mode='lines+markers'),
        secondary_y=on_secondary,
    )

# Figure title
fig.update_layout(title_text='Average Price and Total Units Sold Trending Over Time')

# Axis captions: shared x-axis, then the primary and secondary y-axes
fig.update_xaxes(title_text='Week')
fig.update_yaxes(title_text='Average Price ($)', secondary_y=False)
fig.update_yaxes(title_text='Total Units Sold', secondary_y=True)

# Render the chart
fig.show()

In [215]:
# Load the per-brand monthly figures from the 'Monthly' sheet (column names sit on the second spreadsheet row)
monthly_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Monthly', header=1)

# Turn the month name into a real timestamp before grouping. Grouping on the raw name
# would order the x-axis alphabetically (April, August, December, ...) and would merge
# the same month of different years into one point.
# Assumes 'Month' holds full English month names and that a 'Year' column exists,
# which is how the weekly/monthly trend cell of this notebook parses the same sheet.
monthly_data['Month'] = pd.to_datetime(
    monthly_data['Year'].astype(str) + '-' + monthly_data['Month'],
    format='%Y-%B',
)

# Revenue per (month, brand); groupby returns the months in chronological order
monthly_grouped = monthly_data.groupby(['Month', 'Brand'])['Revenue'].sum().reset_index()

# One revenue line per brand over time
fig = px.line(monthly_grouped, x='Month', y='Revenue', color='Brand', title='Monthly Revenue Trends by Brand',
              labels={'Month': 'Month', 'Revenue': 'Revenue ($)'})

# Show the plot
fig.show()

Weekly & Monthly Revenue Trends (Whole market)

There's a huge dip in March, but interestingly the sharp dip only shows up on the weekly chart; the monthly chart tells a different story. However, the last month shows significantly lower sales, likely due to a recall.

In [217]:
# Read both time-series sheets; column names sit on the second spreadsheet row
weekly_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Weekly', header=1)
monthly_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Monthly', header=1)

# --- Weekly ---
# Keep the part of 'Week' before ' - ', trim whitespace and parse it as a date.
# Values that cannot be parsed become NaT instead of raising.
week_start = weekly_data['Week'].str.split(' - ').str[0].str.strip()
weekly_data['Week'] = pd.to_datetime(week_start, errors='coerce')
weekly_data = weekly_data.sort_values('Week')
weekly_revenue = weekly_data.groupby('Week')['Revenue'].sum().reset_index()

# --- Monthly ---
# Build "<Year>-<Month name>" strings and parse them as dates; unparseable values become NaT
year_and_month = monthly_data['Year'].astype(str) + '-' + monthly_data['Month']
monthly_data['Month'] = pd.to_datetime(year_and_month, format='%Y-%B', errors='coerce')
monthly_data = monthly_data.sort_values('Month')
monthly_revenue = monthly_data.groupby('Month')['Revenue'].sum().reset_index()

# Axis titles used by both charts
axis_titles = dict(xaxis_title='Date', yaxis_title='Revenue ($)')

# Weekly revenue trend
fig_weekly = px.line(
    weekly_revenue,
    x='Week',
    y='Revenue',
    title='Time Series Analysis of Weekly Revenue Trends',
    labels={'Revenue': 'Total Revenue', 'Week': 'Date'},
)
fig_weekly.update_layout(**axis_titles)
fig_weekly.show()

# Monthly revenue trend
fig_monthly = px.line(
    monthly_revenue,
    x='Month',
    y='Revenue',
    title='Time Series Analysis of Monthly Revenue Trends',
    labels={'Revenue': 'Total Revenue', 'Month': 'Date'},
)
fig_monthly.update_layout(**axis_titles)
fig_monthly.show()

Segment Trends: aggregated daily data for all segment products. There is a huge dip in March that should probably be investigated.

Volume is very steady overall; however, revenue has trended down over the year. Perhaps less food is being sourced on Amazon overall.

It seems there may have been a recall in December 2023 which sharply affected brand sales. https://www.petful.com/brands/purina-recall/

It seems some of the same ingredient sources may have been used by multiple brands.


In [218]:
# Read the 'Segment Trends' sheet; column names sit on the second spreadsheet row
data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Trends', header=1)

# Parse 'Date' so the x-axis is a true time axis
data['Date'] = pd.to_datetime(data['Date'])

# Start from an empty figure and add one line per metric; both share the same y-axis
fig = go.Figure()
for metric in ('Unit Sales', 'Revenue'):
    fig.add_trace(
        go.Scatter(x=data['Date'], y=data[metric], mode='lines+markers', name=metric)
    )

# Title, axis captions and legend caption
fig.update_layout(
    title='Unit Sales and Revenue Over Time',
    xaxis_title='Date',
    yaxis_title='Count / Amount',
    legend_title='Metric',
)

# Render the chart
fig.show()

Unit Sales Growth Trend, Monthly

$\text{Growth} = \dfrac{\text{last units} - \text{first units}}{\text{first units}} \times 100$. This formula gives the percentage growth or decline in unit sales over the year.

SQUISHMALLOW had massive growth; interestingly, their products seem to be plushies for humans. It seems they have had big success in this segment as well.

Also, there was a huge social media following/viral presence for the plushies.

In [221]:
# Read the 'Monthly' sheet; column names sit on the second spreadsheet row
monthly_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Monthly', header=1)

# Parse the month names into datetimes so they sort in calendar order, not alphabetically
monthly_data['Month'] = pd.to_datetime(monthly_data['Month'], format='%B')

# Chronological order within each brand
monthly_data = monthly_data.sort_values(by=['Brand', 'Year', 'Month'])

# First and last 'Units' value per brand in that order.
# This relies on each brand's first row being the start of the period and its last row
# the end (e.g. January and December) - an assumption about the sheet, not checked here.
brand_growth = (
    monthly_data.groupby('Brand')
    .agg(first_units=('Units', 'first'), last_units=('Units', 'last'))
    .reset_index()
)

def calculate_growth(row):
    """Return the percentage change from ``row['first_units']`` to ``row['last_units']``.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with 'first_units'
            and 'last_units' entries.

    Returns:
        ``(last - first) / first * 100`` when ``first`` is non-zero.
        When ``first`` is zero: ``None`` if ``last`` is zero as well
        (growth is undefined), otherwise ``float('inf')``.
    """
    start, end = row['first_units'], row['last_units']

    # Regular case: a non-zero baseline gives a well-defined percentage
    if start != 0:
        return ((end - start) / start) * 100

    # Zero baseline: no change at all is undefined, anything else is unbounded growth
    if end == 0:
        return None
    return float('inf')

# Add the growth percentage of every brand (row-wise) and list the fastest growers first
brand_growth = (
    brand_growth
    .assign(Growth=brand_growth.apply(calculate_growth, axis=1))
    .sort_values(by='Growth', ascending=False)
)

# One bar per brand, height = growth in percent
fig = px.bar(
    brand_growth,
    x='Brand',
    y='Growth',
    title='Year-Over-Year Unit Sales Growth by Brand',
)

# Render the chart
fig.show()


Product Revenue Share

In [223]:
# Read the 'Segment Products' sheet; column names sit on the second spreadsheet row
segment_products_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Products', header=1)

# Denominator for the share calculation: revenue summed over every row of the sheet
total_brand_revenue = segment_products_data['Revenue'].sum()

# Each row's revenue as a percentage of that total
segment_products_data['Revenue Share'] = (segment_products_data['Revenue'] / total_brand_revenue) * 100

# The 20 rows with the highest revenue
ranked_by_revenue = segment_products_data.sort_values(by='Revenue', ascending=False)
top_20_products = ranked_by_revenue.head(20)

# Axis captions shared by the chart constructor and the layout tweak below
share_caption = 'Revenue Share (%)'
asin_caption = 'Product ASIN'

# One bar per ASIN, each in its own colour
fig = px.bar(
    top_20_products,
    x='ASIN',
    y='Revenue Share',
    title='Top 20 Products by Revenue Share',
    labels={'Revenue Share': share_caption, 'ASIN': asin_caption},
    color="ASIN",
)

# Readability: slanted tick labels, linear share axis, white plot background
fig.update_layout(
    xaxis_title=asin_caption,
    yaxis_title=share_caption,
    xaxis_tickangle=-45,
    yaxis=dict(type='linear'),
    plot_bgcolor='white',
)

# Render the chart
fig.show()