In [200]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

Market share distribution by brand. Brands with less than 1% market share are grouped into a single 'Other' slice.

In [201]:
# Load the per-brand rows from the 'Segment Brands' sheet.
# header=1 makes the second spreadsheet row the column header.
brand_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Brands', header=1)

# Total revenue per brand
brand_revenue = brand_data.groupby('Brand')['Revenue'].sum().reset_index()

# Market share as a percentage of the revenue summed over all brands
brand_revenue['Market Share'] = (brand_revenue['Revenue'] / brand_revenue['Revenue'].sum()) * 100

# Brands whose market share (in percent) is below this value are folded into 'Other'
threshold = 1

# Evaluate the threshold test once and reuse it for both totals
below_threshold = brand_revenue['Market Share'] < threshold
other_revenue = brand_revenue.loc[below_threshold, 'Revenue'].sum()
other_market_share = brand_revenue.loc[below_threshold, 'Market Share'].sum()

# Keep only the brands that are shown individually
brand_revenue = brand_revenue[brand_revenue['Market Share'] >= threshold]

# Single-row frame holding the combined small brands
other_row = pd.DataFrame(data={'Brand': ['Other'], 'Revenue': [other_revenue], 'Market Share': [other_market_share]})

# Only add the 'Other' slice when at least one brand was folded into it;
# otherwise the pie would carry an empty 0% slice and legend entry.
if below_threshold.any():
    brand_revenue = pd.concat([brand_revenue, other_row], ignore_index=True)
else:
    brand_revenue = brand_revenue.reset_index(drop=True)

# Order the table by ascending market share (this orders the DataFrame rows)
brand_revenue = brand_revenue.sort_values('Market Share')

# Pie chart with one slice per remaining brand plus 'Other'
fig = px.pie(brand_revenue, values='Market Share', names='Brand', title='Market Share Distribution by Brand')

# Show the pie chart
fig.show()

Brand Performance Comparison

Grouping the data by 'Brand' to calculate the sum of revenues for each.
Sorting the results to ensure that the bar chart shows brands in order of their revenue performance.
Plotting the data using Plotly's bar chart function.

In [209]:
# Load the per-brand rows from the 'Segment Brands' sheet.
# header=1 makes the second spreadsheet row the column header.
brand_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Brands', header=1)

# Total revenue per brand
brand_performance = brand_data.groupby('Brand')['Revenue'].sum().reset_index()

# Truncate brand names to a maximum of 20 characters so the axis labels stay readable.
# Casting to str first keeps this from raising on non-string brand values
# (e.g. a purely numeric brand name that Excel stores as a number).
# NOTE: truncation happens after aggregation, so brands that share their first
# 20 characters end up under the same axis label.
brand_performance['Brand'] = brand_performance['Brand'].astype(str).str[:20]

# Highest-revenue brands first
brand_performance = brand_performance.sort_values('Revenue', ascending=False)

# One bar per brand label; colouring by brand gives each label its own colour and legend entry
fig = px.bar(brand_performance, x='Brand', y='Revenue', title='Brand Performance Comparison',
             labels={'Revenue': 'Total Revenue ($)', 'Brand': 'Brand (max 20 chars)'},
             color="Brand")

# Axis titles, bar ordering and scale
fig.update_layout(
    xaxis_title='Brand (max 20 chars)',
    yaxis_title='Total Revenue ($)',
    xaxis={'categoryorder':'total descending'},  # order categories by their total revenue
    # Log scale keeps small brands visible next to very large ones.
    # NOTE(review): zero or negative revenue totals cannot be placed on a log axis - confirm none occur.
    yaxis=dict(type='log')
)

# Show the figure
fig.show()

Unit Sales Over Time for Top Brands

In [211]:
# Read the 'Weekly' sheet; the code below uses its 'Week', 'Brand' and 'Units' columns.
# header=1 makes the second spreadsheet row the column header.
sales_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Weekly', header=1)

# 'Week' is split on ' - ' and the first piece is parsed as the date
# (presumably the text is a "start - end" range - adjust if the format differs)
week_start_text = sales_data['Week'].str.split(' - ').str[0]
sales_data['Week'] = pd.to_datetime(week_start_text)

# The five brands with the largest total unit sales
units_by_brand = sales_data.groupby('Brand')['Units'].sum()
top_brands = units_by_brand.nlargest(5).index

# Keep only rows belonging to those brands
is_top_brand = sales_data['Brand'].isin(top_brands)
top_brands_data = sales_data[is_top_brand]

# Units summed per (week, brand) pair
unit_sales_over_time = top_brands_data.groupby(['Week', 'Brand'], as_index=False)['Units'].sum()

# Wide layout: one row per week, one column per brand, with 'Week' restored as a column.
# Week/brand pairs with no data become NaN cells here.
unit_sales_pivot = unit_sales_over_time.pivot(index='Week', columns='Brand', values='Units').reset_index()

# Back to long layout: one row per (week, brand), including the NaN rows created by the pivot
unit_sales_melted = unit_sales_pivot.melt(id_vars='Week', var_name='Brand', value_name='Units')

# One line per brand
fig = px.line(unit_sales_melted, x='Week', y='Units', color='Brand', title='Unit Sales Over Time for Top Brands')

# Axis and legend titles
fig.update_layout(xaxis_title='Date', yaxis_title='Unit Sales', legend_title='Brand')

# Show the figure
fig.show()

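Shows each subcategory's share of revenue as a treemap. A product listed under several subcategories contributes its full revenue to each of them.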

In [207]:


# Load the per-product rows from the 'Segment Products' sheet.
# header=1 makes the second spreadsheet row the column header.
products_data = pd.read_excel('Customer_Success_Engineer_-_Project_Data.xlsx', sheet_name='Segment Products', header=1)

# 'Subcategories' holds a comma-separated list per product. Split it into a Python list,
# tolerating missing or extra whitespace around the commas and at the ends of the text.
products_data['Subcategories'] = products_data['Subcategories'].str.strip().str.split(r'\s*,\s*')

# One row per (product, subcategory) pair.
# NOTE: a product with several subcategories contributes its full revenue to each one,
# so the subcategory totals can add up to more than the overall revenue.
products_data_exploded = products_data.explode('Subcategories')

# Total revenue per subcategory, largest first
category_performance = (
    products_data_exploded
    .groupby('Subcategories')['Revenue']
    .sum()
    .reset_index()
    .sort_values('Revenue', ascending=False)
)

# Treemap: one tile per subcategory, tile area proportional to revenue.
# Treemap traces are laid out by domain rather than on x/y axes, so no axis
# titles or category ordering are configured for this figure.
fig = px.treemap(category_performance, path=['Subcategories'], values='Revenue', title='Sales Performance by Subcategories')

# Show the figure
fig.show()
