In [None]:
import sys
sys.path.append('../..')
import pandas as pd
import src.cleaning as clean
import seaborn as sns
import scipy.stats as stats
import itertools

In [None]:
# Read the anonymised sales export, then pass it through the project's cleaning step.
sales_data = pd.read_csv(filepath_or_buffer='../../data/anon_data.csv')
cleaned_data = clean.clean_data(sales_data)

In [None]:
# Add a per-record unit profit column: the record's profit divided by its quantity.
clean_w_profit_per_quant = cleaned_data.assign(
    profit_per_item=lambda frame: frame['profit'] / frame['quantity']
)

In [None]:
# Tag every record with the calendar quarter (pandas Period, freq 'Q') of its ship date.
clean_w_date_quarter = clean_w_profit_per_quant.assign(
    year_quarter=lambda frame: frame['ship_date'].dt.to_period('Q')
)

In [None]:
# Reusable groupby handle: one group per sales team, shared by the per-team aggregates below.
grouped_by_sales_team = clean_w_date_quarter.groupby(by='sales_team_name')

In [None]:
def plot_barplot(data, x: str, y:str, xlabel: str, ylabel: str, title: str, hue: str = None):
    """Draw a vertical seaborn bar chart with the notebook's shared styling.

    Args:
        data: Table handed straight to ``sns.barplot``.
        x: Column plotted along the x axis.
        y: Column plotted as bar height.
        xlabel: Text for the x-axis label.
        ylabel: Text for the y-axis label.
        title: Chart title.
        hue: Optional column used to colour the bars; ``None`` means no hue grouping.

    Returns:
        The matplotlib Axes returned by ``sns.barplot``, so callers can adjust
        limits or fetch the parent figure for saving.
    """
    axes = sns.barplot(data, x=x, y=y, hue=hue, orient='v', palette='flare')
    # Tilt the category labels so long names do not overlap each other.
    for tick_label in axes.get_xticklabels():
        tick_label.set_rotation(70)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_title(title)
    # Draw the grid, then push it behind the bars.
    axes.grid()
    axes.set_axisbelow(True)
    return axes

# Sum Profit

In [None]:
# Total profit per sales team, rescaled to millions so the y axis stays readable.
profit_by_sales_team = grouped_by_sales_team['profit'].sum().reset_index(drop=False)
profit_by_sales_team['profit'] = profit_by_sales_team['profit'] / 1_000_000
barplot = plot_barplot(
    profit_by_sales_team,
    x='sales_team_name',
    y='profit',
    xlabel='Sales Team',
    ylabel='Profit (Millions)',  # fixed spelling: the axis previously read "Millons"
    title='Total Profit By Sales Team',
)
barplot


In [None]:
# Write the current bar chart to disk as a transparent, tightly cropped PNG.
figure = barplot.get_figure()
figure.savefig(
    '../../images/sales_team_profit/total_profit.png',
    transparent=True,
    bbox_inches='tight',
)

In [None]:
# Overall profit across every cleaned record, for comparison with the per-team totals.
cleaned_data.loc[:, 'profit'].sum()

In [None]:
# Total quantity sold per sales team, in millions. The frame keeps the
# `profit_by_sales_team` name even though it holds quantities in this cell.
profit_by_sales_team = (
    grouped_by_sales_team['quantity']
    .sum()
    .reset_index()
    .assign(quantity=lambda frame: frame['quantity'] / 1_000_000)
)
barplot = plot_barplot(
    profit_by_sales_team,
    x='sales_team_name',
    y='quantity',
    xlabel='Sales Team',
    ylabel='Quantity Sold (Millions)',
    title='Quantity Sold By Sales Team',
)
barplot
figure = barplot.get_figure()
figure.savefig(
    '../../images/sales_team_profit/total_quantity.png',
    transparent=True,
    bbox_inches='tight',
)

# Average Profit By Record

In [None]:
# Mean profit per record (sales invoice line) for each sales team.
profit_by_sales_team = grouped_by_sales_team['profit'].mean().reset_index()
barplot = plot_barplot(
    profit_by_sales_team,
    x='sales_team_name',
    y='profit',
    xlabel='Sales Team',
    ylabel='Average Profit',
    title='Average Profit By Sales Team',
)
barplot


In [None]:
# Median profit per record for each sales team. The labels now say "Median":
# this cell previously reused the "Average Profit" wording of the mean chart,
# which made the two charts indistinguishable to a reader.
profit_by_sales_team = grouped_by_sales_team['profit'].median().reset_index(drop=False)
barplot = plot_barplot(
    profit_by_sales_team,
    x='sales_team_name',
    y='profit',
    xlabel='Sales Team',
    ylabel='Median Profit',
    title='Median Profit By Sales Team',
)
# Fixed y-range chosen by the author for this chart.
barplot.set(ylim=(-8, 10))
figure = barplot.get_figure()
# Output path is left unchanged so anything that already references the image keeps working.
figure.savefig('../../images/sales_team_profit/average_profit.png', transparent=True, bbox_inches='tight')


In [None]:
# Build one profit sample per sales team so the teams can be compared pairwise
# with the non-parametric significance tests in the following cells.
team_names_by_key = {
    'b2b': 'B2B Others',
    'comp_north': 'Components North',
    'corp_north': 'Corporate North',
    'corp_south': 'Corporate South',
    'dc_power': 'DC Power North & NW',
    'office_auto': 'Office Automation',
    'smb_north': 'SMB 1 North',
    'smb_south': 'SMB 1 South',
}
# Despite its name, team_list is a dict: short team key -> Series of per-record profit.
team_list = {
    key: clean_w_date_quarter.loc[clean_w_date_quarter['sales_team_name'] == team_name, 'profit']
    for key, team_name in team_names_by_key.items()
}
# Keep each per-team Series available under its original variable name
# (dicts preserve insertion order, so this unpacking matches the mapping above).
(b2b, comp_north, corp_north, corp_south,
 dc_power, office_auto, smb_north, smb_south) = team_list.values()
# Every unordered pair of distinct teams exactly once. The previous
# itertools.product(team_list, team_list) also produced self-pairs (a team
# tested against itself) and each real pair twice (A vs B and B vs A).
cart_prod = list(itertools.combinations(team_list, 2))

In [None]:
# Test whether the difference between teams is significant. Kruskal-Wallis is used
# because, judging by the means and medians, the data is strongly positively skewed
# and Kruskal-Wallis is non-parametric.
for first_team, second_team in cart_prod:
    print(f'{first_team} vs {second_team}')
    kruskal_result = stats.kruskal(team_list[first_team], team_list[second_team])
    print(kruskal_result.pvalue)


In [None]:
# Median test for one pair of teams; prints the full result object, not just the p-value.
b2b_vs_corp_north = stats.median_test(b2b, corp_north)
print(b2b_vs_corp_north)

In [None]:
# Repeat the pairwise comparison with the median test as well.
for first_team, second_team in cart_prod:
    print(f'{first_team} vs {second_team}')
    median_result = stats.median_test(team_list[first_team], team_list[second_team])
    print(median_result.pvalue)

In [None]:
# Due to the number of data points, it is safe to say that the differences between sales teams are significant.

# Profit Per Item Sold

In [None]:
# Profit per item sold = total profit / total quantity, computed per sales team.
profit_by_sales_team = grouped_by_sales_team['profit'].sum()
quantity_by_sales_team = grouped_by_sales_team['quantity'].sum()
profit_per_item = (
    profit_by_sales_team
    .div(quantity_by_sales_team)
    .rename('profit_per_item')
    .reset_index()
)
barplot = plot_barplot(
    profit_per_item,
    x='sales_team_name',
    y='profit_per_item',
    xlabel='Sales Team',
    ylabel='Profit Per Item',
    title='Average Profit Per Item Sold By Sales Team',
)
# Fixed y-range chosen by the author for this chart.
barplot.set(ylim=(-8, 10))
figure = barplot.get_figure()
figure.savefig(
    '../../images/sales_team_profit/profit_per_item_sold.png',
    transparent=True,
    bbox_inches='tight',
)

# Profit Per Order

In [None]:
# Profit per order: sum profit over all records sharing a sales order number
# (negative orders are not dropped), then average those order totals per team.
order_keys = ['sales_team_name', 'sales_order_number']
profit_sales_team_sale = clean_w_date_quarter.groupby(order_keys)[['profit']].sum()
# Move the order number out of the index so only the team remains as the index.
profit_sales_team_expanded = profit_sales_team_sale.reset_index(level='sales_order_number')
# NOTE(review): the variable name says "med" but this is the mean - confirm which was intended.
med_profit_sales_team = profit_sales_team_expanded.groupby('sales_team_name')['profit'].mean()
med_profit_sales_team


In [None]:
# Average profit per sale = total team profit / number of distinct orders placed.
profit_by_sales_team = grouped_by_sales_team['profit'].sum()
# Only records with positive quantity count towards the number of orders. A return is
# not an order but the undoing of one: its negative profit cancels the original sale in
# the numerator, while the original order still counts once in the denominator (as an
# order that made no profit). Removing the original order entirely is hard because a
# return can cover several orders, so profit per item sold is the more reliable metric.
only_sales_not_returns = clean_w_date_quarter.loc[clean_w_date_quarter['quantity'] > 0, :]
# nunique(dropna=False) counts the same values as the previous .unique().apply(len),
# which also counted a missing order number as one value.
orders_per_sales_team = (
    only_sales_not_returns
    .groupby('sales_team_name')['sales_order_number']
    .nunique(dropna=False)
)
# The variable keeps its historical name `profit_per_item`, but it holds profit per
# sale; the following cell displays it under this name.
profit_per_item = profit_by_sales_team / orders_per_sales_team
profit_per_item = profit_per_item.reset_index(drop=False)
profit_per_item.columns = ['sales_team_name', 'profit_per_sale']
barplot = plot_barplot(
    profit_per_item,
    x='sales_team_name',
    y='profit_per_sale',
    xlabel='Sales Team',
    ylabel='Profit Per Sale',
    title='Average Profit Per Sale By Sales Team',
)
# Fixed y-range chosen by the author for this chart.
barplot.set(ylim=(-10, 70))
figure = barplot.get_figure()
figure.savefig('../../images/sales_team_profit/profit_per_order.png', transparent=True, bbox_inches='tight')

In [None]:
# Display the current contents of `profit_per_item`; when the notebook is run top to
# bottom this is the per-team profit-per-sale table built in the preceding cell.
profit_per_item

# Profit Per Quarter

In [None]:
# Total profit per team and quarter, scaled to hundred-thousands for the y axis.
grouped_by_sales_team_quarter = clean_w_date_quarter.groupby(['sales_team_name', 'year_quarter'])
summed_by_team_quarter = (
    grouped_by_sales_team_quarter['profit']
    .sum()
    .reset_index()
    .assign(
        profit=lambda frame: frame['profit'] / 100_000,
        # The quarter periods are converted to strings before plotting on the x axis.
        year_quarter=lambda frame: frame['year_quarter'].astype(str),
    )
)
plot = sns.lineplot(
    data=summed_by_team_quarter,
    x='year_quarter',
    y='profit',
    hue='sales_team_name',
)
plot.set_xlabel('Year/Quarter')
plot.set_ylabel('Profit (Hundred Thousands)')
plot.set_title('Profit Per Quarter By Sales Team')
# Anchor the legend's upper-left corner at the axes' top-right corner (outside the plot area).
sns.move_legend(plot, "upper left", bbox_to_anchor=(1,1))
plot.grid()
plot.set_axisbelow(True)
figure = plot.get_figure()
figure.savefig(
    '../../images/sales_team_profit/profit_by_quarter.png',
    transparent=True,
    bbox_inches='tight',
)

In [None]:
# Total quantity sold per team and quarter, scaled to hundred-thousands for the y axis.
grouped_by_sales_team_quarter = clean_w_date_quarter.groupby(['sales_team_name', 'year_quarter'])
summed_by_team_quarter = (
    grouped_by_sales_team_quarter['quantity']
    .sum()
    .reset_index()
    .assign(
        quantity=lambda frame: frame['quantity'] / 100_000,
        # The quarter periods are converted to strings before plotting on the x axis.
        year_quarter=lambda frame: frame['year_quarter'].astype(str),
    )
)
plot = sns.lineplot(
    data=summed_by_team_quarter,
    x='year_quarter',
    y='quantity',
    hue='sales_team_name',
)
plot.set_xlabel('Year/Quarter')
plot.set_ylabel('Quantity (Hundred Thousands)')
plot.set_title('Quantity Sold Per Quarter By Sales Team')
# Anchor the legend's upper-left corner at the axes' top-right corner (outside the plot area).
sns.move_legend(plot, "upper left", bbox_to_anchor=(1,1))
plot.grid()
plot.set_axisbelow(True)
figure = plot.get_figure()
figure.savefig(
    '../../images/sales_team_profit/quantity_by_quarter.png',
    transparent=True,
    bbox_inches='tight',
)

# Profit Per Item Per Quarter

In [None]:
# Profit per item sold for every (team, quarter): summed profit / summed quantity.
grouped_by_sales_team_quarter = clean_w_date_quarter.groupby(['sales_team_name', 'year_quarter'])
profit_by_sales_team = grouped_by_sales_team_quarter['profit'].sum()
quantity_by_sales_team = grouped_by_sales_team_quarter['quantity'].sum()
profit_per_item = profit_by_sales_team / quantity_by_sales_team
profit_per_item_by_team_quarter = profit_per_item.reset_index(level=[0, 1], drop=False)

# The ratio of two differently named Series has no name, so label the columns explicitly.
profit_per_item_by_team_quarter.columns = ['sales_team_name', 'year_quarter', 'profit_per_item']
# The quarter periods are converted to strings before plotting on the x axis.
profit_per_item_by_team_quarter['year_quarter'] = profit_per_item_by_team_quarter['year_quarter'].astype(str)
# Removed a self-assignment of the profit_per_item column (a no-op) and the `sizes=`
# argument, which has no effect when no `size` variable is mapped.
plot = sns.lineplot(
    data=profit_per_item_by_team_quarter,
    x='year_quarter',
    y='profit_per_item',
    hue='sales_team_name',
)
# Title corrected: this chart shows profit per item, not total profit per quarter.
plot.set(xlabel='Year/Quarter', ylabel='Profit Per Item', title='Profit Per Item Per Quarter By Sales Team')
plot.grid()
plot.set_axisbelow(True)
# Anchor the legend's upper-left corner at the axes' top-right corner (outside the plot area).
sns.move_legend(plot, "upper left", bbox_to_anchor=(1,1))

In [None]:
# Average profit per record for every (team, quarter): summed profit divided by the
# number of rows with a non-null quantity in that group.
grouped_by_sales_team_quarter = clean_w_date_quarter.groupby(['sales_team_name', 'year_quarter'])
profit_by_sales_team = grouped_by_sales_team_quarter['profit'].sum()
# NOTE: despite the variable name, .count() yields a row count, not a quantity total.
quantity_by_sales_team = grouped_by_sales_team_quarter['quantity'].count()
profit_per_item = profit_by_sales_team / quantity_by_sales_team
profit_per_item_by_team_quarter = profit_per_item.reset_index(level=[0, 1], drop=False)

# The ratio of two differently named Series has no name, so label the columns explicitly.
# The column keeps the name 'profit_per_item' although it holds profit per record.
profit_per_item_by_team_quarter.columns = ['sales_team_name', 'year_quarter', 'profit_per_item']
# The quarter periods are converted to strings before plotting on the x axis.
profit_per_item_by_team_quarter['year_quarter'] = profit_per_item_by_team_quarter['year_quarter'].astype(str)
# Removed a self-assignment of the profit_per_item column (a no-op) and the `sizes=`
# argument, which has no effect when no `size` variable is mapped.
plot = sns.lineplot(
    data=profit_per_item_by_team_quarter,
    x='year_quarter',
    y='profit_per_item',
    hue='sales_team_name',
)
# Labels now describe the plotted metric. The denominator is a row count, so this is
# profit per record rather than per order.
# NOTE(review): if a true per-order figure was intended, divide by the number of
# distinct sales_order_number values instead - confirm.
plot.set(
    xlabel='Year/Quarter',
    ylabel='Average Profit Per Record',
    title='Average Profit Per Record Per Quarter By Sales Team',
)
sns.move_legend(plot, "upper left", bbox_to_anchor=(1,1))
plot.grid()
plot.set_axisbelow(True)
figure = plot.get_figure()
# Saved under its own file name: this cell previously wrote to profit_by_quarter.png and
# silently overwrote the total-profit-per-quarter chart saved earlier in the notebook.
figure.savefig('../../images/sales_team_profit/profit_per_record_by_quarter.png', transparent=True, bbox_inches='tight')