In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime

In [2]:
# Run-date stamp and file locations; every path is resolved against the
# current working directory.
today = datetime.today()
in_file = Path.cwd() / "data" / "raw" / "customer_master.xlsx"
report_dir = Path.cwd() / "reports"
# Must be an f-string: without the `f` prefix the braces are kept verbatim and
# the report is literally named "Commissions_Analysis_{today:%b-%d-%Y}.xlsx".
report_file = report_dir / f"Commissions_Analysis_{today:%b-%d-%Y}.xlsx"

In [7]:
# Load the customer master workbook; zip_code is read as text rather than
# being inferred as a number.
df = pd.read_excel(
    in_file,
    dtype={'zip_code': 'str'},
)

In [8]:
# Preview the first rows to confirm the workbook loaded with the expected columns.
df.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040


In [9]:
# Check column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   company_name  50 non-null     object
 1   channel       50 non-null     object
 2   zip_code      50 non-null     object
 3   city          50 non-null     object
 4   state         50 non-null     object
 5   account_num   50 non-null     object
 6   total_sales   50 non-null     int64 
dtypes: int64(1), object(6)
memory usage: 2.9+ KB


In [10]:
# Preview the last rows of the frame as well.
df.tail()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales
45,Omega Research Data,partner,32209,Jacksonville,FL,KX0489,3185196
46,Solutions Universal,reseller,66212,Shawnee Mission,KS,SA4443,6796068
47,Bell Frontier Resource,retail,95172,San Jose,CA,LA6029,1719822
48,Frontier Architecture,retail,27605,Raleigh,NC,OC5697,1168464
49,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450


In [11]:
# Summary statistics for the numeric column(s) — here only total_sales.
df.describe()

Unnamed: 0,total_sales
count,50.0
mean,2529873.0
std,2482702.0
min,746216.0
25%,1115702.0
50%,1328859.0
75%,1705738.0
max,9121596.0


In [12]:
# Count / unique / top / freq summary for the object-dtype (text) columns.
df.describe(include='object')

Unnamed: 0,company_name,channel,zip_code,city,state,account_num
count,50,50,50,50,50,50
unique,50,3,50,48,31,50
top,Technology Net Star,retail,22910,Washington,VA,GU6442
freq,1,38,1,2,4,1


In [13]:
# Grand total of total_sales across every row; used below as the
# denominator for the commission rate.
last_year_sales = df.loc[:, 'total_sales'].sum()

In [14]:
# Show the raw total.
last_year_sales

126493662

In [15]:
# Same total, formatted with thousands separators and no decimals.
print(f'{last_year_sales:,.0f}')

126,493,662


In [19]:
# Total commission amount to distribute across all accounts.
commission_target = 1_000_000
# Flat rate that, applied to the summed sales, yields the target amount.
effective_rate = commission_target / last_year_sales
# Display the rate as a percentage with two decimals.
print(f'{effective_rate:,.2%}')

0.79%


In [20]:
# Each account's commission is its sales multiplied by the flat effective rate.
df['commission'] = df['total_sales'] * effective_rate

In [21]:
# Confirm the new commission column was added.
df.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,9944.466625
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,9159.06759
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488,13150.761656
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560,9435.729673
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040,7573.818204


In [22]:
# Round every commission to a whole number (overwrites the unrounded column).
# NOTE: rounding row by row means the column no longer sums exactly to
# commission_target.
df['commission'] = df['commission'].round(decimals=0)

In [23]:
# Check the rounded commission values.
df.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,9944.0
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,9159.0
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488,13151.0
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560,9436.0
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040,7574.0


In [24]:
# Total of the rounded commissions; per-row rounding leaves this slightly
# off the 1,000,000 target (1,000,004 in the recorded run).
df['commission'].sum()

1000004.0

In [25]:
# Average commission per account.
df['commission'].mean()

20000.08

In [26]:
# Distribution of the commission column, with the statistics rounded for readability.
df['commission'].describe().round()

count       50.0
mean     20000.0
std      19627.0
min       5899.0
25%       8820.0
50%      10506.0
75%      13485.0
max      72111.0
Name: commission, dtype: float64