In [1]:
import pandas as pd
import numpy as np

## Reading from Excel Sheets
- You can read several sheets from the same Excel workbook into pandas

In [2]:
# Open the workbook once and parse every sheet from the same handle, rather
# than re-opening and re-reading 'store_data.xlsx' for each sheet.
with pd.ExcelFile('store_data.xlsx') as workbook:
    inventory = pd.read_excel(workbook, sheet_name='inventory')
    newinventory = pd.read_excel(workbook, sheet_name='newinventory')
    tax = pd.read_excel(workbook, sheet_name='taxcode')
    color = pd.read_excel(workbook, sheet_name='color')
    # Staff rows are indexed by the 'employeeid' column instead of a default index.
    staff = pd.read_excel(workbook, sheet_name='employees', index_col='employeeid')

## Value Counts (Let's check in on our staff!)

In [3]:
staff

Unnamed: 0_level_0,first name,last name,position,pay
employeeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,John,Smith,manager,75000
2,Mary,Smith,manager,100000
3,John,Montana,,50
4,Tom,Thompson,cashier,45000
5,Wanda,Watson,cashier,42000


### Let's see how many employees of each type we have

In [4]:
staff['position'].value_counts()

manager    2
cashier    2
Name: position, dtype: int64

In [5]:
staff.loc[staff.position.notna()]

Unnamed: 0_level_0,first name,last name,position,pay
employeeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,John,Smith,manager,75000
2,Mary,Smith,manager,100000
4,Tom,Thompson,cashier,45000
5,Wanda,Watson,cashier,42000


### It appears that we lost one, let's try to track them down!

In [6]:
staff['position'].value_counts(dropna = False)

manager    2
cashier    2
NaN        1
Name: position, dtype: int64

### Let's look at the rows where job position is NaN

In [7]:
staff.loc[staff.position.isna()]

Unnamed: 0_level_0,first name,last name,position,pay
employeeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,John,Montana,,50


### John Montana is a private contractor, so we have to update his position

In [8]:
# Build the mask of rows with no recorded position, label every such row as a
# contractor, then display the rows that now carry that label.
missing_position = staff['position'].isna()
staff.loc[missing_position, 'position'] = 'Contractor'
staff.loc[staff['position'] == 'Contractor']

Unnamed: 0_level_0,first name,last name,position,pay
employeeid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3,John,Montana,Contractor,50


#### Looks like we have 2 Johns with job titles now, let's find out what percent of our staff that is

In [9]:
staff['first name'].value_counts(normalize = True)*100

John     40.0
Tom      20.0
Mary     20.0
Wanda    20.0
Name: first name, dtype: float64

#### Let's see how many staff members we have in each pay bracket (`bins = 3` splits pay into three equal-width ranges)

In [10]:
staff.pay.value_counts(bins = 3)

(66683.333, 100000.0]     2
(33366.667, 66683.333]    2
(-49.951, 33366.667]      1
Name: pay, dtype: int64

#### `groupby` followed by `count` works in a similar way to `value_counts()`. This will be relevant in the SQL world!

In [11]:
staff.groupby(by='first name')['first name'].count()

first name
John     2
Mary     1
Tom      1
Wanda    1
Name: first name, dtype: int64

In [12]:
staff.groupby(by=['first name', 'last name'])['first name'].count()

first name  last name
John        Montana      1
            Smith        1
Mary        Smith        1
Tom         Thompson     1
Wanda       Watson       1
Name: first name, dtype: int64

# Let's talk about managing store inventory and concatenate/merge some data

In [13]:
inventory

Unnamed: 0,productid,product,department,preparation,price,stock
0,1,bananas,produce,unprepared,2.0,100
1,2,apples,produce,unprepared,3.5,200
2,3,fruit salad,produce,prepared,5.99,380
3,4,pasta,pantry,unprepared,2.29,50
4,5,pasta sauce,pantry,unprepared,2.99,200
5,6,bread,bakery,unprepared,1.99,100
6,7,pizza,bakery,prepared,8.99,10
7,8,turkey sandwich,deli,prepared,6.99,5
8,9,cheese sandwich,deli,prepared,5.5,8
9,10,toilet paper,household,unprepared,100.0,1


In [14]:
newinventory

Unnamed: 0,productid,product,department,preparation,price,stock
0,12,ice cream,frozen,unprepared,4.99,100
1,13,ice cream cones,pantry,unprepared,3.5,900
2,14,chocolate syrup,pantry,unprepared,5.99,380
3,15,beans,pantry,unprepared,2.99,50
4,16,chicken,butcher,prepared,4.99,10


#### Looks like we have some new inventory that we want to add to our inventory dataframe, let's go ahead and do that by using "concat"

In [15]:
# Stack the new products underneath the existing ones. ignore_index=True gives
# the combined frame a fresh 0..n-1 index so that row labels coming from the two
# frames cannot collide (each input frame carries its own labels starting at 0).
full_inventory = pd.concat([inventory, newinventory], ignore_index=True)

In [16]:
full_inventory

Unnamed: 0,productid,product,department,preparation,price,stock
0,1,bananas,produce,unprepared,2.0,100
1,2,apples,produce,unprepared,3.5,200
2,3,fruit salad,produce,prepared,5.99,380
3,4,pasta,pantry,unprepared,2.29,50
4,5,pasta sauce,pantry,unprepared,2.99,200
5,6,bread,bakery,unprepared,1.99,100
6,7,pizza,bakery,prepared,8.99,10
7,8,turkey sandwich,deli,prepared,6.99,5
8,9,cheese sandwich,deli,prepared,5.5,8
9,10,toilet paper,household,unprepared,100.0,1


In [17]:
full_inventory = full_inventory.set_index('productid')
full_inventory

Unnamed: 0_level_0,product,department,preparation,price,stock
productid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,bananas,produce,unprepared,2.0,100
2,apples,produce,unprepared,3.5,200
3,fruit salad,produce,prepared,5.99,380
4,pasta,pantry,unprepared,2.29,50
5,pasta sauce,pantry,unprepared,2.99,200
6,bread,bakery,unprepared,1.99,100
7,pizza,bakery,prepared,8.99,10
8,turkey sandwich,deli,prepared,6.99,5
9,cheese sandwich,deli,prepared,5.5,8
10,toilet paper,household,unprepared,100.0,1


### We want to see what color all of our products are (inner vs left join)

In [18]:
color.head()

Unnamed: 0,productid,color(s)
0,1,yellow
1,2,red
2,3,"red, yellow, green"
3,4,yellow
4,5,red


In [19]:
full_inventory.merge(color, left_index = True, right_on = 'productid')

Unnamed: 0,product,department,preparation,price,stock,productid,color(s)
0,bananas,produce,unprepared,2.0,100,1,yellow
1,apples,produce,unprepared,3.5,200,2,red
2,fruit salad,produce,prepared,5.99,380,3,"red, yellow, green"
3,pasta,pantry,unprepared,2.29,50,4,yellow
4,pasta sauce,pantry,unprepared,2.99,200,5,red
5,bread,bakery,unprepared,1.99,100,6,brown
6,pizza,bakery,prepared,8.99,10,7,"red, brown, white"
7,turkey sandwich,deli,prepared,6.99,5,8,"brown, green, yellow"
8,cheese sandwich,deli,prepared,5.5,8,9,"brown, yellow"
9,toilet paper,household,unprepared,100.0,1,10,white


In [20]:
full_inventory.merge(color, left_index = True, right_on = 'productid', how = 'left')

Unnamed: 0,product,department,preparation,price,stock,productid,color(s)
0,bananas,produce,unprepared,2.0,100,1,yellow
1,apples,produce,unprepared,3.5,200,2,red
2,fruit salad,produce,prepared,5.99,380,3,"red, yellow, green"
3,pasta,pantry,unprepared,2.29,50,4,yellow
4,pasta sauce,pantry,unprepared,2.99,200,5,red
5,bread,bakery,unprepared,1.99,100,6,brown
6,pizza,bakery,prepared,8.99,10,7,"red, brown, white"
7,turkey sandwich,deli,prepared,6.99,5,8,"brown, green, yellow"
8,cheese sandwich,deli,prepared,5.5,8,9,"brown, yellow"
9,toilet paper,household,unprepared,100.0,1,10,white


In [21]:
full_inventory.merge(color, left_index = True, right_on = 'productid', how = 'outer').set_index('productid')

Unnamed: 0_level_0,product,department,preparation,price,stock,color(s)
productid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,bananas,produce,unprepared,2.0,100.0,yellow
2,apples,produce,unprepared,3.5,200.0,red
3,fruit salad,produce,prepared,5.99,380.0,"red, yellow, green"
4,pasta,pantry,unprepared,2.29,50.0,yellow
5,pasta sauce,pantry,unprepared,2.99,200.0,red
6,bread,bakery,unprepared,1.99,100.0,brown
7,pizza,bakery,prepared,8.99,10.0,"red, brown, white"
8,turkey sandwich,deli,prepared,6.99,5.0,"brown, green, yellow"
9,cheese sandwich,deli,prepared,5.5,8.0,"brown, yellow"
10,toilet paper,household,unprepared,100.0,1.0,white


### Let's see what all of our employees' favorite foods are!
- Just kidding, there is an issue here, we have joined on 2 clashing ID numbers (watch out for this!)

In [22]:
full_inventory.merge(staff, left_on = 'productid', right_index = True)[['product', 'first name', 'last name']]

Unnamed: 0_level_0,product,first name,last name
productid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,bananas,John,Smith
2,apples,Mary,Smith
3,fruit salad,John,Montana
4,pasta,Tom,Thompson
5,pasta sauce,Wanda,Watson


### Let's group by some different things to understand our inventory a little bit better
- Will see how many products we have that need to be prepared in the store vs. shipped ready to sell
- Will check out how many products we have in each department
- Will want to see the total amount of items in stock in each department
- Will want to check out the average price of an item split by preparation need AND department
- Will look at an interesting way that you can get the full list of items from each department

In [23]:
full_inventory.head()

Unnamed: 0_level_0,product,department,preparation,price,stock
productid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,bananas,produce,unprepared,2.0,100
2,apples,produce,unprepared,3.5,200
3,fruit salad,produce,prepared,5.99,380
4,pasta,pantry,unprepared,2.29,50
5,pasta sauce,pantry,unprepared,2.99,200


In [24]:
full_inventory.groupby(by = 'preparation')['product'].count()

preparation
prepared       5
unprepared    11
Name: product, dtype: int64

In [25]:
full_inventory.groupby(by = ['department', 'preparation'])['product'].count()

department  preparation
bakery      prepared       1
            unprepared     1
butcher     prepared       1
deli        prepared       2
frozen      unprepared     1
household   unprepared     2
pantry      unprepared     5
produce     prepared       1
            unprepared     2
Name: product, dtype: int64

In [26]:
full_inventory.groupby(by = 'department')['stock'].sum()

department
bakery        110
butcher        10
deli           13
frozen        100
household    1001
pantry       1580
produce       680
Name: stock, dtype: int64

In [27]:
full_inventory.groupby(by = ['preparation', 'department'])['price'].mean()

preparation  department
prepared     bakery         8.990
             butcher        4.990
             deli           6.245
             produce        5.990
unprepared   bakery         1.990
             frozen         4.990
             household     51.750
             pantry         3.552
             produce        2.750
Name: price, dtype: float64

In [28]:
full_inventory.groupby(by = 'department')['product'].apply(list)

department
bakery                                          [bread, pizza]
butcher                                              [chicken]
deli                        [turkey sandwich, cheese sandwich]
frozen                                             [ice cream]
household                           [toilet paper, toothpicks]
pantry       [pasta, pasta sauce, ice cream cones, chocolat...
produce                         [bananas, apples, fruit salad]
Name: product, dtype: object

In [29]:
# Select the department by its label. The grouped result is indexed by
# department name, so a positional [5] only works while 'pantry' happens to be
# the sixth group, and integer-position lookups on a labelled Series are
# deprecated in newer pandas releases.
full_inventory.groupby(by = 'department')['product'].apply(list)['pantry']

['pasta', 'pasta sauce', 'ice cream cones', 'chocolate syrup', 'beans']

In [30]:
# Look the department up by label instead of by position, and use a plain loop:
# a list comprehension run only for its print() side effect builds a throwaway
# list of None values that the notebook then echoes as the cell result.
pantry_products = full_inventory.groupby(by = 'department')['product'].apply(list)['pantry']
for x in pantry_products:
    print(f"{x} is an item in the pantry department")

pasta is an item in the pantry department
pasta sauce is an item in the pantry department
ice cream cones is an item in the pantry department
chocolate syrup is an item in the pantry department
beans is an item in the pantry department


[None, None, None, None, None]

### Let's check out how we can tax our items properly with a 2-column join
- We want to jump back into merging, specifically merging on two columns

In [31]:
tax

Unnamed: 0,codename,department,preparation,multiplier
0,raw produce,produce,unprepared,1.05
1,cut produce,produce,prepared,1.1
2,shelf-stable,pantry,unprepared,1.05
3,delivered bakery,bakery,unprepared,1.05
4,in-store baked,bakery,prepared,1.12
5,deli,deli,prepared,1.12
6,household,household,unprepared,1.05
7,freezer food,frozen,unprepared,1.05
8,meat department,butcher,prepared,1.07


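### Not good!!!! Merging on `department` alone matches each product to every tax code in its department, so rows get duplicated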

In [32]:
full_inventory.merge(tax, left_on = 'department', right_on = 'department')

Unnamed: 0,product,department,preparation_x,price,stock,codename,preparation_y,multiplier
0,bananas,produce,unprepared,2.0,100,raw produce,unprepared,1.05
1,bananas,produce,unprepared,2.0,100,cut produce,prepared,1.1
2,apples,produce,unprepared,3.5,200,raw produce,unprepared,1.05
3,apples,produce,unprepared,3.5,200,cut produce,prepared,1.1
4,fruit salad,produce,prepared,5.99,380,raw produce,unprepared,1.05
5,fruit salad,produce,prepared,5.99,380,cut produce,prepared,1.1
6,pasta,pantry,unprepared,2.29,50,shelf-stable,unprepared,1.05
7,pasta sauce,pantry,unprepared,2.99,200,shelf-stable,unprepared,1.05
8,ice cream cones,pantry,unprepared,3.5,900,shelf-stable,unprepared,1.05
9,chocolate syrup,pantry,unprepared,5.99,380,shelf-stable,unprepared,1.05


### Here, we needed to merge on multiple columns to make sure that we didn't cause rows to be duplicated

In [33]:
# Join on BOTH department and preparation so each product picks up exactly one
# tax code. validate='many_to_one' makes pandas raise if the tax table ever
# holds a duplicate (department, preparation) pair, instead of silently
# duplicating inventory rows.
# The tax columns are renamed by name rather than by overwriting .columns
# positionally, so a change in column order or count cannot mislabel the data.
taxed_inventory = (
    full_inventory
    .merge(tax, on=['department', 'preparation'], validate='many_to_one')
    .rename(columns={'codename': 'taxcode', 'multiplier': 'tax rate'})
)
taxed_inventory

Unnamed: 0,product,department,preparation,price,stock,taxcode,tax rate
0,bananas,produce,unprepared,2.0,100,raw produce,1.05
1,apples,produce,unprepared,3.5,200,raw produce,1.05
2,fruit salad,produce,prepared,5.99,380,cut produce,1.1
3,pasta,pantry,unprepared,2.29,50,shelf-stable,1.05
4,pasta sauce,pantry,unprepared,2.99,200,shelf-stable,1.05
5,ice cream cones,pantry,unprepared,3.5,900,shelf-stable,1.05
6,chocolate syrup,pantry,unprepared,5.99,380,shelf-stable,1.05
7,beans,pantry,unprepared,2.99,50,shelf-stable,1.05
8,bread,bakery,unprepared,1.99,100,delivered bakery,1.05
9,pizza,bakery,prepared,8.99,10,in-store baked,1.12


### Now that we have a full dataset with all of our inventory data in one place, let's talk about applying some functions to it!

In [34]:
def apply_tax(row):
    """Return the after-tax price for one inventory row.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'price' and 'tax rate'.

    Returns
    -------
    The product of the two values, rounded to 2 decimal places.
    """
    taxed_price = row['price'] * row['tax rate']
    return round(taxed_price, 2)

In [35]:
taxed_inventory.apply(apply_tax, axis = 1)

0       2.10
1       3.68
2       6.59
3       2.40
4       3.14
5       3.68
6       6.29
7       3.14
8       2.09
9      10.07
10      7.83
11      6.16
12    105.00
13      3.68
14      5.24
15      5.34
dtype: float64

In [36]:
# Column-wise version of the same calculation: multiply the two columns
# element by element and round the result to 2 decimal places.
(taxed_inventory['price'] * taxed_inventory['tax rate']).round(2)

0       2.10
1       3.68
2       6.59
3       2.40
4       3.14
5       3.68
6       6.29
7       3.14
8       2.09
9      10.07
10      7.83
11      6.16
12    105.00
13      3.68
14      5.24
15      5.34
dtype: float64

### Now something slightly more complicated, how would we change the tax rate on different departments with the same function?

In [37]:
def change_tax_rate(row):
    """Return the adjusted tax rate for one inventory row.

    Parameters
    ----------
    row : mapping-like
        Must provide 'department'; 'tax rate' is read for every department
        except 'produce'.

    Returns
    -------
    The string "NO TAX" for produce, the current rate minus .01 for pantry,
    the current rate plus .5 for household, and the unchanged rate for any
    other department. Note the return type is therefore str or numeric.
    """
    department = row['department']

    # Produce is exempt; it returns a label instead of a number.
    if department == 'produce':
        return "NO TAX"

    current_rate = row['tax rate']
    if department == 'pantry':
        return current_rate - .01
    if department == 'household':
        return current_rate + .5
    return current_rate

In [38]:
taxed_inventory['NEW TAX'] = taxed_inventory.apply(change_tax_rate, axis = 1)
taxed_inventory

Unnamed: 0,product,department,preparation,price,stock,taxcode,tax rate,NEW TAX
0,bananas,produce,unprepared,2.0,100,raw produce,1.05,NO TAX
1,apples,produce,unprepared,3.5,200,raw produce,1.05,NO TAX
2,fruit salad,produce,prepared,5.99,380,cut produce,1.1,NO TAX
3,pasta,pantry,unprepared,2.29,50,shelf-stable,1.05,1.04
4,pasta sauce,pantry,unprepared,2.99,200,shelf-stable,1.05,1.04
5,ice cream cones,pantry,unprepared,3.5,900,shelf-stable,1.05,1.04
6,chocolate syrup,pantry,unprepared,5.99,380,shelf-stable,1.05,1.04
7,beans,pantry,unprepared,2.99,50,shelf-stable,1.05,1.04
8,bread,bakery,unprepared,1.99,100,delivered bakery,1.05,1.05
9,pizza,bakery,prepared,8.99,10,in-store baked,1.12,1.12


### We can also apply functions down the columns
- This is very similar to applying across rows, but you are working with cells rather than rows
- Let's pump up those profits today by making some much needed changes to our inventory

In [39]:
def increase_quality(prodname):
    """Return the product name with the 'Luxury ' prefix prepended.

    Parameters
    ----------
    prodname : str
        Product name to rebrand.
    """
    prefix = 'Luxury '
    return prefix + prodname

def increase_profit(pricing, markup=100):
    """Return the price raised by a flat markup.

    Parameters
    ----------
    pricing : number
        Current price of one item.
    markup : number, optional
        Flat amount added to the price. Defaults to 100, so calling with a
        single argument behaves exactly as before.

    Returns
    -------
    pricing + markup
    """
    return pricing + markup

# Run each cell-level function down its own column and write the result back
# into that column. Re-running this cell applies the changes again.
for column, transform in (('product', increase_quality), ('price', increase_profit)):
    taxed_inventory[column] = taxed_inventory[column].apply(transform)

In [40]:
taxed_inventory

Unnamed: 0,product,department,preparation,price,stock,taxcode,tax rate,NEW TAX
0,Luxury bananas,produce,unprepared,102.0,100,raw produce,1.05,NO TAX
1,Luxury apples,produce,unprepared,103.5,200,raw produce,1.05,NO TAX
2,Luxury fruit salad,produce,prepared,105.99,380,cut produce,1.1,NO TAX
3,Luxury pasta,pantry,unprepared,102.29,50,shelf-stable,1.05,1.04
4,Luxury pasta sauce,pantry,unprepared,102.99,200,shelf-stable,1.05,1.04
5,Luxury ice cream cones,pantry,unprepared,103.5,900,shelf-stable,1.05,1.04
6,Luxury chocolate syrup,pantry,unprepared,105.99,380,shelf-stable,1.05,1.04
7,Luxury beans,pantry,unprepared,102.99,50,shelf-stable,1.05,1.04
8,Luxury bread,bakery,unprepared,101.99,100,delivered bakery,1.05,1.05
9,Luxury pizza,bakery,prepared,108.99,10,in-store baked,1.12,1.12


### Now it's time to figure out what our new revenue is!

In [41]:
taxed_inventory['revenue'] = taxed_inventory.price * taxed_inventory.stock

In [42]:
taxed_inventory.revenue

0      10200.00
1      20700.00
2      40276.20
3       5114.50
4      20598.00
5      93150.00
6      40276.20
7       5149.50
8      10199.00
9       1089.90
10       534.95
11       844.00
12       200.00
13    103500.00
14     10499.00
15      1049.90
Name: revenue, dtype: float64

In [43]:
taxed_inventory.revenue.sum()

363381.14999999997

### We may need to push up prices a little bit further, let's apply a quick lambda function to fix that!

In [44]:
taxed_inventory.price.apply(lambda x :(x + 100))*taxed_inventory.stock

0      20200.00
1      40700.00
2      78276.20
3      10114.50
4      40598.00
5     183150.00
6      78276.20
7      10149.50
8      20199.00
9       2089.90
10      1034.95
11      1644.00
12       300.00
13    203500.00
14     20499.00
15      2049.90
dtype: float64

In [45]:
(taxed_inventory.price.apply(lambda x :(x + 100))*taxed_inventory.stock).sum()

712781.1499999999

### Job well done everybody, let's go grab some sandwiches!

In [46]:
taxed_inventory[taxed_inventory['product'].str.contains('sandwich')]

Unnamed: 0,product,department,preparation,price,stock,taxcode,tax rate,NEW TAX,revenue
10,Luxury turkey sandwich,deli,prepared,106.99,5,deli,1.12,1.12,534.95
11,Luxury cheese sandwich,deli,prepared,105.5,8,deli,1.12,1.12,844.0
