In [1]:
import os
import pandas as pd
import jupyter_black

jupyter_black.load()

# Base directory used to locate the "data" folder. A notebook kernel has no
# `__file__`; the previous quoted "__file__" literal was just a file name that
# resolved against the working directory, so request that directory explicitly.
current_dir = os.getcwd()

### Set file paths to the datasets

In [2]:
# All CSV inputs live in the "data" folder under current_dir.
data_dir = os.path.join(current_dir, "data")

# Sales records, one file per week.
week1_path = os.path.join(data_dir, "Restaurant - Week 1 Sales.csv")
week2_path = os.path.join(data_dir, "Restaurant - Week 2 Sales.csv")

# Lookup / companion tables.
satisfaction_path = os.path.join(data_dir, "Restaurant - Week 1 Satisfaction.csv")
customers_path = os.path.join(data_dir, "Restaurant - Customers.csv")
foods_path = os.path.join(data_dir, "Restaurant - Foods.csv")

In [3]:
# Read every CSV into its own DataFrame; the order of the names on the left
# matches the order of the paths on the right.
week1, week2, satisfaction, customers, foods = (
    pd.read_csv(csv_path)
    for csv_path in (
        week1_path,
        week2_path,
        satisfaction_path,
        customers_path,
        foods_path,
    )
)

### `pd.concat()`

In [4]:
# Stack the two weeks vertically. ignore_index=True drops each frame's own
# row labels and renumbers the combined rows from 0.
all_sales = pd.concat(objs=[week1, week2], ignore_index=True)
all_sales.head(3)

Unnamed: 0,Customer ID,Food ID
0,537,9
1,97,4
2,658,1


In [5]:
# keys=... tags the rows of each input frame, giving a two-level index of
# (week label, row label within that week's frame).
sales = pd.concat(objs=[week1, week2], keys=["Week 1", "Week 2"])
display(sales.head(3))

# A list of index tuples selects rows and returns a DataFrame. Only the last
# expression of a cell is rendered automatically, so display() this one
# explicitly; otherwise the result is computed and silently discarded.
display(sales.loc[[("Week 1", 2)]])

# A single index tuple plus a column label returns the scalar at that position.
sales.loc[("Week 1", 2), "Customer ID"]

Unnamed: 0,Unnamed: 1,Customer ID,Food ID
Week 1,0,537,9
Week 1,1,97,4
Week 1,2,658,1


658

### Inner Joins

In [6]:
# Inner join on "Customer ID": only customers present in both weeks survive.
# Both frames carry a "Food ID" column, so the suffixes tell the two apart.
repeat_customers = week1.merge(
    right=week2,
    how="inner",
    on="Customer ID",
    suffixes=[" - Week 1", " - Week 2"],
)
repeat_customers.head(3)

Unnamed: 0,Customer ID,Food ID - Week 1,Food ID - Week 2
0,537,9,5
1,155,9,3
2,155,1,3


In [7]:
# Joining on both columns keeps only the (customer, food) pairs that occur in
# both weeks; no suffixes are needed because no other columns overlap.
join_keys = ["Customer ID", "Food ID"]
same_order_both_weeks = week1.merge(right=week2, how="inner", on=join_keys)
same_order_both_weeks.head(3)

Unnamed: 0,Customer ID,Food ID
0,304,3
1,540,3
2,937,10


### Outer Joins

In [8]:
# Outer join: keep every customer from either week. indicator=True adds a
# "_merge" column stating whether each row matched in both frames or came
# from only one side.
either_week = week1.merge(
    right=week2,
    on="Customer ID",
    how="outer",
    indicator=True,
    suffixes=[" Week 1", " Week 2"],
)
either_week.head(3)

Unnamed: 0,Customer ID,Food ID Week 1,Food ID Week 2,_merge
0,537,9.0,5.0,both
1,97,4.0,,left_only
2,658,1.0,,left_only


### Left Join

In [9]:
# Left join: keep every week 1 sale and attach the matching food details.
# sort=True orders the result by the join key ("Food ID").
week1_with_foods = week1.merge(right=foods, how="left", on="Food ID", sort=True)
week1_with_foods.head(3)

Unnamed: 0,Customer ID,Food ID,Food Item,Price
0,658,1,Sushi,3.99
1,600,1,Sushi,3.99
2,155,1,Sushi,3.99


### The `left_on` and `right_on` parameters

In [10]:
# The key is called "ID" in customers but "Customer ID" in week1, so name each
# side's key separately. Both key columns end up in the result; drop the
# redundant right-hand one.
customer_orders = customers.merge(
    right=week1,
    left_on="ID",
    right_on="Customer ID",
    sort=True,
)
customer_orders.drop(columns=["Customer ID"]).head(3)

Unnamed: 0,ID,First Name,Last Name,Gender,Company,Occupation,Food ID
0,3,Roger,Black,Male,Tagfeed,Account Executive,2
1,10,Steven,Ryan,Male,Twinder,Community Outreach Specialist,2
2,20,Lisa,Rice,Female,Oloo,Programmer IV,1


### Merging on index

In [11]:
# Move "ID" from a regular column into the index so later merges can match
# against it with right_index=True. The guard keeps this cell re-runnable:
# once "ID" is the index it is no longer a column, and calling set_index("ID")
# a second time would raise a KeyError.
# NOTE: cells that merge on the "ID" column (left_on="ID") must run before
# this one, because the reassignment replaces the original frame.
if "ID" in customers.columns:
    customers = customers.set_index("ID")

In [12]:
# Match week1's "Customer ID" column against the index of customers (the
# customer ID after the set_index step) instead of against a column.
week1_with_customers = week1.merge(
    right=customers,
    left_on="Customer ID",
    right_index=True,
)
week1_with_customers.head(3)

Unnamed: 0,Customer ID,Food ID,First Name,Last Name,Gender,Company,Occupation
0,537,9,Cheryl,Carroll,Female,Zoombeat,Registered Nurse
1,97,4,Amanda,Watkins,Female,Ozu,Account Coordinator
2,658,1,Patrick,Webb,Male,Browsebug,Community Outreach Specialist


### The `join()` method

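A "shortcut" for merging two DataFrames on their index. Note that `join()` defaults to a **left** join (`how="left"`), unlike `merge()`, which defaults to an inner join.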

In [13]:
# join() aligns the two frames on their row index (how="left" by default), so
# each week 1 sale is paired with the satisfaction row that has the same label.
week1_with_rating = week1.join(other=satisfaction)
week1_with_rating.head(3)

Unnamed: 0,Customer ID,Food ID,Satisfaction Rating
0,537,9,2
1,97,4,7
2,658,1,3


### The `pd.merge()` method

We already know this one: `pd.merge(left, right, ...)` is the function form of `left.merge(right, ...)`.