## 10.1 Introducing the Datasets

In [None]:
import pandas as pd

In [None]:
# Peek at the first five rows of the first members file (result is not kept).
pd.read_csv("meetup/members1.csv").head(n=5)

In [None]:
# Peek at the first five rows of the second members file (result is not kept).
pd.read_csv("meetup/members2.csv").head(n=5)

In [None]:
# Load both member files in full; later cells concatenate and merge them.
members1, members2 = (
    pd.read_csv("meetup/members1.csv"),
    pd.read_csv("meetup/members2.csv"),
)

In [None]:
# Load the categories table and show its first five rows.
categories = pd.read_csv("meetup/categories.csv")
categories.head(n=5)

In [None]:
# Load the cities table and show its first five rows.
cities = pd.read_csv("meetup/cities.csv")
cities.head(n=5)

In [None]:
# Load the groups table and show its first five rows.
groups = pd.read_csv("meetup/groups.csv")
groups.head(n=5)

In [None]:
# Load the first events file and show its first five rows.
events1 = pd.read_csv("meetup/events1.csv")
events1.head(n=5)

In [None]:
# Load the second events file and show its first five rows.
events2 = pd.read_csv("meetup/events2.csv")
events2.head(n=5)

## 10.2 Concatenating the Datasets

In [None]:
# Stack members2 beneath members1. Each frame keeps its own index labels,
# so labels can repeat in the result.
pd.concat(objs=[members1, members2])

In [None]:
# Row count of the first members table.
members1.shape[0]

In [None]:
# Row count of the second members table.
members2.shape[0]

In [None]:
# Same concatenation, but discard the original index labels and number the
# combined rows 0..n-1.
pd.concat(objs=[members1, members2], ignore_index=True)

In [None]:
# Tag each source frame with an outer index level: "A" for members1 rows,
# "B" for members2 rows.
pd.concat(objs=[members1, members2], keys=["A", "B"])

### 10.2.1 Missing Values in Concatenated DataFrames

In [None]:
# Small example table: rows labelled 1903 and 1904, columns "Physics" and
# "Chemistry".
data_A = [
    ["Marie Curie", "Svante Arrhenius"],
    ["Lord Rayleigh", "William Ramsay"],
]
nobel_winners_A = pd.DataFrame(
    data=data_A, index=[1903, 1904], columns=["Physics", "Chemistry"]
)

nobel_winners_A

In [None]:
# Second example table: rows labelled 1904 and 1905, columns "Chemistry" and
# "Medicine". It shares the 1904 label and the "Chemistry" column with
# nobel_winners_A, which is what makes the concatenation examples interesting.
data_B = [
    ["William Ramsay", "Ivan Pavlov"],
    ["Adolf von Baeyer", "Robert Koch"],
]

nobel_winners_B = pd.DataFrame(
    data=data_B,
    columns=["Chemistry", "Medicine"],
    index=[1904, 1905],
)

nobel_winners_B

In [None]:
# Row-wise concatenation of frames whose columns only partly overlap:
# "Physics" exists only in A and "Medicine" only in B, so the gaps are
# filled with NaN.
pd.concat(objs=[nobel_winners_A, nobel_winners_B])

In [None]:
# The two calls below are equivalent: axis=1 and axis="columns" both place
# the frames side by side instead of stacking them.
pd.concat(objs=[nobel_winners_A, nobel_winners_B], axis=1)

pd.concat(objs=[nobel_winners_A, nobel_winners_B], axis="columns")

In [None]:
# Third example table: rows labelled 1904 and 1905, columns "Literature" and
# "Peace".
data_C = [
    ["Frederic Mistral", "Institut de Droit International"],
    ["Henryk Sienkiewicz", "Bertha von Suttner"],
]
nobel_winners_C = pd.DataFrame(
    data=data_C, index=[1904, 1905], columns=["Literature", "Peace"]
)

nobel_winners_C

In [None]:
# Re-display the first table for comparison before concatenating it again.
nobel_winners_A

In [None]:
# A and C have no column in common, so every row of the result has NaN in
# the columns that came from the other frame.
pd.concat(objs=[nobel_winners_A, nobel_winners_C])

## 10.3 Inner Joins

In [None]:
# Last three rows of the first events table.
events1.tail(n=3)

In [None]:
# Last three rows of the second events table.
events2.tail(n=3)

In [None]:
# Inner join: keep only group_id values that appear in both events tables.
events1.merge(right=events2, how="inner", on="group_id").head()

In [None]:
# Rows of events1 belonging to group 5817262.
events1.loc[events1["group_id"].eq(5817262)]

In [None]:
# Rows of events2 belonging to group 5817262.
events2.loc[events2["group_id"].eq(5817262)]

In [None]:
# Inner join on group_id with custom suffixes for the overlapping column
# names: columns from events1 end in "_M1" and columns from events2 end in
# "_M2". Both suffixes use the same form (no embedded space) so the resulting
# column names are consistent.
events1.merge(
    right=events2,
    how="inner",
    on="group_id",
    suffixes=["_M1", "_M2"],
).head()

In [None]:
# Inner join on two keys at once: a row is kept only when both group_id and
# event_name match. Remaining overlapping columns get the " Month 1" /
# " Month 2" suffixes.
events1.merge(
    right=events2,
    on=["group_id", "event_name"],
    how="inner",
    suffixes=[" Month 1", " Month 2"],
).head()

## 10.4 Outer Joins

In [None]:
# Outer join: keep every group_id from either table; show the first rows.
events1.merge(right=events2, how="outer", on="group_id").head()

In [None]:
# Same outer join; show the last rows.
events1.merge(right=events2, how="outer", on="group_id").tail()

In [None]:
# Same outer join; show the rows at positions 8 through 12.
events1.merge(right=events2, how="outer", on="group_id").iloc[8:13]

In [None]:
# indicator=True adds a "_merge" column telling where each row's key was
# found: "left_only", "right_only", or "both".
events1.merge(right=events2, how="outer", on="group_id", indicator=True)

In [None]:
# Keep the outer join (with its "_merge" indicator column) for filtering.
outer_join = events1.merge(
    right=events2, how="outer", on="group_id", indicator=True
)

# Boolean mask selecting rows whose group_id exists only in events1.
in_left_only = outer_join["_merge"] == "left_only"

outer_join.loc[in_left_only].head()

## 10.5 Left and Right Joins

In [None]:
# First row of the groups table.
groups.head(n=1)

In [None]:
# Left join: keep every row of groups and attach the matching categories
# columns via category_id.
groups.merge(right=categories, how="left", on="category_id").head()

## 10.6 The left_on and right_on Parameters

In [None]:
# First two rows of the cities table.
cities.head(n=2)

In [None]:
# Rename the key column in place so the two tables no longer share a key
# name; this sets up the left_on / right_on example.
cities.rename(columns={"city_id": "city identifier"}, inplace=True)

cities.head(n=2)

In [None]:
# Left join where the key has a different name on each side: "city_id" in
# groups, "city identifier" in cities.
groups.merge(
    right=cities,
    how="left",
    left_on="city_id",
    right_on="city identifier",
).head()

## 10.7 Merging on Index Labels

In [None]:
# Move member_id into the index so the next merge can match on index labels.
# The guard makes the cell safe to re-run: once member_id is the index it is
# no longer a column, and calling set_index again would raise a KeyError.
if "member_id" in members1.columns:
    members1.set_index(keys="member_id", inplace=True)

members1.head(2)

In [None]:
# First two rows of the groups table.
groups.head(n=2)

In [None]:
# Left join matching the organizer_id column of groups against the index
# labels of members1.
groups.merge(
    right=members1, how="left", left_on="organizer_id", right_index=True
).head()

## 10.8 Coding Challenge

In [None]:
# Peek at the first five rows of the week 1 sales file (result is not kept).
pd.read_csv("restaurant/week_1_sales.csv").head(n=5)

In [None]:
# Load both weekly sales files in full.
week1, week2 = (
    pd.read_csv("restaurant/week_1_sales.csv"),
    pd.read_csv("restaurant/week_2_sales.csv"),
)

In [None]:
# Peek at the customers file, using its "ID" column as the index.
pd.read_csv("restaurant/customers.csv", index_col="ID").head(n=5)

In [None]:
# Load the customers table with the "ID" column as its index.
customers = pd.read_csv("restaurant/customers.csv", index_col="ID")

In [None]:
# Display the whole foods file, using its "Food ID" column as the index.
pd.read_csv("restaurant/foods.csv", index_col="Food ID")

In [None]:
# Load the foods table with the "Food ID" column as its index.
foods = pd.read_csv("restaurant/foods.csv", index_col="Food ID")

In [None]:
# Stack the two weeks, labelling each source with an outer index level
# ("Week 1" / "Week 2").
pd.concat(objs=[week1, week2], keys=["Week 1", "Week 2"])

In [None]:
# Inner join on "Customer ID": keeps only IDs present in both weekly tables.
week1.merge(right=week2, how="inner", on="Customer ID").head()

In [None]:
# Same inner join, then keep only the first row for each "Customer ID".
week1.merge(
    right=week2, how="inner", on="Customer ID"
).drop_duplicates(subset=["Customer ID"]).head()

In [None]:
# Inner join on two keys: a row is kept only when the same
# "Customer ID" / "Food ID" pair appears in both weeks.
week1.merge(right=week2, how="inner", on=["Customer ID", "Food ID"])

In [None]:
# Outer join on "Customer ID"; the added "_merge" column shows whether each
# ID came from week1 only, week2 only, or both.
week1.merge(
    right=week2, how="outer", on="Customer ID", indicator=True
).head()

In [None]:
# Left join matching week1's "Customer ID" column against the index labels
# of customers.
week1.merge(
    right=customers, how="left", left_on="Customer ID", right_index=True
).head()

## 10.9 Summary