# 2024 week 4: Unpopular Seats


https://preppindata.blogspot.com/2024/01/2024-week-4-unpopular-seats.html


## Solution


In [1]:
import pandas as pd

# Turn on pandas' copy-on-write mode for the whole notebook.
pd.set_option("mode.copy_on_write", True)

In [2]:
# Read the "Flow Card" sheet of the input workbook ...
flow_card_yes = pd.read_excel("data/input.xlsx", sheet_name="Flow Card")
# ... and tag every row on it as a Flow Card booking.
flow_card_yes["Flow Card?"] = True
flow_card_yes

Unnamed: 0,CustomerID,Seat,Row,Class,Flow Card?
0,654,2,2,FC,True
1,466,4,5,FC,True
2,27,4,3,FC,True
3,519,1,4,FC,True
4,933,2,3,FC,True
...,...,...,...,...,...
9719,3040,10,38,E,True
9720,4429,3,28,E,True
9721,2593,10,37,E,True
9722,4336,6,42,E,True


In [3]:
# Import both non-Flow Card sheets in one call. Passing a list of sheet
# names makes `read_excel` return a dict mapping sheet name -> DataFrame,
# so keep that dict under its own name rather than reusing `flow_card_no`
# for two different kinds of object.
non_flow_card_sheets = pd.read_excel(
    "data/input.xlsx",
    sheet_name=["Non_flow Card", "Non_flow Card2"],
)
# Union the non-Flow Card datasets and add a `Flow Card?` tag
flow_card_no = pd.concat(non_flow_card_sheets.values()).assign(
    **{"Flow Card?": False}
)
flow_card_no

Unnamed: 0,CustomerID,Seat,Row,Class,Flow Card?
0,765,1,3,FC,False
1,501,2,7,FC,False
2,885,4,2,FC,False
3,203,1,5,FC,False
4,676,2,3,FC,False
...,...,...,...,...,...
9735,3005,7,35,E,False
9736,4685,4,27,E,False
9737,2512,8,38,E,False
9738,3863,4,37,E,False


In [4]:
# Stack the Flow Card rows on top of the non-Flow Card rows. Each input keeps
# its own row labels, so index values repeat in the combined frame.
seat_bookings = pd.concat(objs=[flow_card_yes, flow_card_no])
seat_bookings

Unnamed: 0,CustomerID,Seat,Row,Class,Flow Card?
0,654,2,2,FC,True
1,466,4,5,FC,True
2,27,4,3,FC,True
3,519,1,4,FC,True
4,933,2,3,FC,True
...,...,...,...,...,...
9735,3005,7,35,E,False
9736,4685,4,27,E,False
9737,2512,8,38,E,False
9738,3863,4,37,E,False


In [5]:
# Count the bookings in every (Row, Seat, Class, Flow Card?) combination.
# `CustomerID` is the only column left after grouping, so its non-null count
# is the number of bookings; relabel it as `Customers`.
bookings_by_group = (
    seat_bookings.groupby(by=["Row", "Seat", "Class", "Flow Card?"])
    .agg("count")
    .rename({"CustomerID": "Customers"}, axis="columns")
)
bookings_by_group

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Customers
Row,Seat,Class,Flow Card?,Unnamed: 4_level_1
1,1,FC,False,38
1,1,FC,True,20
1,2,FC,False,40
1,2,FC,True,19
1,3,FC,False,51
...,...,...,...,...
42,8,E,True,51
42,9,E,False,81
42,9,E,True,36
42,10,E,False,90


In [6]:
# Collapse the `Flow Card?` level: add the per-group counts together so each
# (Row, Seat, Class) seat carries one total number of bookings.
# NOTE(review): this rebinds `bookings_by_group` from a DataFrame to a Series,
# so the previous cell has to be re-run before this one can be run again.
bookings_by_group = (
    bookings_by_group["Customers"]
    .groupby(level=["Row", "Seat", "Class"])
    .sum()
)
bookings_by_group

Row  Seat  Class
1    1     FC        58
     2     FC        59
     3     FC        64
     4     FC        56
2    1     FC        59
                   ... 
42   6     E         93
     7     E         93
     8     E        119
     9     E        117
     10    E        119
Name: Customers, Length: 289, dtype: int64

In [7]:
# Read the "Seat Plan" sheet of the input workbook (columns: Class, Seat, Row)
seating_plan = pd.read_excel(
    io="data/input.xlsx",
    sheet_name="Seat Plan",
)
seating_plan

Unnamed: 0,Class,Seat,Row
0,FC,1,1
1,FC,2,1
2,FC,3,1
3,FC,4,1
4,FC,1,2
...,...,...,...
291,E,6,42
292,E,7,42
293,E,8,42
294,E,9,42


In [8]:
# Attach the booking totals to the seating plan by matching the plan's
# Row / Seat / Class columns against the index levels of `bookings_by_group`.
# Seats without any matching bookings end up with NaN in `Customers`.
# NOTE(review): "outer" would also keep a booked combination that is missing
# from the seating plan; a "left" join may be closer to the intent -- confirm.
joined = seating_plan.join(
    other=bookings_by_group,
    how="outer",
    on=["Row", "Seat", "Class"],
)
joined

Unnamed: 0,Class,Seat,Row,Customers
0,FC,1,1,58.0
1,FC,2,1,59.0
2,FC,3,1,64.0
3,FC,4,1,56.0
4,FC,1,2,59.0
...,...,...,...,...
291,E,6,42,93.0
292,E,7,42,93.0
293,E,8,42,119.0
294,E,9,42,117.0


In [9]:
# Columns to keep in the result; the same list is also the sort order.
sort_columns = ["Class", "Seat", "Row"]

# A seat was never booked when the join left its `Customers` value missing.
no_customer_bookings = joined["Customers"].isna()

# Keep only those seats, restricted to the output columns, in sorted order
# with a fresh 0..n-1 index.
output = joined.loc[no_customer_bookings, sort_columns].sort_values(
    by=sort_columns,
    ignore_index=True,
)
output

Unnamed: 0,Class,Seat,Row
0,E,5,28
1,E,5,36
2,E,5,40
3,E,5,41
4,E,6,32
5,E,6,37
6,E,6,40


## Tests


In [10]:
import pandas.testing as pdt

In [11]:
# Load the reference answer and put it in the same row order (and with the
# same fresh index) as `output` so the two frames can be compared directly.
expected_output = pd.read_csv("data/output.csv")
expected_output = expected_output.sort_values(by=sort_columns, ignore_index=True)
expected_output

Unnamed: 0,Class,Seat,Row
0,E,5,28
1,E,5,36
2,E,5,40
3,E,5,41
4,E,6,32
5,E,6,37
6,E,6,40


In [12]:
# Raises AssertionError if the computed result differs from the reference.
pdt.assert_frame_equal(left=expected_output, right=output)