# 2024 week 3: Performance Against Targets


Challenge description: https://preppindata.blogspot.com/2024/01/2024-week-3-performance-against-targets.html


## Solution


In [1]:
import pandas as pd

# Enable pandas' Copy-on-Write mode for the whole notebook session.
pd.options.mode.copy_on_write = True

In [2]:
# Load the first sales extract; `Date` is parsed with an explicit
# day/month/year format.
flow_card_yes = pd.read_csv(
    "data/input_flow_card_yes.csv",
    parse_dates=["Date"],
    date_format="%d/%m/%Y",
)
flow_card_yes.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,First Class,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,First Class,446.0,Yes,1,Nut Free


In [3]:
# Load the second sales extract; `Date` is parsed with an explicit
# day/month/year format.
flow_card_no = pd.read_csv(
    "data/input_flow_card_no.csv",
    parse_dates=["Date"],
    date_format="%d/%m/%Y",
)
flow_card_no.head()

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-09-28,PA008,Perth,New York,Economy,1855.0,No,2,Vegetarian
1,2024-10-01,PA008,Perth,New York,Business Class,634.8,No,0,Vegetarian
2,2024-03-04,PA007,New York,Perth,Business Class,458.4,No,3,Nut Free
3,2024-02-25,PA010,Tokyo,New York,Premium Economy,1435.0,No,0,
4,2024-03-29,PA004,Perth,London,Economy,2730.0,No,2,Vegan


In [4]:
# Import all sheets from sales target data.
# `sheet_name=None` reads every sheet and returns a dict of DataFrames keyed
# by sheet name.
target = pd.read_excel(
    "data/input_sales_target.xlsx",
    sheet_name=None,
)
target

{'Q1':     Month Class  Target
 0       1    FC  120000
 1       2    FC  130000
 2       3    FC  140000
 3       1    BC   85000
 4       2    BC   86000
 5       3    BC   87000
 6       1    PE   40000
 7       2    PE   40500
 8       3    PE   41000
 9       1     E   31000
 10      2     E   31500
 11      3     E   32000,
 'Q2':     Month Class  Target
 0       4    FC  160000
 1       5    FC  170000
 2       6    FC  180000
 3       4    BC   88000
 4       5    BC   89000
 5       6    BC   90000
 6       4    PE   42500
 7       5    PE   43000
 8       6    PE   43500
 9       4     E   34000
 10      5     E   34500
 11      6     E   35000,
 'Q3':     Month Class  Target
 0       7    FC  190000
 1       8    FC  200000
 2       9    FC  210000
 3       7    BC   92000
 4       8    BC   93000
 5       9    BC   95000
 6       7    PE   46000
 7       8    PE   46500
 8       9    PE   47000
 9       7     E   37000
 10      8     E   37500
 11      9     E   38000,
 'Q4

In [5]:
# Union all sales target data: stack the per-sheet frames into one table.
# Each sheet keeps its own row labels, so index values repeat in the result.
# Caution: `target` is rebound from a dict to a DataFrame here, so this cell
# cannot be re-run on its own without re-running the import cell first.
target = pd.concat(list(target.values()))
target

Unnamed: 0,Month,Class,Target
0,1,FC,120000
1,2,FC,130000
2,3,FC,140000
3,1,BC,85000
4,2,BC,86000
5,3,BC,87000
6,1,PE,40000
7,2,PE,40500
8,3,PE,41000
9,1,E,31000


In [6]:
# Union the two datasets for sales (row labels from each file are kept as-is)
sales = pd.concat([flow_card_yes, flow_card_no])
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,First Class,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,First Class,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,2024-03-06,PA006,Tokyo,London,Premium Economy,940.0,No,2,Vegetarian
1891,2024-05-05,PA009,New York,Tokyo,Economy,1360.0,No,3,Nut Free
1892,2024-06-14,PA008,Perth,New York,First Class,245.0,No,1,Dairy Free
1893,2024-01-16,PA010,Tokyo,New York,Economy,2410.0,No,2,Egg Free


In [7]:
# Fix incorrect `Class` names by exchanging the "Economy" and "First Class"
# labels.
# NOTE(review): the analogous "Business Class" <-> "Premium Economy" exchange
# was present only as commented-out entries and is not applied; confirm that
# leaving it out is intended.
# Caution: this cell rebinds `sales`, so running it twice swaps the labels back.
class_fixes = {
    "Economy": "First Class",
    "First Class": "Economy",
}
sales = sales.replace({"Class": class_fixes})
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,First Class,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,First Class,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,Economy,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,Economy,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,2024-03-06,PA006,Tokyo,London,Premium Economy,940.0,No,2,Vegetarian
1891,2024-05-05,PA009,New York,Tokyo,First Class,1360.0,No,3,Nut Free
1892,2024-06-14,PA008,Perth,New York,Economy,245.0,No,1,Dairy Free
1893,2024-01-16,PA010,Tokyo,New York,First Class,2410.0,No,2,Egg Free


In [8]:
def initials(words):
    """Return the first character of every whitespace-separated word, joined.

    For example, ``"Premium Economy"`` becomes ``"PE"``. A string with no
    words (empty or whitespace only) yields ``""``.
    """
    first_letters = [token[0] for token in words.split()]
    return "".join(first_letters)


# Convert `Class` to initials so it can be matched against the `Class` codes
# used in the target data (FC, BC, PE, E).
# Caution: the column is overwritten, so re-running this cell would shorten
# the already-abbreviated codes again.
sales["Class"] = sales["Class"].map(initials)
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,FC,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,FC,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,E,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,E,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,2024-03-06,PA006,Tokyo,London,PE,940.0,No,2,Vegetarian
1891,2024-05-05,PA009,New York,Tokyo,FC,1360.0,No,3,Nut Free
1892,2024-06-14,PA008,Perth,New York,E,245.0,No,1,Dairy Free
1893,2024-01-16,PA010,Tokyo,New York,FC,2410.0,No,2,Egg Free


In [9]:
# Convert `Date` to the corresponding month number.
# The column keeps the name `Date` but holds only the month from here on; it is
# later matched against `Month` in the target data.
# Caution: the column is overwritten in place, so this cell is not re-runnable
# on its own (the `.dt` accessor needs the original datetime values).
sales["Date"] = sales["Date"].dt.month
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,7,PA010,Tokyo,New York,FC,2380.0,Yes,0,Egg Free
1,4,PA002,New York,London,FC,3490.0,Yes,1,Vegan
2,1,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian
3,6,PA006,Tokyo,London,E,618.0,Yes,3,Vegan
4,3,PA004,Perth,London,E,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,3,PA006,Tokyo,London,PE,940.0,No,2,Vegetarian
1891,5,PA009,New York,Tokyo,FC,1360.0,No,3,Nut Free
1892,6,PA008,Perth,New York,E,245.0,No,1,Dairy Free
1893,1,PA010,Tokyo,New York,FC,2410.0,No,2,Egg Free


In [10]:
# Aggregate sales by `Date` and `Class`: total `Price` per (month, class) pair,
# with the grouping keys kept as ordinary columns.
sales = sales.groupby(["Date", "Class"], as_index=False)["Price"].sum()
sales

Unnamed: 0,Date,Class,Price
0,1,BC,48555.6
1,1,E,36081.0
2,1,FC,193960.0
3,1,PE,67297.5
4,2,BC,46335.6
5,2,E,30968.0
6,2,FC,145665.0
7,2,PE,69222.5
8,3,BC,47875.2
9,3,E,31829.0


In [11]:
# Join sales and target data: index the targets by (`Month`, `Class`) and look
# them up from the sales columns (`Date`, `Class`). `DataFrame.join` keeps every
# sales row by default.
target_lookup = target.set_index(["Month", "Class"])
df = sales.join(target_lookup, on=["Date", "Class"])
df

Unnamed: 0,Date,Class,Price,Target
0,1,BC,48555.6,85000
1,1,E,36081.0,31000
2,1,FC,193960.0,120000
3,1,PE,67297.5,40000
4,2,BC,46335.6,86000
5,2,E,30968.0,31500
6,2,FC,145665.0,130000
7,2,PE,69222.5,40500
8,3,BC,47875.2,87000
9,3,E,31829.0,32000


In [12]:
# Column order of the final table. The same list is used as the row sort key,
# here and again in the testing section for the expected output.
sort_columns = ["Difference to Target", "Date", "Price", "Class", "Target"]

# Calculate difference between sales and target, reorder the columns, then
# sort the rows by every column and renumber them from 0.
df = (
    df.assign(**{"Difference to Target": df["Price"] - df["Target"]})
    .loc[:, sort_columns]
    .sort_values(by=sort_columns, ignore_index=True)
)
df

Unnamed: 0,Difference to Target,Date,Price,Class,Target
0,-75405.0,11,154595.0,FC,230000
1,-59628.0,11,39372.0,BC,99000
2,-54652.0,12,45348.0,BC,100000
3,-54310.4,10,43689.6,BC,98000
4,-52946.0,9,42054.0,BC,95000
5,-51985.0,12,188015.0,FC,240000
6,-51741.2,7,40258.8,BC,92000
7,-51531.6,8,41468.4,BC,93000
8,-49766.8,4,38233.2,BC,88000
9,-45166.4,5,43833.6,BC,89000


## Testing


In [13]:
import pandas.testing as pdt

In [14]:
# Load the reference result and put its rows in the same order as `df`
# (sorted by `sort_columns`, with a fresh 0..n-1 index).
expected_output = pd.read_csv("data/output.csv")
expected_output = expected_output.sort_values(by=sort_columns, ignore_index=True)

In [15]:
# Raises an AssertionError if the computed frame differs from the expected
# output. `check_dtype=False` skips the dtype comparison, so columns are
# compared without requiring identical dtypes.
pdt.assert_frame_equal(expected_output, df, check_dtype=False)