# Preppin' Data
## 2024: Week 3 Performance Against Targets
**Created by:** Carl Allchin | [Challenge Link](https://preppindata.blogspot.com/2024/01/2024-week-3-performance-against-targets.html)

This week's challenge is to link together a Quarterly Sales Target data source (an Excel Workbook) with our original sales data (Week One output).<br>
Is Prep Air meeting its targets?

In [1]:
# Input the outputs from 2024 Week 1 challenge
import pandas as pd
flow = pd.read_csv("PD 2024 Wk 1 Output Flow Card.csv", parse_dates=["Date"], date_format="%d/%m/%Y").sort_index()
nonflow = pd.read_csv("PD 2024 Wk 1 Output Non-Flow Card.csv", parse_dates=["Date"], date_format="%d/%m/%Y").sort_index()

In [2]:
flow

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,Economy,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,Economy,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,Premium Economy,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,First Class,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,First Class,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1878,2024-11-23,PA005,London,Tokyo,Economy,2070.0,Yes,2,Egg Free
1879,2024-11-04,PA003,London,Perth,First Class,210.0,Yes,3,Nut Free
1880,2024-04-29,PA012,Tokyo,Perth,Economy,3490.0,Yes,0,Dairy Free
1881,2024-09-26,PA001,London,New York,First Class,207.0,Yes,2,Vegetarian


In [3]:
nonflow

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-09-28,PA008,Perth,New York,Economy,1855.0,No,2,Vegetarian
1,2024-10-01,PA008,Perth,New York,Business Class,634.8,No,0,Vegetarian
2,2024-03-04,PA007,New York,Perth,Business Class,458.4,No,3,Nut Free
3,2024-02-25,PA010,Tokyo,New York,Premium Economy,1435.0,No,0,
4,2024-03-29,PA004,Perth,London,Economy,2730.0,No,2,Vegan
...,...,...,...,...,...,...,...,...,...
1890,2024-03-06,PA006,Tokyo,London,Premium Economy,940.0,No,2,Vegetarian
1891,2024-05-05,PA009,New York,Tokyo,Economy,1360.0,No,3,Nut Free
1892,2024-06-14,PA008,Perth,New York,First Class,245.0,No,1,Dairy Free
1893,2024-01-16,PA010,Tokyo,New York,Economy,2410.0,No,2,Egg Free


In [4]:
# Union the flow and non-flow card tables together again
sales = pd.concat([flow, nonflow])

In [5]:
# Input the new targets Excel sheet (Q1 - 4) 
targets = pd.read_excel("PD 2024 Wk 3 Input.xlsx", sheet_name=None)
targets = pd.concat(targets.values(), ignore_index=True)

In [6]:
targets

Unnamed: 0,Month,Class,Target
0,1,FC,120000
1,2,FC,130000
2,3,FC,140000
3,1,BC,85000
4,2,BC,86000
5,3,BC,87000
6,1,PE,40000
7,2,PE,40500
8,3,PE,41000
9,1,E,31000


In [7]:
# Correct the Classes being incorrect as per last week:
# Economy to First
# First Class to Economy
# Business Class to Premium
# Premium Economy to Business
targets["Class"] = targets["Class"].replace({"E": "FC", 
                                            "FC": "E", 
                                            "BC": "PE",
                                            "PE": "BC"})
targets

Unnamed: 0,Month,Class,Target
0,1,E,120000
1,2,E,130000
2,3,E,140000
3,1,PE,85000
4,2,PE,86000
5,3,PE,87000
6,1,BC,40000
7,2,BC,40500
8,3,BC,41000
9,1,FC,31000


In [8]:
# Find the First Letter from each word in the Class to help with joining the Targets data to Sales data
sales.loc[sales["Class"].str.startswith("F"), "Class"] = "FC"
sales.loc[sales["Class"].str.startswith("B"), "Class"] = "BC"
sales.loc[sales["Class"].str.startswith("P"), "Class"] = "PE"
sales.loc[sales["Class"].str.startswith("E"), "Class"] = "E"
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,2024-07-22,PA010,Tokyo,New York,E,2380.0,Yes,0,Egg Free
1,2024-04-20,PA002,New York,London,E,3490.0,Yes,1,Vegan
2,2024-01-23,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian
3,2024-06-05,PA006,Tokyo,London,FC,618.0,Yes,3,Vegan
4,2024-03-30,PA004,Perth,London,FC,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,2024-03-06,PA006,Tokyo,London,PE,940.0,No,2,Vegetarian
1891,2024-05-05,PA009,New York,Tokyo,E,1360.0,No,3,Nut Free
1892,2024-06-14,PA008,Perth,New York,FC,245.0,No,1,Dairy Free
1893,2024-01-16,PA010,Tokyo,New York,E,2410.0,No,2,Egg Free


In [9]:
# Change the date to a month number 
sales["Date"] = sales["Date"].dt.month.astype(int)
sales

Unnamed: 0,Date,Flight Number,From,To,Class,Price,Flow Card?,Bags Checked,Meal Type
0,7,PA010,Tokyo,New York,E,2380.0,Yes,0,Egg Free
1,4,PA002,New York,London,E,3490.0,Yes,1,Vegan
2,1,PA010,Tokyo,New York,PE,825.0,Yes,1,Vegetarian
3,6,PA006,Tokyo,London,FC,618.0,Yes,3,Vegan
4,3,PA004,Perth,London,FC,446.0,Yes,1,Nut Free
...,...,...,...,...,...,...,...,...,...
1890,3,PA006,Tokyo,London,PE,940.0,No,2,Vegetarian
1891,5,PA009,New York,Tokyo,E,1360.0,No,3,Nut Free
1892,6,PA008,Perth,New York,FC,245.0,No,1,Dairy Free
1893,1,PA010,Tokyo,New York,E,2410.0,No,2,Egg Free


In [10]:
# Total up the sales at the level of: Class, Month
sales_pivot = sales.pivot_table(values="Price", index=["Class", "Date"], aggfunc="sum").reset_index()
sales_pivot.columns = ["Class", "Month", "Sales"]
sales_pivot

Unnamed: 0,Class,Month,Sales
0,BC,1,48555.6
1,BC,2,46335.6
2,BC,3,47875.2
3,BC,4,38233.2
4,BC,5,43833.6
5,BC,6,46538.4
6,BC,7,40258.8
7,BC,8,41468.4
8,BC,9,42054.0
9,BC,10,43689.6


In [11]:
# Join the Targets data on to the Sales data (note - you should have 48 rows of data after the join)
sales_targets = sales_pivot.merge(targets, how="left", on=["Month", "Class"])
sales_targets

Unnamed: 0,Class,Month,Sales,Target
0,BC,1,48555.6,40000
1,BC,2,46335.6,40500
2,BC,3,47875.2,41000
3,BC,4,38233.2,42500
4,BC,5,43833.6,43000
5,BC,6,46538.4,43500
6,BC,7,40258.8,46000
7,BC,8,41468.4,46500
8,BC,9,42054.0,47000
9,BC,10,43689.6,48000


In [12]:
# Calculate the difference between the Sales and Target values per Class and Month
sales_targets["Difference to Target"] = sales_targets["Sales"] - sales_targets["Target"]
sales_targets

Unnamed: 0,Class,Month,Sales,Target,Difference to Target
0,BC,1,48555.6,40000,8555.6
1,BC,2,46335.6,40500,5835.6
2,BC,3,47875.2,41000,6875.2
3,BC,4,38233.2,42500,-4266.8
4,BC,5,43833.6,43000,833.6
5,BC,6,46538.4,43500,3038.4
6,BC,7,40258.8,46000,-5741.2
7,BC,8,41468.4,46500,-5031.6
8,BC,9,42054.0,47000,-4946.0
9,BC,10,43689.6,48000,-4310.4


## Output
48 rows (49 including headers)<br><br>
**5 data fields:**
- Difference to Target
- Date
- Price
- Class
- Target

In [13]:
# Output the data
output = sales_targets.rename_axis(columns=None)
output

Unnamed: 0,Class,Month,Sales,Target,Difference to Target
0,BC,1,48555.6,40000,8555.6
1,BC,2,46335.6,40500,5835.6
2,BC,3,47875.2,41000,6875.2
3,BC,4,38233.2,42500,-4266.8
4,BC,5,43833.6,43000,833.6
5,BC,6,46538.4,43500,3038.4
6,BC,7,40258.8,46000,-5741.2
7,BC,8,41468.4,46500,-5031.6
8,BC,9,42054.0,47000,-4946.0
9,BC,10,43689.6,48000,-4310.4


In [14]:
# Generating csv output file
output.to_csv("output-202403.csv", index=False)