In [9]:
"""
INSTRUCTIONS:
Split the Flight Details field to form:
    Date 
    Flight Number
    From
    To
    Class
    Price
Convert the following data fields to the correct data types:
    Date to a date format
    Price to a decimal value
Change the Flow Card field to Yes / No values instead of 1 / 0
Create two tables, one for Flow Card holders and one for non-Flow Card holders
"""
#from pandas.util.testing import assert_frame_equal
from pandas.testing import assert_frame_equal
import pandas as pd
import numpy as np
df = pd.read_csv('../../raw_data/2024/week_1/PD 2024 Wk 1 Input.csv')

# 'Flight Details' is a single '//'-delimited string. Pop it out of the frame
# (so the packed column is removed) and spread its pieces over five new columns.
df[['Date', 'Flight Number', 'From-To', 'Class', 'Price']] = (
    df.pop('Flight Details').str.split('//', expand=True)
)

# The route piece has the form 'From-To'; pop it as well and split on the hyphen.
df[['From', 'To']] = df.pop('From-To').str.split('-', expand=True)

# Date:  parse strictly as YYYY-MM-DD, then render back to DD/MM/YYYY text so the
#        column matches the representation used by the provided solution.
# Price: cast from the split-out text to float and round to 2 decimals (cents).
df = df.assign(
    Date=pd.to_datetime(df['Date'], format='%Y-%m-%d').dt.strftime('%d/%m/%Y'),
    Price=df['Price'].astype(float).round(2),
)


# Relabel the 1 / 0 flags in 'Flow Card?' as 'Yes' / 'No'
# (any other value is left as-is by replace)
df['Flow Card?'] = df['Flow Card?'].replace(to_replace={1: 'Yes', 0: 'No'})

# Arrange the columns in the same order as the solution files so the frames
# can be compared directly
df = df.loc[:, [
    'Date',
    'Flight Number',
    'From',
    'To',
    'Class',
    'Price',
    'Flow Card?',
    'Bags Checked',
    'Meal Type',
]]

# Table 1: Flow Card holders — rows flagged 'Yes', re-indexed from 0
flow_card_holders = df.loc[df['Flow Card?'].eq('Yes')].reset_index(drop=True)

# Table 2: non-Flow Card holders — rows flagged 'No', re-indexed from 0
non_flow_card_holders = df.loc[df['Flow Card?'].eq('No')].reset_index(drop=True)

# Compare the dataframes to the provided solution
flow_card_holders_solution = pd.read_csv('../../solutions/2024/week_1/PD 2024 Wk 1 Output Flow Card.csv')
non_flow_card_holders_solution = pd.read_csv('../../solutions/2024/week_1/PD 2024 Wk 1 Output Non-Flow Card.csv')

# Human-readable summary of the comparison (True / False per table)
print(f'The flow card dataframe is identical to the provided solution: {flow_card_holders.equals(flow_card_holders_solution)}')
print(f'The non flow card dataframe is identical to the provided solution: {non_flow_card_holders.equals(non_flow_card_holders_solution)}')

# Enforce the comparison as well: a printed False is easy to overlook and says
# nothing about *what* differs. assert_frame_equal raises an AssertionError
# that describes the mismatch (shape, column, dtype or values), so a
# Restart & Run All stops here if either table drifts from the solution.
assert_frame_equal(flow_card_holders, flow_card_holders_solution)
assert_frame_equal(non_flow_card_holders, non_flow_card_holders_solution)

# Write the two tables to separate CSV files (export currently disabled; uncomment the lines below to write them)

# flow_card_holders.to_csv('../../prepped_data/2024/week_1/flow_card_holders.csv', index=False)
# non_flow_card_holders.to_csv('../../prepped_data/2024/week_1/non_flow_card_holders.csv', index=False)

The flow card dataframe is identical to the provided solution: True
The non flow card dataframe is identical to the provided solution: True
