In [1]:
def extract_hour(hour):
    
    if 6 <= hour < 12:
        return "Morning"
    elif 12 <= hour < 18:
        return "Afternoon"
    elif 18 <= hour < 22:
        return "Evening"
    else:
        return "Night"
    
def extract_month(date):

    month = int(date.split('-')[1])
    day = int(date.split('-')[0])
    datetime = f"2024/{month}/{day}"

    datetime = pd.to_datetime(datetime, format='%Y/%m/%d')
    snapshot_date = "2024/07/15"
    snapshot_date = pd.to_datetime(snapshot_date, format='%Y/%m/%d')
    days_left = (datetime - snapshot_date).days
    return days_left

def extract_duration(depart_hour, depart_min, arrive_hour, arrive_min):
    depart = int(depart_hour) * 60 + int(depart_min)
    arrive = int(arrive_hour) * 60 + int(arrive_min)
    duration = arrive - depart
    if duration < 0:
        duration += 1440
    duration = float(duration / 60)

    # Round to 2 decimal places
    duration = round(duration, 2)
    return duration

In [2]:
import warnings
import pandas as pd

def preprocess_input(features):
    warnings.filterwarnings("ignore")
    X = pd.DataFrame([features])
    days_left = [extract_month(date) for date in X["depart_date"]]
    X["days_left"] = days_left
    X["duration"] = extract_duration(X["depart_hour"], X["depart_min"], X["arrival_hour"], X["arrival_min"])
    X["depart_hour"] = X["depart_hour"].apply(extract_hour)
    X["arrival_hour"] = X["arrival_hour"].apply(extract_hour)

    # Drop the original columns
    X = X.drop(columns=["depart_date", "depart_min", "arrival_min"])

    # Rename depart_hour and arrival_hour into depart_time and arrival_time
    X = X.rename(columns={"depart_hour": "depart_time", "arrival_hour": "arrival_time"})

    # One hot encode based on the feature columns
    feature = pd.read_csv('features.csv')
    columns = feature.columns
    print(len(columns))

    X = pd.get_dummies(X)
    X = X.reindex(columns=columns, fill_value=False)
    print(X)
    return X

In [3]:
features = {
    'airline': 'VietjetAir', # {'VietnamAirlines', 'BambooAirways', 'VietjetAir'}
    'src': "SGN", # {'SGN', 'HAN', 'DAD', 'CXR', 'PQC'}
    'dest': "CXR", # {'SGN', 'HAN', 'DAD', 'CXR', 'PQC'}
    'fare_class': "Economy", # {'Economy', 'Business'}
    'depart_date': "01-08", # 'dd-mm'
    'depart_hour': 6, # 0-23
    'depart_min': 30, # 0-59
    'arrival_hour': 8, # 0-23
    'arrival_min': 25, # 0-59
    "cabin_luggage": 0, # int
    "hand_luggage": 7 # int
}

In [7]:
import joblib

flight_price_predictor = joblib.load('flight_price_predictor.pkl')    

X = preprocess_input(features)
fare_pred = flight_price_predictor.predict(X)[0]
fare_pred = "{:0,.0f}đ".format(fare_pred)
fare_pred

27
   cabin_luggage  hand_luggage  duration  days_left  airline_BambooAirways  \
0              0             7      1.92         17                  False   

   airline_VietJetAir  airline_VietnamAirlines  fare_class_Business  \
0               False                    False                False   

   fare_class_Economy  depart_time_Afternoon  ...  src_CXR  src_DAD  src_HAN  \
0                True                  False  ...    False    False    False   

   src_PQC  src_SGN  dest_CXR  dest_DAD  dest_HAN  dest_PQC  dest_SGN  
0    False     True      True     False     False     False     False  

[1 rows x 27 columns]


'2,219,919đ'