# A. LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import pickle
import os

# B. MODEL LOAD

In [2]:
def normalize(df):
    """Log-transform a fixed set of numeric booking columns.

    Replaces ``lead_time``, ``arrival_date_week_number``,
    ``arrival_date_day_of_month``, ``agent`` and ``adr`` with
    ``log(value + 1)``. Every other column is passed through unchanged.

    The work is done on a copy, so the frame passed in is left untouched and
    calling the function again on the same input gives the same result
    instead of applying the logarithm a second time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the five columns listed above.

    Returns
    -------
    pandas.DataFrame
        A new frame with the five columns log-transformed.

    Raises
    ------
    KeyError
        If one of the five columns is missing from ``df``.
    """
    log_columns = (
        'lead_time',
        'arrival_date_week_number',
        'arrival_date_day_of_month',
        'agent',
        'adr',
    )

    # Copy first: assigning into the caller's frame would make repeated calls
    # on the same data compound the transform.
    transformed = df.copy()
    for column in log_columns:
        # Deliberately kept as log(x + 1) rather than np.log1p so the numbers
        # are bit-identical to the original expression.
        # NOTE(review): presumably the fitted model expects exactly these
        # values -- confirm before changing the formula.
        transformed[column] = np.log(transformed[column] + 1)
    return transformed

In [3]:
def encode(df):
    """Replace categorical booking columns with fixed integer codes.

    Each column listed in the table below is mapped through its dictionary
    with ``Series.map``; a value that is not a key of the dictionary becomes
    ``NaN``. Columns not listed are passed through unchanged.

    The work is done on a copy, so the frame passed in is left untouched.
    Calling the function twice on the same input therefore gives the same
    result, instead of mapping already-encoded integers to ``NaN``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the nine columns listed in the table.

    Returns
    -------
    pandas.DataFrame
        A new frame with the nine columns integer-encoded.

    Raises
    ------
    KeyError
        If one of the nine columns is missing from ``df``.
    """
    # NOTE(review): the codes are reproduced exactly as found. 'deposit_type'
    # skips code 2 and 'year' is not in chronological order; presumably both
    # match the encoding used when the model was trained -- confirm before
    # "tidying" them.
    category_codes = {
        'hotel': {'Resort Hotel': 0, 'City Hotel': 1},
        'meal': {'BB': 0, 'FB': 1, 'HB': 2, 'SC': 3, 'Undefined': 4},
        'market_segment': {
            'Direct': 0, 'Corporate': 1, 'Online TA': 2, 'Offline TA/TO': 3,
            'Complementary': 4, 'Groups': 5, 'Undefined': 6, 'Aviation': 7,
        },
        'distribution_channel': {
            'Direct': 0, 'Corporate': 1, 'TA/TO': 2, 'Undefined': 3, 'GDS': 4,
        },
        'reserved_room_type': {
            'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5,
            'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11,
        },
        'deposit_type': {'No Deposit': 0, 'Refundable': 1, 'Non Refund': 3},
        'customer_type': {
            'Transient': 0, 'Contract': 1, 'Transient-Party': 2, 'Group': 3,
        },
        'season': {'Spring': 0, 'Summer': 1, 'Fall': 2, 'Winter': 3},
        'year': {2015: 0, 2014: 1, 2016: 2, 2017: 3},
    }

    # Copy first: assigning into the caller's frame would make a second call
    # on the same data turn every encoded value into NaN.
    encoded = df.copy()
    for column, codes in category_codes.items():
        encoded[column] = encoded[column].map(codes)
    return encoded

In [4]:
# Directory holding the serialized model artifacts; a relative path, so it is
# resolved against the current working directory.
base_path = 'deployment'

pipeline_path = os.path.join(base_path, 'pipeline_best.pkl')

# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load a pipeline_best.pkl that comes from a trusted source.
# NOTE(review): normalize() and encode() are defined before this cell but are
# never called directly in the notebook; presumably the pickled pipeline
# refers to them by name and they must exist before unpickling -- confirm.
with open(pipeline_path, 'rb') as file_1:
    pipeline_best = pickle.load(file_1)

# C. DATA INFERENCE

In [5]:
# One hand-written booking used as the inference sample.
# NOTE(review): the record carries the target `is_canceled` and the
# outcome-derived `reservation_status`; presumably the pipeline ignores or
# drops them -- confirm.
booking_record = {
    # Hotel and arrival
    'hotel': 'Resort Hotel',
    'is_canceled': 0,
    'lead_time': 342,
    'arrival_date_year': 2015,
    'arrival_date_month': 'July',
    'arrival_date_week_number': 27,
    'arrival_date_day_of_month': 1,
    # Stay length and party
    'stays_in_weekend_nights': 0,
    'stays_in_week_nights': 0,
    'adults': 2,
    'children': 0.0,
    'babies': 0,
    # Booking channel and guest history
    'meal': 'BB',
    'country': 'PRT',
    'market_segment': 'Direct',
    'distribution_channel': 'Direct',
    'is_repeated_guest': 0,
    'previous_cancellations': 0,
    'previous_bookings_not_canceled': 0,
    # Room, deposit and intermediaries
    'reserved_room_type': 'C',
    'assigned_room_type': 'C',
    'booking_changes': 3,
    'deposit_type': 'No Deposit',
    'agent': 0.0,
    'company': 0.0,
    'days_in_waiting_list': 0,
    'customer_type': 'Transient',
    # Price, extras and final status
    'adr': 0.0,
    'required_car_parking_spaces': 0,
    'total_of_special_requests': 0,
    'reservation_status': 'Check-Out',
    'reservation_status_date': '2015-07-01',
}

# Wrap the single record in a list so it becomes a one-row DataFrame.
data_inf = pd.DataFrame([booking_record])
data_inf

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,0.0,0.0,0,Transient,0.0,0,0,Check-Out,2015-07-01


The inference data was created manually.

# D. FEATURE CREATION

In [6]:
# Derive the season from the arrival month. Any month value that is not a key
# of this table falls back to "Fall".
season_by_month = {
    "January": "Winter", "February": "Winter", "December": "Winter",
    "March": "Spring", "April": "Spring", "May": "Spring",
    "June": "Summer", "July": "Summer", "August": "Summer",
}
data_inf["season"] = data_inf["arrival_date_month"].apply(
    lambda month_name: season_by_month.get(month_name, "Fall")
)

# Parse the status date so that its calendar parts can be split out.
# Note that year/month/day come from reservation_status_date, not from the
# arrival date columns.
data_inf['reservation_status_date'] = pd.to_datetime(data_inf['reservation_status_date'])
status_date_parts = data_inf['reservation_status_date'].dt
data_inf['year'] = status_date_parts.year
data_inf['month'] = status_date_parts.month
data_inf['day'] = status_date_parts.day

# The raw date and the month name are dropped once the derived columns exist.
data_inf.drop(columns=['reservation_status_date', 'arrival_date_month'], inplace=True)

data_inf.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,...,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,season,year,month,day
0,Resort Hotel,0,342,2015,27,1,0,0,2,0.0,...,0,Transient,0.0,0,0,Check-Out,Summer,2015,7,1


# E. INFERENCE

In [7]:
# Predictions on the made-up data
# NOTE(review): data_inf is passed with every remaining column, including the
# target `is_canceled` and `reservation_status`; presumably pipeline_best
# selects and encodes the columns it needs internally -- confirm.
test_predictions = pipeline_best.predict(data_inf)

In [8]:
# Display the array of predicted labels returned by the pipeline.
test_predictions

array([0], dtype=int64)

The reservation is predicted not to be canceled.