In [1]:
import pandas as pd
import numpy as np
import datetime

In [46]:
pd.options.display.max_rows = 60
pd.options.display.max_columns = 60

In [3]:
h1_res = pd.read_pickle("pickle/h1_res.pick")
h2_res = pd.read_pickle("pickle/h2_res.pick")

In [4]:
h1_res.head(2)

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0


In [5]:
h1_res.ReservationStatus.value_counts()

Check-Out    28938
Canceled     10831
No-Show        291
Name: ReservationStatus, dtype: int64

In [6]:
len(h1_res.Company.unique())

236

## One-Hot-Encoding Categorical Variables

We will encode the following variables:
* `CustomerType`
* `ReservationStatus`
* `MarketSegment`
* `DistributionChannel`
* `DepositType`
* `Agent` (will be boolean True/False for whether or not there was a travel agent)
* `Company` (will be boolean True/False for whether or not there was a Company noted on reservaton)

In [7]:
# one-hot-encode CustomerType
h1_res[['is_grp', 'is_trn', 'is_trnP']] = pd.get_dummies(h1_res.CustomerType, drop_first=True)

In [8]:
# one-hot-encode ResStatus (IsCanceled already included, so only keeping no-show (checkout can be inferred))
h1_res[['CheckOut', 'No-Show']] = pd.get_dummies(h1_res.ReservationStatus, drop_first=True)
h1_res.drop(columns=['CheckOut'], inplace=True)
h1_res.head(2)

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0


In [9]:
# ohe MarketSegment
h1_res[["MS_Corporate", "MS_Direct", "MS_Group", "MS_OfflineTA", "MS_OnlineTA"]] = pd.get_dummies(h1_res.MarketSegment, drop_first=True)
h1_res.head(2)

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,MS_Corporate,MS_Direct,MS_Group,MS_OfflineTA,MS_OnlineTA
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0


In [20]:
h1_res.MarketSegment.value_counts()

Online TA        17729
Offline TA/TO     7472
Direct            6513
Groups            5836
Corporate         2309
Complementary      201
Name: MarketSegment, dtype: int64

In [21]:
h2_res.MarketSegment.value_counts()

Online TA        38748
Offline TA/TO    16747
Groups           13975
Direct            6093
Corporate         2986
Complementary      542
Aviation           237
Undefined            2
Name: MarketSegment, dtype: int64

In [50]:
list(pd.get_dummies(h2_res.MarketSegment, drop_first=True).columns)

['Complementary',
 'Corporate',
 'Direct',
 'Groups',
 'Offline TA/TO',
 'Online TA',
 'Undefined']

In [30]:
mkt_seg_cols = list(pd.get_dummies(h2_res.MarketSegment, drop_first=True).columns)
h2_res[mkt_seg_cols] = pd.get_dummies(h2_res.MarketSegment, drop_first=True)

mkt_seg_cols = list(pd.get_dummies(h1_res.MarketSegment, drop_first=True).columns)
h1_res[mkt_seg_cols] = pd.get_dummies(h1_res.MarketSegment, drop_first=True)

In [32]:
pd.get_dummies(h2_res.DistributionChannel, drop_first=True).drop(columns='Undefined')

Unnamed: 0,Direct,GDS,TA/TO
0,0,0,1
1,0,0,1
2,0,0,1
3,0,0,1
4,0,0,1
...,...,...,...
79325,0,0,1
79326,0,0,1
79327,0,0,1
79328,0,0,1


In [34]:
# ohe DistributionChannel
# h1_res[["DC_Direct", "TA_TO"]] = pd.get_dummies(h1_res.DistributionChannel, drop_first=True).drop(columns='Undefined')

dist_channel_cols = list(pd.get_dummies(h1_res.DistributionChannel, drop_first=True).columns)
h1_res[dist_channel_cols] = pd.get_dummies(h1_res.DistributionChannel, drop_first=True)

In [11]:
h1_res[["DT_NonRefundable", "DT_Refundable"]] = pd.get_dummies(h1_res.DepositType, drop_first=True)

In [37]:
h1_res.ReservationStatus.value_counts()

Check-Out    28938
Canceled     10831
No-Show        291
Name: ReservationStatus, dtype: int64

In [38]:
h2_res.ReservationStatus.value_counts()

Check-Out    46228
Canceled     32186
No-Show        916
Name: ReservationStatus, dtype: int64

In [13]:
# convert these columns to simply exists/does not exist
h1_res["AgencyBooking"] = ~h1_res["Agent"].isnull()
h1_res["CompanyListed"] = ~h1_res["Company"].isnull()
h1_res

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,MS_Corporate,MS_Direct,MS_Group,MS_OfflineTA,MS_OnlineTA,DC_Direct,TA_TO,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
2,2,0,7,2015,July,27,1,0,1,1,0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
3,3,0,13,2015,July,27,1,0,1,1,0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,1,0,0,0,0,0,0,0,0,True,True
4,4,0,14,2015,July,27,1,0,2,2,0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,2015-07-01,2,0,1,0,0,0,0,0,0,1,0,1,0,0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40055,40055,0,212,2017,August,35,31,2,8,2,1,0,BB,GBR,Offline TA/TO,TA/TO,0,0,0,A,A,1,No Deposit,143,,0,Transient,89.75,0,0,Check-Out,2017-09-10,2017-08-31,10,0,1,0,0,0,0,0,1,0,0,1,0,0,True,True
40056,40056,0,169,2017,August,35,30,2,9,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient-Party,202.27,0,1,Check-Out,2017-09-10,2017-08-30,11,0,0,1,0,0,1,0,0,0,1,0,0,0,True,True
40057,40057,0,204,2017,August,35,29,4,10,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient,153.57,0,3,Check-Out,2017-09-12,2017-08-29,14,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
40058,40058,0,211,2017,August,35,31,4,10,2,0,0,HB,GBR,Offline TA/TO,TA/TO,0,0,0,D,D,0,No Deposit,40,,0,Contract,112.80,0,1,Check-Out,2017-09-14,2017-08-31,14,0,0,0,0,0,0,0,1,0,0,1,0,0,True,True


In [14]:
h1_res.Agent.value_counts()

        240    13905
       NULL     8209
        250     2869
        241     1721
         40     1002
               ...  
        367        1
        331        1
        114        1
        406        1
        167        1
Name: Agent, Length: 186, dtype: int64

In [15]:
h1_res[h1_res.Agent != np.NaN]

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,MS_Corporate,MS_Direct,MS_Group,MS_OfflineTA,MS_OnlineTA,DC_Direct,TA_TO,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
2,2,0,7,2015,July,27,1,0,1,1,0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
3,3,0,13,2015,July,27,1,0,1,1,0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,0,1,0,0,0,0,0,0,0,0,True,True
4,4,0,14,2015,July,27,1,0,2,2,0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,2015-07-01,2,0,1,0,0,0,0,0,0,1,0,1,0,0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40055,40055,0,212,2017,August,35,31,2,8,2,1,0,BB,GBR,Offline TA/TO,TA/TO,0,0,0,A,A,1,No Deposit,143,,0,Transient,89.75,0,0,Check-Out,2017-09-10,2017-08-31,10,0,1,0,0,0,0,0,1,0,0,1,0,0,True,True
40056,40056,0,169,2017,August,35,30,2,9,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient-Party,202.27,0,1,Check-Out,2017-09-10,2017-08-30,11,0,0,1,0,0,1,0,0,0,1,0,0,0,True,True
40057,40057,0,204,2017,August,35,29,4,10,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient,153.57,0,3,Check-Out,2017-09-12,2017-08-29,14,0,1,0,0,0,1,0,0,0,1,0,0,0,True,True
40058,40058,0,211,2017,August,35,31,4,10,2,0,0,HB,GBR,Offline TA/TO,TA/TO,0,0,0,D,D,0,No Deposit,40,,0,Contract,112.80,0,1,Check-Out,2017-09-14,2017-08-31,14,0,0,0,0,0,0,0,1,0,0,1,0,0,True,True


In [16]:
dates = list(h1_res.ArrivalDate)
dows = [datetime.date.strftime(date, "%a") for date in dates]
h1_res.insert(37, "ArrivalDOW", dows)

In [17]:
h1_res

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,ArrivalDOW,No-Show,MS_Corporate,MS_Direct,MS_Group,MS_OfflineTA,MS_OnlineTA,DC_Direct,TA_TO,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,342,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,Wed,0,0,1,0,0,0,1,0,0,0,True,True
1,1,0,737,2015,July,27,1,0,0,2,0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.00,0,0,Check-Out,2015-07-01,2015-07-01,0,0,1,0,Wed,0,0,1,0,0,0,1,0,0,0,True,True
2,2,0,7,2015,July,27,1,0,1,1,0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,Wed,0,0,1,0,0,0,1,0,0,0,True,True
3,3,0,13,2015,July,27,1,0,1,1,0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304,,0,Transient,75.00,0,0,Check-Out,2015-07-02,2015-07-01,1,0,1,0,Wed,0,1,0,0,0,0,0,0,0,0,True,True
4,4,0,14,2015,July,27,1,0,2,2,0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240,,0,Transient,98.00,0,1,Check-Out,2015-07-03,2015-07-01,2,0,1,0,Wed,0,0,0,0,0,1,0,1,0,0,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40055,40055,0,212,2017,August,35,31,2,8,2,1,0,BB,GBR,Offline TA/TO,TA/TO,0,0,0,A,A,1,No Deposit,143,,0,Transient,89.75,0,0,Check-Out,2017-09-10,2017-08-31,10,0,1,0,Thu,0,0,0,0,1,0,0,1,0,0,True,True
40056,40056,0,169,2017,August,35,30,2,9,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient-Party,202.27,0,1,Check-Out,2017-09-10,2017-08-30,11,0,0,1,Wed,0,0,1,0,0,0,1,0,0,0,True,True
40057,40057,0,204,2017,August,35,29,4,10,2,0,0,BB,IRL,Direct,Direct,0,0,0,E,E,0,No Deposit,250,,0,Transient,153.57,0,3,Check-Out,2017-09-12,2017-08-29,14,0,1,0,Tue,0,0,1,0,0,0,1,0,0,0,True,True
40058,40058,0,211,2017,August,35,31,4,10,2,0,0,HB,GBR,Offline TA/TO,TA/TO,0,0,0,D,D,0,No Deposit,40,,0,Contract,112.80,0,1,Check-Out,2017-09-14,2017-08-31,14,0,0,0,Thu,0,0,0,0,1,0,0,1,0,0,True,True


In [18]:
h1_res.columns

Index(['ResNum', 'IsCanceled', 'LeadTime', 'ArrivalDateYear',
       'ArrivalDateMonth', 'ArrivalDateWeekNumber', 'ArrivalDateDayOfMonth',
       'StaysInWeekendNights', 'StaysInWeekNights', 'Adults', 'Children',
       'Babies', 'Meal', 'Country', 'MarketSegment', 'DistributionChannel',
       'IsRepeatedGuest', 'PreviousCancellations',
       'PreviousBookingsNotCanceled', 'ReservedRoomType', 'AssignedRoomType',
       'BookingChanges', 'DepositType', 'Agent', 'Company',
       'DaysInWaitingList', 'CustomerType', 'ADR', 'RequiredCarParkingSpaces',
       'TotalOfSpecialRequests', 'ReservationStatus', 'ReservationStatusDate',
       'ArrivalDate', 'LOS', 'is_grp', 'is_trn', 'is_trnP', 'ArrivalDOW',
       'No-Show', 'MS_Corporate', 'MS_Direct', 'MS_Group', 'MS_OfflineTA',
       'MS_OnlineTA', 'DC_Direct', 'TA_TO', 'DT_NonRefundable',
       'DT_Refundable', 'AgencyBooking', 'CompanyListed'],
      dtype='object')

## Validating above results

In [40]:
h1_res = pd.read_pickle("pickle/h2_res.pick")
h1_res

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,...,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,Complementary,Corporate,Direct,Groups,Offline TA/TO,Online TA,Undefined,GDS,TA/TO,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,6,2015,July,27,1,0,2,1,0.0,0,HB,PRT,Offline TA/TO,TA/TO,0,0,0,A,A,0,No Deposit,6,,...,Transient,0.00,0,0,Check-Out,2015-07-03,2015-07-01,2,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,True,False
1,1,1,88,2015,July,27,1,0,4,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,...,Transient,76.50,0,1,Canceled,2015-07-01,2015-07-01,4,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
2,2,1,65,2015,July,27,1,0,4,1,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,...,Transient,68.00,0,1,Canceled,2015-04-30,2015-07-01,4,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
3,3,1,92,2015,July,27,1,2,4,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,...,Transient,76.50,0,2,Canceled,2015-06-23,2015-07-01,6,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
4,4,1,100,2015,July,27,2,0,2,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,...,Transient,76.50,0,1,Canceled,2015-04-02,2015-07-02,2,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,79325,0,23,2017,August,35,30,2,5,2,0.0,0,BB,BEL,Offline TA/TO,TA/TO,0,0,0,A,A,0,No Deposit,394,,...,Transient,96.14,0,0,Check-Out,2017-09-06,2017-08-30,7,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,True,False
79326,79326,0,102,2017,August,35,31,2,5,3,0.0,0,BB,FRA,Online TA,TA/TO,0,0,0,E,E,0,No Deposit,9,,...,Transient,225.43,0,2,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
79327,79327,0,34,2017,August,35,31,2,5,2,0.0,0,BB,DEU,Online TA,TA/TO,0,0,0,D,D,0,No Deposit,9,,...,Transient,157.71,0,4,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
79328,79328,0,109,2017,August,35,31,2,5,2,0.0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,89,,...,Transient,104.40,0,0,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False


In [49]:
h2_res.columns

Index(['ResNum', 'IsCanceled', 'LeadTime', 'ArrivalDateYear',
       'ArrivalDateMonth', 'ArrivalDateWeekNumber', 'ArrivalDateDayOfMonth',
       'StaysInWeekendNights', 'StaysInWeekNights', 'Adults', 'Children',
       'Babies', 'Meal', 'Country', 'MarketSegment', 'DistributionChannel',
       'IsRepeatedGuest', 'PreviousCancellations',
       'PreviousBookingsNotCanceled', 'ReservedRoomType', 'AssignedRoomType',
       'BookingChanges', 'DepositType', 'Agent', 'Company',
       'DaysInWaitingList', 'CustomerType', 'ADR', 'RequiredCarParkingSpaces',
       'TotalOfSpecialRequests', 'ReservationStatus', 'ReservationStatusDate',
       'ArrivalDate', 'LOS', 'Complementary', 'Corporate', 'Direct', 'Groups',
       'Offline TA/TO', 'Online TA', 'Undefined', 'GDS', 'TA/TO'],
      dtype='object')

h2
'Complementary',
 'Corporate',
 'Direct',
 'Groups',
 'Offline TA/TO',
 'Online TA',
 'Undefined'

In [55]:
ohe_col_names = {'Complementary': 'MS_Comp', 'Corporate': 'MS_Corp', 'Direct': 'MS_Direct', 'Groups': 'MS_Grps', 'Offline TA/TO': 'MS_Offline_TA',
       'Online TA': 'MS_Online_TA', 'Undefined': np.NaN, 'TA/TO':'DC_TA_TO'}
h1_res.rename(columns=ohe_col_names, errors='ignore')

Unnamed: 0,ResNum,IsCanceled,LeadTime,ArrivalDateYear,ArrivalDateMonth,ArrivalDateWeekNumber,ArrivalDateDayOfMonth,StaysInWeekendNights,StaysInWeekNights,Adults,Children,Babies,Meal,Country,MarketSegment,DistributionChannel,IsRepeatedGuest,PreviousCancellations,PreviousBookingsNotCanceled,ReservedRoomType,AssignedRoomType,BookingChanges,DepositType,Agent,Company,DaysInWaitingList,CustomerType,ADR,RequiredCarParkingSpaces,TotalOfSpecialRequests,ReservationStatus,ReservationStatusDate,ArrivalDate,LOS,is_grp,is_trn,is_trnP,No-Show,MS_Comp,MS_Corp,MS_Direct,MS_Grps,MS_Offline_TA,MS_Online_TA,NaN,GDS,DC_TA_TO,DT_NonRefundable,DT_Refundable,AgencyBooking,CompanyListed
0,0,0,6,2015,July,27,1,0,2,1,0.0,0,HB,PRT,Offline TA/TO,TA/TO,0,0,0,A,A,0,No Deposit,6,,0,Transient,0.00,0,0,Check-Out,2015-07-03,2015-07-01,2,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,True,False
1,1,1,88,2015,July,27,1,0,4,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,0,Transient,76.50,0,1,Canceled,2015-07-01,2015-07-01,4,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
2,2,1,65,2015,July,27,1,0,4,1,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,0,Transient,68.00,0,1,Canceled,2015-04-30,2015-07-01,4,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
3,3,1,92,2015,July,27,1,2,4,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,0,Transient,76.50,0,2,Canceled,2015-06-23,2015-07-01,6,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
4,4,1,100,2015,July,27,2,0,2,2,0.0,0,BB,PRT,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,9,,0,Transient,76.50,0,1,Canceled,2015-04-02,2015-07-02,2,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79325,79325,0,23,2017,August,35,30,2,5,2,0.0,0,BB,BEL,Offline TA/TO,TA/TO,0,0,0,A,A,0,No Deposit,394,,0,Transient,96.14,0,0,Check-Out,2017-09-06,2017-08-30,7,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,True,False
79326,79326,0,102,2017,August,35,31,2,5,3,0.0,0,BB,FRA,Online TA,TA/TO,0,0,0,E,E,0,No Deposit,9,,0,Transient,225.43,0,2,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
79327,79327,0,34,2017,August,35,31,2,5,2,0.0,0,BB,DEU,Online TA,TA/TO,0,0,0,D,D,0,No Deposit,9,,0,Transient,157.71,0,4,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
79328,79328,0,109,2017,August,35,31,2,5,2,0.0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,89,,0,Transient,104.40,0,0,Check-Out,2017-09-07,2017-08-31,7,0,1,0,0,0,0,0,0,0,1,0,0,1,0,0,True,False
