# Data Wrangling

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime

## Data Collection

In [2]:
# Read the raw hotel reservation records from hotel_bookings.csv (path is
# relative to the current working directory) into a DataFrame.
bookings = pd.read_csv(filepath_or_buffer='hotel_bookings.csv')

# Preview the first five rows to confirm the file loaded as expected.
bookings.head(n=5)

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [3]:
# Dimensions of the raw dataframe as a (rows, columns) tuple
bookings.shape

(119390, 32)

## Data Definition
I'm going to take a closer look at my data. I'll examine the columns, their data types, and any missing values.

In [7]:
# Inspecting the column names.
# The names are consistently formatted: no spaces, and every name is lowercase with underscores.
bookings.columns

Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')

In [8]:
# Moving on to the data type pandas inferred for each column
bookings.dtypes

hotel                              object
is_canceled                         int64
lead_time                           int64
arrival_date_year                   int64
arrival_date_month                 object
arrival_date_week_number            int64
arrival_date_day_of_month           int64
stays_in_weekend_nights             int64
stays_in_week_nights                int64
adults                              int64
children                          float64
babies                              int64
meal                               object
country                            object
market_segment                     object
distribution_channel               object
is_repeated_guest                   int64
previous_cancellations              int64
previous_bookings_not_canceled      int64
reserved_room_type                 object
assigned_room_type                 object
booking_changes                     int64
deposit_type                       object
agent                             

In [9]:
# Taking a closer look: info() lists the non-null count and dtype of every column,
# which shows where values are missing
bookings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 32 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   is_canceled                     119390 non-null  int64  
 2   lead_time                       119390 non-null  int64  
 3   arrival_date_year               119390 non-null  int64  
 4   arrival_date_month              119390 non-null  object 
 5   arrival_date_week_number        119390 non-null  int64  
 6   arrival_date_day_of_month       119390 non-null  int64  
 7   stays_in_weekend_nights         119390 non-null  int64  
 8   stays_in_week_nights            119390 non-null  int64  
 9   adults                          119390 non-null  int64  
 10  children                        119386 non-null  float64
 11  babies                          119390 non-null  int64  
 12  meal            

In [10]:
#Most of the columns are complete. Values are missing in the children, country, agent and company columns

In order to better understand what these columns represent I'll build a table with the column descriptions 

|Name|Description|
|----|-----------|
|hotel|Specifies if the reservation was made for the Resort or the City Hotel|
|is_canceled|Indicates if the booking was canceled (1) or not (0)|
|lead_time|Number of days between entering reservation into database and arrival date|
|arrival_date_year|Year of arrival date|
|arrival_date_month|Month of arrival date|
|arrival_date_week_number|Week number of arrival year|
|arrival_date_day_of_month|Day of arrival month|
|stays_in_weekend_nights|Number of weekend nights (Sat or Sun) included in the reservation|
|stays_in_week_nights|Number of weeknights (Mon-Fri) included in the reservation|
|adults|Number of adults in reservation|
|children|Number of children in reservation|
|babies|Number of babies in reservation|
|meal|Type of meal package included in the reservation. Undefined/SC – no meal package; BB – Bed & Breakfast; HB – Half board (breakfast and one other meal – usually dinner); FB – Full board (breakfast, lunch and dinner)|
|country|Country of origin|
|market_segment|Market segment designation. In categories, the term “TA” means “Travel Agents” and “TO” means “Tour Operators”|
|distribution_channel|Booking distribution channel. The term “TA” means “Travel Agents” and “TO” means “Tour Operators”|
|is_repeated_guest|Indicates if reservation includes a repeated guest (1) or not (0)|
|previous_cancellations|Number of previous reservations canceled by the customer|
|previous_bookings_not_canceled|Number of previous bookings not canceled by the customer|
|reserved_room_type|Code of room type reserved|
|assigned_room_type|Code of room type actually assigned upon arrival|
|booking_changes|Number of changes/amendments made to the reservation from the time the reservation was created up until the check-in or cancelation|
|deposit_type|Describes whether reservation was guaranteed through a deposit. Values include: No Deposit – no deposit was made; Non Refund – a deposit was made in the value of the total stay cost; Refundable – a deposit was made with a value under the total cost of stay|
|agent|ID of travel agency that made the booking|
|company|ID of the company/entity that made the booking or responsible for paying the booking|
|days_in_waiting_list|Number of days the booking was in the waiting list before it was confirmed to the customer|
|customer_type|Type of reservation. Variables include: Contract - when the booking has an allotment or other type of contract associated to it; Group – when the booking is associated to a group; Transient – when the booking is not part of a group or contract, and is not associated to other transient booking; Transient-party – when the booking is transient, but is associated to at least other transient booking|
|adr|Average daily rate. Defined as the sum of all transactions divided by the total number of staying nights|
|required_car_parking_spaces|Number of parking spaces required on the reservation|
|total_of_special_requests|Number of special requests made by the customer (e.g. twin bed or high floor)|
|reservation_status|Final status of the reservation. Three possible categories: Canceled – booking was canceled by the customer; Check-Out – customer has checked in but already departed; No-Show – customer did not check-in and did inform the hotel of the reason why|
|reservation_status_date|Date on which the final status was set|



In [11]:
#reservation_status_date is meant to be a datetime but was read in as an object (string) column. This will be addressed below

In [12]:
# Moving on, I'll look at the number of unique values in each column
# (the share of unique values relative to the row count follows in the next cell)

bookings.nunique()

hotel                                2
is_canceled                          2
lead_time                          479
arrival_date_year                    3
arrival_date_month                  12
arrival_date_week_number            53
arrival_date_day_of_month           31
stays_in_weekend_nights             17
stays_in_week_nights                35
adults                              14
children                             5
babies                               5
meal                                 5
country                            177
market_segment                       8
distribution_channel                 5
is_repeated_guest                    2
previous_cancellations              15
previous_bookings_not_canceled      73
reserved_room_type                  10
assigned_room_type                  12
booking_changes                     21
deposit_type                         3
agent                              333
company                            352
days_in_waiting_list     

In [13]:
# Distinct values per column as a fraction of the number of rows.
# The very small ratios make sense given how each column has been categorized.
bookings.nunique().div(len(bookings))

hotel                             0.000017
is_canceled                       0.000017
lead_time                         0.004012
arrival_date_year                 0.000025
arrival_date_month                0.000101
arrival_date_week_number          0.000444
arrival_date_day_of_month         0.000260
stays_in_weekend_nights           0.000142
stays_in_week_nights              0.000293
adults                            0.000117
children                          0.000042
babies                            0.000042
meal                              0.000042
country                           0.001483
market_segment                    0.000067
distribution_channel              0.000042
is_repeated_guest                 0.000017
previous_cancellations            0.000126
previous_bookings_not_canceled    0.000611
reserved_room_type                0.000084
assigned_room_type                0.000101
booking_changes                   0.000176
deposit_type                      0.000025
agent      

In [14]:
# The data covers both the Resort Hotel and the City Hotel, so count how many
# bookings belong to each of them.
hotel_counts = bookings['hotel'].value_counts()
hotel_counts

City Hotel      79330
Resort Hotel    40060
Name: hotel, dtype: int64

In [15]:
# Minimum and maximum of every column, transposed so that each column of the
# data becomes one row of the summary.
# Note for the meal column: 'Undefined' is a real category there, not a null.
bookings.agg(['min', 'max']).transpose()

Unnamed: 0,min,max
hotel,City Hotel,Resort Hotel
is_canceled,0,1
lead_time,0,737
arrival_date_year,2015,2017
arrival_date_month,April,September
arrival_date_week_number,1,53
arrival_date_day_of_month,1,31
stays_in_weekend_nights,0,19
stays_in_week_nights,0,50
adults,0,55


In [16]:
# Taking a closer look at the numeric columns.
# As seen above, the min and the max of adr stand out: the minimum is negative
# and the maximum is far above the 75th percentile.
bookings.describe()

Unnamed: 0,is_canceled,lead_time,arrival_date_year,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,booking_changes,agent,company,days_in_waiting_list,adr,required_car_parking_spaces,total_of_special_requests
count,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119390.0,119386.0,119390.0,119390.0,119390.0,119390.0,119390.0,103050.0,6797.0,119390.0,119390.0,119390.0,119390.0
mean,0.370416,104.011416,2016.156554,27.165173,15.798241,0.927599,2.500302,1.856403,0.10389,0.007949,0.031912,0.087118,0.137097,0.221124,86.693382,189.266735,2.321149,101.831122,0.062518,0.571363
std,0.482918,106.863097,0.707476,13.605138,8.780829,0.998613,1.908286,0.579261,0.398561,0.097436,0.175767,0.844336,1.497437,0.652306,110.774548,131.655015,17.594721,50.53579,0.245291,0.792798
min,0.0,0.0,2015.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,6.0,0.0,-6.38,0.0,0.0
25%,0.0,18.0,2016.0,16.0,8.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,62.0,0.0,69.29,0.0,0.0
50%,0.0,69.0,2016.0,28.0,16.0,1.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,14.0,179.0,0.0,94.575,0.0,0.0
75%,1.0,160.0,2017.0,38.0,23.0,2.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,229.0,270.0,0.0,126.0,0.0,1.0
max,1.0,737.0,2017.0,53.0,31.0,19.0,50.0,55.0,10.0,10.0,1.0,26.0,72.0,21.0,535.0,543.0,391.0,5400.0,8.0,5.0


## Data Cleaning

Now I'll handle missing data and duplicates

In [17]:
# reservation_status_date was loaded as plain text; parse it into datetimes
# and store the parsed values back under the same column name.
status_dates = pd.to_datetime(bookings['reservation_status_date'])
bookings['reservation_status_date'] = status_dates

In [18]:
# Count the missing values in each column to see which ones need attention.
missing_counts = bookings.isna().sum()
missing_counts

hotel                                  0
is_canceled                            0
lead_time                              0
arrival_date_year                      0
arrival_date_month                     0
arrival_date_week_number               0
arrival_date_day_of_month              0
stays_in_weekend_nights                0
stays_in_week_nights                   0
adults                                 0
children                               4
babies                                 0
meal                                   0
country                              488
market_segment                         0
distribution_channel                   0
is_repeated_guest                      0
previous_cancellations                 0
previous_bookings_not_canceled         0
reserved_room_type                     0
assigned_room_type                     0
booking_changes                        0
deposit_type                           0
agent                              16340
company         

In [19]:
# Fraction of missing values per column: the mean of the boolean NaN mask is
# the number of missing entries divided by the number of rows.
bookings.isna().mean()

hotel                             0.000000
is_canceled                       0.000000
lead_time                         0.000000
arrival_date_year                 0.000000
arrival_date_month                0.000000
arrival_date_week_number          0.000000
arrival_date_day_of_month         0.000000
stays_in_weekend_nights           0.000000
stays_in_week_nights              0.000000
adults                            0.000000
children                          0.000034
babies                            0.000000
meal                              0.000000
country                           0.004087
market_segment                    0.000000
distribution_channel              0.000000
is_repeated_guest                 0.000000
previous_cancellations            0.000000
previous_bookings_not_canceled    0.000000
reserved_room_type                0.000000
assigned_room_type                0.000000
booking_changes                   0.000000
deposit_type                      0.000000
agent      

In [20]:
#Alright, looks like children and country won't be that big of an issue. I'll have to take a closer look at 
#agent and company 

In [21]:
# Number of bookings with no value recorded for children
bookings['children'].isna().sum()

4

In [22]:
# Only four bookings have no value for children, so it is reasonable to assume
# those reservations had no children at all and to replace the NaNs with zero.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column: the in-place form works on an intermediate object, triggers
# a chained-assignment warning on recent pandas releases, and leaves `bookings`
# unchanged once Copy-on-Write is enabled.
bookings['children'] = bookings['children'].fillna(0)

In [23]:
# The country of origin might not always be disclosed when the reservation is
# made; it is possible that it is only confirmed once the guest checks in.
# Missing countries are therefore labelled 'Undisclosed'.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column, which is a chained in-place operation that newer pandas
# versions warn about and that has no effect under Copy-on-Write.
bookings['country'] = bookings['country'].fillna('Undisclosed')

In [28]:
# Before changing the agent and company columns, check how the bookings are
# spread across market_segment (distribution_channel is examined separately).
segment = bookings['market_segment']
print(segment.unique())

# Share of all bookings that falls into each market segment.
segment.value_counts().div(len(bookings))

['Direct' 'Corporate' 'Online TA' 'Offline TA/TO' 'Complementary' 'Groups'
 'Undefined' 'Aviation']


Online TA        0.473046
Offline TA/TO    0.202856
Groups           0.165935
Direct           0.105587
Corporate        0.044350
Complementary    0.006223
Aviation         0.001985
Undefined        0.000017
Name: market_segment, dtype: float64

In [30]:
# Share of all bookings per distribution channel. The index of the result
# lists every distinct channel, so no separate unique() call is needed (a bare
# unique() that is not the last expression of the cell is never displayed).
bookings['distribution_channel'].value_counts()/len(bookings)

TA/TO        0.819750
Direct       0.122665
Corporate    0.055926
GDS          0.001617
Undefined    0.000042
Name: distribution_channel, dtype: float64

In [31]:
# The agent column holds the ID of the travel agency used to book the
# reservation. Reservations made directly with the hotels may simply have been
# left without a value here, so missing agents are filled with 0 and those
# reservations are treated as direct bookings going forward.
# The result is assigned back instead of calling fillna(inplace=True) on the
# selected column, which is a chained in-place operation that newer pandas
# versions warn about and that has no effect under Copy-on-Write.
bookings['agent'] = bookings['agent'].fillna(0)

In [32]:
# The company column holds the IDs of companies that make reservations through
# corporate accounts. About 94% of its values are missing, which is consistent
# with market_segment and distribution_channel: corporate bookings represent
# only close to 5% of all reservations. Filling 94% of a column with a single
# placeholder value would be problematic, so the whole column is dropped.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# lets this cell be re-run without a KeyError once the column is already gone.
bookings = bookings.drop(columns='company', errors='ignore')

In [33]:
# Re-checking the non-null counts to make sure no missing values remain
# after the fills and the dropped column
bookings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 31 columns):
 #   Column                          Non-Null Count   Dtype         
---  ------                          --------------   -----         
 0   hotel                           119390 non-null  object        
 1   is_canceled                     119390 non-null  int64         
 2   lead_time                       119390 non-null  int64         
 3   arrival_date_year               119390 non-null  int64         
 4   arrival_date_month              119390 non-null  object        
 5   arrival_date_week_number        119390 non-null  int64         
 6   arrival_date_day_of_month       119390 non-null  int64         
 7   stays_in_weekend_nights         119390 non-null  int64         
 8   stays_in_week_nights            119390 non-null  int64         
 9   adults                          119390 non-null  int64         
 10  children                        119390 non-null  float64

In [34]:
# In the meal column 'Undefined' and 'SC' both mean that no meal package was
# booked, so they are consolidated under the single value 'SC'.
# Series.replace matches whole cell values, whereas .str.replace rewrites any
# substring occurrence and, depending on the pandas version, may interpret the
# pattern as a regular expression.
bookings['meal'] = bookings['meal'].replace('Undefined', 'SC')

# Confirm which meal codes remain.
bookings['meal'].unique()

array(['BB', 'FB', 'HB', 'SC'], dtype=object)

In [35]:
# Flag every row that exactly repeats an earlier row, then keep those rows in
# their own frame for inspection.
is_duplicate = bookings.duplicated()
duplicate_rows = bookings.loc[is_duplicate]
duplicate_rows

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
5,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.00,0,1,Check-Out,2015-07-03
22,Resort Hotel,0,72,2015,July,27,1,2,4,2,...,1,No Deposit,250.0,0,Transient,84.67,0,1,Check-Out,2015-07-07
43,Resort Hotel,0,70,2015,July,27,2,2,3,2,...,0,No Deposit,250.0,0,Transient,137.00,0,1,Check-Out,2015-07-07
138,Resort Hotel,1,5,2015,July,28,5,1,0,2,...,0,No Deposit,240.0,0,Transient,97.00,0,0,Canceled,2015-07-01
200,Resort Hotel,0,0,2015,July,28,7,0,1,1,...,0,No Deposit,240.0,0,Transient,109.80,0,3,Check-Out,2015-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119349,City Hotel,0,186,2017,August,35,31,0,3,2,...,0,No Deposit,9.0,0,Transient,126.00,0,2,Check-Out,2017-09-03
119352,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03
119353,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03
119354,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03


In [36]:
# Looking for a pattern: could each modification of a reservation have been
# stored as a brand-new row instead of updating the existing one?
# If so, few duplicated rows should report zero booking changes.
no_changes = duplicate_rows['booking_changes'].eq(0)
duplicate_rows.loc[no_changes]

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
5,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.00,0,1,Check-Out,2015-07-03
43,Resort Hotel,0,70,2015,July,27,2,2,3,2,...,0,No Deposit,250.0,0,Transient,137.00,0,1,Check-Out,2015-07-07
138,Resort Hotel,1,5,2015,July,28,5,1,0,2,...,0,No Deposit,240.0,0,Transient,97.00,0,0,Canceled,2015-07-01
200,Resort Hotel,0,0,2015,July,28,7,0,1,1,...,0,No Deposit,240.0,0,Transient,109.80,0,3,Check-Out,2015-07-08
219,Resort Hotel,1,1,2015,July,28,8,0,1,2,...,0,No Deposit,0.0,0,Transient,104.72,0,1,Canceled,2015-07-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119349,City Hotel,0,186,2017,August,35,31,0,3,2,...,0,No Deposit,9.0,0,Transient,126.00,0,2,Check-Out,2017-09-03
119352,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03
119353,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03
119354,City Hotel,0,63,2017,August,35,31,0,3,3,...,0,No Deposit,9.0,0,Transient-Party,195.33,0,2,Check-Out,2017-09-03


In [37]:
# Same check for special requests: duplicated rows that carry no special
# request at all.
no_requests = duplicate_rows['total_of_special_requests'].eq(0)
duplicate_rows.loc[no_requests]

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
138,Resort Hotel,1,5,2015,July,28,5,1,0,2,...,0,No Deposit,240.0,0,Transient,97.0,0,0,Canceled,2015-07-01
415,Resort Hotel,0,57,2015,July,29,15,0,2,2,...,0,No Deposit,305.0,0,Transient-Party,107.0,0,0,Check-Out,2015-07-17
416,Resort Hotel,0,57,2015,July,29,15,0,2,2,...,0,No Deposit,305.0,0,Transient-Party,107.0,0,0,Check-Out,2015-07-17
425,Resort Hotel,0,57,2015,July,29,15,0,2,2,...,0,No Deposit,305.0,0,Transient,107.0,0,0,Check-Out,2015-07-17
426,Resort Hotel,0,57,2015,July,29,15,0,2,2,...,0,No Deposit,305.0,0,Transient,107.0,0,0,Check-Out,2015-07-17
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119271,City Hotel,0,213,2017,August,35,28,1,3,1,...,1,No Deposit,19.0,0,Transient-Party,104.0,0,0,Check-Out,2017-09-01
119275,City Hotel,0,213,2017,August,35,28,1,3,1,...,1,No Deposit,19.0,0,Transient-Party,104.0,0,0,Check-Out,2017-09-01
119276,City Hotel,0,213,2017,August,35,28,1,3,1,...,1,No Deposit,19.0,0,Transient-Party,104.0,0,0,Check-Out,2017-09-01
119277,City Hotel,0,213,2017,August,35,28,1,3,1,...,1,No Deposit,19.0,0,Transient-Party,104.0,0,0,Check-Out,2017-09-01


In [38]:
# The duplicates do not appear to come from updates being stored as new
# reservations: the columns that would reflect a change are mostly 0 among the
# duplicated rows. The repeated rows are therefore removed, keeping the first
# occurrence of each.
# NOTE(review): the data has no booking identifier, so distinct bookings that
# happen to share every attribute are removed as well.
bookings_clean = bookings.drop_duplicates(keep='first')

# Preview the de-duplicated frame.
bookings_clean.head(n=5)

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,booking_changes,deposit_type,agent,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,3,No Deposit,0.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,4,No Deposit,0.0,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,0,No Deposit,0.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,0,No Deposit,304.0,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,0,No Deposit,240.0,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [39]:
# Rows and columns remaining after removing the duplicated rows
bookings_clean.shape

(87389, 31)

## Exporting data to new csv file

In [40]:
# Save the cleaned bookings to a new CSV file; index=False keeps the row index
# out of the written file.
bookings_clean.to_csv(path_or_buf='processed_hotel_bookings.csv', index=False)