## 1. Import relevant libraries

In [37]:
import pandas as pd
import numpy as np
import os
import random

## 2. Open csv files

In [38]:
# Load the three input tables; the paths are relative to the working directory.
df_guest, df_hotel, df_pref = (
    pd.read_csv(csv_name)
    for csv_name in ("guests.csv", "hotels.csv", "preferences.csv")
)

#### Reset hotel and guest index 



In [39]:
# Turn the 1-based ids embedded in names such as "guest_12" / "hotel_7"
# into 0-based integer indices usable as matrix coordinates.
df_pref['guest_index'] = df_pref['guest'].map(lambda name: int(name.split("_")[1]) - 1)
df_pref['hotel_index'] = df_pref['hotel'].map(lambda name: int(name.split("_")[1]) - 1)

In [40]:
# 0-based hotel index extracted from the numeric suffix of the hotel name.
df_hotel['hotel_index'] = df_hotel['hotel'].map(lambda name: int(name.split("_")[1]) - 1)

In [41]:
# 0-based guest index extracted from the numeric suffix of the guest name.
df_guest['guest_index'] = df_guest['guest'].map(lambda name: int(name.split("_")[1]) - 1)

## 3. Initializations

In [42]:
# guest_index -> list of preferred hotel indices, in the row order of df_pref.
pref_by_guest = df_pref.groupby("guest_index")["hotel_index"].agg(list).to_dict()

#### Initialize the preference matrix

In [43]:
# Assignment matrix: one row per guest, one column per hotel; 1 marks a booking.
n_guests, n_hotels = df_guest.shape[0], df_hotel.shape[0]
pref_matrix = np.zeros((n_guests, n_hotels))

#### Creating hotel matrix

In [44]:
# The algorithms below address hotel_matrix positionally (row i <-> hotel_index i),
# so the rows are sorted by hotel_index instead of relying on the CSV row order.
# This still assumes hotel indices are contiguous and start at 0.
# Columns are selected explicitly so that 0 = rooms and 1 = price.
hotel_matrix = df_hotel.set_index("hotel_index").sort_index()[["rooms", "price"]].values

#### Adding third column in hotel_matrix to keep track of booked rooms

In [45]:
# Column 2 counts the rooms already booked in each hotel.
# Slicing to the first two columns keeps this cell idempotent: re-running it
# rebuilds the occupancy column instead of appending a fourth one.
hotel_matrix = np.c_[hotel_matrix[:, :2], np.zeros(len(df_hotel))]

## 4. Guest distribution algorithms

### 4.1 Random distribution

- Algorithm

In [46]:
# Seed the RNG so that "Restart Kernel -> Run All" reproduces the same simulations.
SEED = 42
N_SIMULATIONS = 100
random.seed(SEED)

Avg_Satis = []    # overall satisfaction level of each simulation
Avg_bus_vol = []  # total business volume (Euros) of each simulation

# Number of preferences declared by each guest. It does not depend on the
# simulation, so it is built once instead of on every iteration.
df_num_pref_guest = pd.DataFrame(
    [(len(hotels), guest) for guest, hotels in pref_by_guest.items()],
    columns=['num_pref', 'guest_index'],
)

for simulation in range(N_SIMULATIONS):
    # Random booking order and, for every guest, a random order of the preferences
    initial_guest_list = random.sample(list(pref_by_guest), len(pref_by_guest))
    pref_by_guest_random = {
        guest: random.sample(pref_by_guest[guest], len(pref_by_guest[guest]))
        for guest in initial_guest_list
    }

    # Reset occupancy and assignments so that every simulation starts from scratch
    hotel_matrix[:, 2] = 0
    pref_matrix[:, :] = 0

    for guest in initial_guest_list:
        for hotel in pref_by_guest_random[guest]:
            # Book the first hotel (in shuffled order) that still has a free room
            if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
                pref_matrix[guest, hotel] = 1
                hotel_matrix[hotel, 2] += 1
                break

    # Hotel assigned to each guest = column of the 1 in the guest's row.
    # A sentinel column of ones is appended so that unassigned guests get the
    # index len(df_hotel), which matches no real hotel.
    assigned_hotel = np.argmax(np.c_[pref_matrix, np.ones(len(df_guest))], axis=1)
    df_pref_matrix = pd.DataFrame(assigned_hotel, columns=['hotel_index'])
    df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

    # Attach the discount of every guest
    df_pref_matrix = df_pref_matrix.merge(df_guest, on='guest_index')

    # Attach the price of every hotel; the inner join also drops the guests
    # without a booking (their sentinel hotel index is not in df_hotel)
    df_pref_matrix = df_pref_matrix.merge(df_hotel.drop(columns=["rooms"]), on='hotel_index')

    # Price actually paid by each guest
    df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1-df_pref_matrix.discount)

    # Position (0 = first choice) of the assigned hotel in the guest's declared
    # (not shuffled) preference list
    df_pref_matrix['Hotel_pref_index'] = [
        pref_by_guest[guest_idx].index(hotel_idx)
        for guest_idx, hotel_idx in zip(df_pref_matrix['guest_index'], df_pref_matrix['hotel_index'])
    ]

    df_pref_matrix = df_pref_matrix.merge(df_num_pref_guest, on='guest_index')

    # Satisfaction is 1 for the first choice and decreases linearly with the rank;
    # unassigned guests contribute 0 because the sum is divided by all guests
    df_pref_matrix['Satisfaction'] = (df_pref_matrix.num_pref - df_pref_matrix.Hotel_pref_index)/df_pref_matrix.num_pref
    Satifaction_level = df_pref_matrix.Satisfaction.sum()/df_guest.shape[0]
    Avg_Satis.append(Satifaction_level)

    Tot_business_volume = df_pref_matrix.price_to_set.sum()
    Avg_bus_vol.append(Tot_business_volume)

# Occupancy snapshot of the last simulation. The array is copied because a
# DataFrame built from a 2-D ndarray may share its memory, and hotel_matrix is
# reset in place by later cells.
df_hotel_matrix = pd.DataFrame(hotel_matrix.copy(), columns=['Rooms', 'Price', 'Occupied Rooms'])
    

- Statistics

In [47]:
# Occupancy masks and counts, computed once and reused by the report below.
is_full = df_hotel_matrix['Rooms'] == df_hotel_matrix['Occupied Rooms']
is_empty = df_hotel_matrix['Occupied Rooms'] == 0
n_assigned = df_pref_matrix.shape[0]
n_full = df_hotel_matrix[is_full].shape[0]
n_empty = df_hotel_matrix[is_empty].shape[0]

print ('Total business volume:','%.2f'% df_pref_matrix.price_to_set.sum(),'Euros')
print ('Assigned guests:', n_assigned)
print ('Vacant rooms:', df_hotel.rooms.sum() - n_assigned)
print ('Number of fully booked hotels:', n_full)
print ('Number of completely vacant hotels:', n_empty)
print ('Number of neither full nor vacant hotels:', df_hotel_matrix.shape[0] - n_full - n_empty)
print ('Overall satisfaction is:','%.2f'%(Satifaction_level*100),'%' )

Total business volume: 637325.53 Euros
Assigned guests: 3976
Vacant rooms: 641
Number of fully booked hotels: 259
Number of completely vacant hotels: 0
Number of neither full nor vacant hotels: 141
Overall satisfaction is: 55.21 %


In [48]:
# Averages over all simulations. The per-simulation values are collected in
# Avg_Satis / Avg_bus_vol by the simulation loop; the previously used names
# `Satis` and `Tot_bus_vol` are never defined and fail on a fresh kernel.
print ('Average overall satisfaction:','%.2f'%(sum(Avg_Satis)/len(Avg_Satis)*100),'%')
print ('Average business volume:','%.2f'%(sum(Avg_bus_vol)/len(Avg_bus_vol)),'Euros')

Average overall satisfaction: 55.03 %
Average business volume: 637200.13 Euros


### 4.2 Distribution by booking order and priorities declared by the guests

- The distribution of the guests in the hotels follows the booking order (guest list shall be sorted numerically) and the indicated priorities (priority order shall be the one declared in preferences.csv).  

- Initializations

In [49]:
# Reset the assignments and the booked-room counters so that the algorithms
# can be run one after another on a clean state.
pref_matrix.fill(0)
hotel_matrix[:, 2] = 0

In [50]:
# Booking order: guests sorted by ascending guest index.
initial_guest_list = sorted(pref_by_guest)

- Algorithm

In [51]:
for guest in initial_guest_list:
    # First hotel, in the guest's declared order, that still has a free room
    chosen_hotel = next(
        (
            candidate
            for candidate in pref_by_guest[guest]
            if hotel_matrix[candidate, 2] < hotel_matrix[candidate, 0]
        ),
        None,
    )
    if chosen_hotel is not None:
        pref_matrix[guest, chosen_hotel] = 1
        hotel_matrix[chosen_hotel, 2] += 1

- Statistics

In [52]:
# Hotel assigned to each guest = column of the 1 in the guest's row.
# A sentinel column of ones is appended so that unassigned guests get the
# index len(df_hotel), which matches no real hotel.
assigned_hotel = np.argmax(np.c_[pref_matrix, np.ones(len(df_guest))], axis=1)
df_pref_matrix = pd.DataFrame(assigned_hotel, columns=['hotel_index'])
df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

# Attach the discount of every guest
df_pref_matrix = df_pref_matrix.merge(df_guest, on='guest_index')

# Attach the price of every hotel; the inner join also drops the guests
# without a booking (their sentinel hotel index is not in df_hotel)
df_pref_matrix = df_pref_matrix.merge(df_hotel.drop(columns=["rooms"]), on='hotel_index')

# Price actually paid by each guest
df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1-df_pref_matrix.discount)

# Occupancy snapshot used to count full / vacant hotels. The array is copied
# because a DataFrame built from a 2-D ndarray may share its memory, and
# hotel_matrix is reset in place by later cells.
df_hotel_matrix = pd.DataFrame(hotel_matrix.copy(), columns=['Rooms', 'Price', 'Occupied Rooms'])

# Position (0 = first choice) of the assigned hotel in the guest's preference list
df_pref_matrix['Hotel_pref_index'] = [
    pref_by_guest[guest_idx].index(hotel_idx)
    for guest_idx, hotel_idx in zip(df_pref_matrix['guest_index'], df_pref_matrix['hotel_index'])
]

# Number of preferences declared by each guest
df_num_pref_guest = pd.DataFrame(
    [(len(hotels), guest_idx) for guest_idx, hotels in pref_by_guest.items()],
    columns=['num_pref', 'guest_index'],
)
df_pref_matrix = df_pref_matrix.merge(df_num_pref_guest, on='guest_index')

# Satisfaction is 1 for the first choice and decreases linearly with the rank;
# unassigned guests contribute 0 because the sum is divided by all guests
df_pref_matrix['Satisfaction'] = (df_pref_matrix.num_pref - df_pref_matrix.Hotel_pref_index)/df_pref_matrix.num_pref
Satifaction_level = df_pref_matrix.Satisfaction.sum()/df_guest.shape[0]

In [53]:
# Occupancy masks and counts, computed once and reused by the report below.
is_full = df_hotel_matrix['Rooms'] == df_hotel_matrix['Occupied Rooms']
is_empty = df_hotel_matrix['Occupied Rooms'] == 0
n_assigned = df_pref_matrix.shape[0]
n_full = df_hotel_matrix[is_full].shape[0]
n_empty = df_hotel_matrix[is_empty].shape[0]

print ('Total business volume:','%.2f'% df_pref_matrix.price_to_set.sum(),'Euros')
print ('Assigned guests:', n_assigned)
print ('Vacant rooms:', df_hotel.rooms.sum() - n_assigned)
print ('Number of fully booked hotels:', n_full)
print ('Number of completely vacant hotels:', n_empty)
print ('Number of neither full nor vacant hotels:', df_hotel_matrix.shape[0] - n_full - n_empty)
print ('Overall satisfaction is:','%.2f'%(Satifaction_level*100),'%' )

Total business volume: 643597.53 Euros
Assigned guests: 3975
Vacant rooms: 642
Number of fully booked hotels: 255
Number of completely vacant hotels: 0
Number of neither full nor vacant hotels: 145
Overall satisfaction is: 97.53 %


### 4.3 Distribution by room price

- The distribution will start from the cheapest hotel to the most expensive one.

- Initializations

In [54]:
# Reset the assignments and the booked-room counters so that the algorithms
# can be run one after another on a clean state.
pref_matrix.fill(0)
hotel_matrix[:, 2] = 0

In [55]:
# Guests are considered in booking order (ascending guest index).
initial_guest_list = sorted(list(pref_by_guest.keys()))

# Hotels from the cheapest to the most expensive.
hotels_by_price = df_hotel.sort_values('price')
hotel_price = hotels_by_price.drop(columns = ['hotel']).values
# Read the indices from the named column rather than from position 2 of the
# mixed-dtype array: a float price column would upcast the whole array and turn
# the indices into floats, which cannot be used to index hotel_matrix.
hotel_price_list = hotels_by_price['hotel_index'].tolist()

- Algorithm

In [56]:
# Fill the hotels one at a time, in the order given by hotel_price_list.
for hotel in hotel_price_list:
    for guest in initial_guest_list:
        # Hotel is full: move on to the next hotel
        if hotel_matrix[hotel, 2] == hotel_matrix[hotel, 0]:
            break
        # Guest already has a room somewhere else
        if pref_matrix[guest, :].sum() != 0:
            continue
        # Book only if the guest listed this hotel among the preferences
        if hotel in pref_by_guest[guest]:
            pref_matrix[guest, hotel] = 1
            hotel_matrix[hotel, 2] += 1
                    

- Statistics

In [57]:
# Hotel assigned to each guest = column of the 1 in the guest's row.
# A sentinel column of ones is appended so that unassigned guests get the
# index len(df_hotel), which matches no real hotel.
assigned_hotel = np.argmax(np.c_[pref_matrix, np.ones(len(df_guest))], axis=1)
df_pref_matrix = pd.DataFrame(assigned_hotel, columns=['hotel_index'])
df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

# Attach the discount of every guest
df_pref_matrix = df_pref_matrix.merge(df_guest, on='guest_index')

# Attach the price of every hotel; the inner join also drops the guests
# without a booking (their sentinel hotel index is not in df_hotel)
df_pref_matrix = df_pref_matrix.merge(df_hotel.drop(columns=["rooms"]), on='hotel_index')

# Price actually paid by each guest
df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1-df_pref_matrix.discount)

# Occupancy snapshot used to count full / vacant hotels. The array is copied
# because a DataFrame built from a 2-D ndarray may share its memory, and
# hotel_matrix is reset in place by later cells.
df_hotel_matrix = pd.DataFrame(hotel_matrix.copy(), columns=['Rooms', 'Price', 'Occupied Rooms'])

# Position (0 = first choice) of the assigned hotel in the guest's preference list
df_pref_matrix['Hotel_pref_index'] = [
    pref_by_guest[guest_idx].index(hotel_idx)
    for guest_idx, hotel_idx in zip(df_pref_matrix['guest_index'], df_pref_matrix['hotel_index'])
]

# Number of preferences declared by each guest
df_num_pref_guest = pd.DataFrame(
    [(len(hotels), guest_idx) for guest_idx, hotels in pref_by_guest.items()],
    columns=['num_pref', 'guest_index'],
)
df_pref_matrix = df_pref_matrix.merge(df_num_pref_guest, on='guest_index')

# Satisfaction is 1 for the first choice and decreases linearly with the rank;
# unassigned guests contribute 0 because the sum is divided by all guests
df_pref_matrix['Satisfaction'] = (df_pref_matrix.num_pref - df_pref_matrix.Hotel_pref_index)/df_pref_matrix.num_pref
Satifaction_level = df_pref_matrix.Satisfaction.sum()/df_guest.shape[0]


In [58]:
# Occupancy masks and counts, computed once and reused by the report below.
is_full = df_hotel_matrix['Rooms'] == df_hotel_matrix['Occupied Rooms']
is_empty = df_hotel_matrix['Occupied Rooms'] == 0
n_assigned = df_pref_matrix.shape[0]
n_full = df_hotel_matrix[is_full].shape[0]
n_empty = df_hotel_matrix[is_empty].shape[0]

print ('Total business volume:','%.2f'% df_pref_matrix.price_to_set.sum(),'Euros')
print ('Assigned guests:', n_assigned)
print ('Vacant rooms:', df_hotel.rooms.sum() - n_assigned)
print ('Number of fully booked hotels:', n_full)
print ('Number of completely vacant hotels:', n_empty)
print ('Number of neither full nor vacant hotels:', df_hotel_matrix.shape[0] - n_full - n_empty)
print ('Overall satisfaction is:','%.2f'%(Satifaction_level*100),'%' )

Total business volume: 564085.08 Euros
Assigned guests: 3925
Vacant rooms: 692
Number of fully booked hotels: 319
Number of completely vacant hotels: 12
Number of neither full nor vacant hotels: 69
Overall satisfaction is: 54.23 %


### 4.4 Distribution by room availability

- The distribution will start from the largest hotel to the smallest one (big/small in terms of room availability). 

- Initializations 

In [59]:
# Reset the assignments and the booked-room counters so that the algorithms
# can be run one after another on a clean state.
pref_matrix.fill(0)
hotel_matrix[:, 2] = 0

In [60]:
# Booking order: guests sorted by ascending guest index.
initial_guest_list = sorted(pref_by_guest)
# Alias of the declared preferences (same dict object, nothing is shuffled here);
# the name is kept because the algorithm cell of this section reads it.
pref_by_guest_random = pref_by_guest

In [61]:
# Hotels from the largest to the smallest number of rooms.
hotels_by_rooms = df_hotel.sort_values('rooms', ascending=False)
hotel_avail = hotels_by_rooms.drop(columns = ['hotel']).values
# Read the indices from the named column rather than from position 2 of the
# mixed-dtype array: a float column would upcast the whole array and turn the
# indices into floats, which cannot be used to index hotel_matrix.
hotel_avail_list = hotels_by_rooms['hotel_index'].tolist()

- Algorithm 

In [62]:
# Fill the hotels one at a time, in the order given by hotel_avail_list.
for hotel in hotel_avail_list:
    for guest in initial_guest_list:
        # All rooms of this hotel are booked: move on to the next hotel
        if hotel_matrix[hotel, 2] == hotel_matrix[hotel, 0]:
            break
        # Guest already has a room somewhere else
        if pref_matrix[guest, :].sum() != 0:
            continue
        # Book only if the guest listed this hotel among the preferences
        if hotel in pref_by_guest_random[guest]:
            pref_matrix[guest, hotel] = 1
            hotel_matrix[hotel, 2] += 1
                    

- Statistics

In [63]:
# Hotel assigned to each guest = column of the 1 in the guest's row.
# A sentinel column of ones is appended so that unassigned guests get the
# index len(df_hotel), which matches no real hotel.
assigned_hotel = np.argmax(np.c_[pref_matrix, np.ones(len(df_guest))], axis=1)
df_pref_matrix = pd.DataFrame(assigned_hotel, columns=['hotel_index'])
df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

# Attach the discount of every guest
df_pref_matrix = df_pref_matrix.merge(df_guest, on='guest_index')

# Attach the price of every hotel; the inner join also drops the guests
# without a booking (their sentinel hotel index is not in df_hotel)
df_pref_matrix = df_pref_matrix.merge(df_hotel.drop(columns=["rooms"]), on='hotel_index')

# Price actually paid by each guest
df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1-df_pref_matrix.discount)

# Occupancy snapshot used to count full / vacant hotels. The array is copied
# because a DataFrame built from a 2-D ndarray may share its memory, and
# hotel_matrix is modified in place by other cells.
df_hotel_matrix = pd.DataFrame(hotel_matrix.copy(), columns=['Rooms', 'Price', 'Occupied Rooms'])

# Position (0 = first choice) of the assigned hotel in the guest's preference list
df_pref_matrix['Hotel_pref_index'] = [
    pref_by_guest[guest_idx].index(hotel_idx)
    for guest_idx, hotel_idx in zip(df_pref_matrix['guest_index'], df_pref_matrix['hotel_index'])
]

# Number of preferences declared by each guest
df_num_pref_guest = pd.DataFrame(
    [(len(hotels), guest_idx) for guest_idx, hotels in pref_by_guest.items()],
    columns=['num_pref', 'guest_index'],
)
df_pref_matrix = df_pref_matrix.merge(df_num_pref_guest, on='guest_index')

# Satisfaction is 1 for the first choice and decreases linearly with the rank;
# unassigned guests contribute 0 because the sum is divided by all guests
df_pref_matrix['Satisfaction'] = (df_pref_matrix.num_pref - df_pref_matrix.Hotel_pref_index)/df_pref_matrix.num_pref
Satifaction_level = df_pref_matrix.Satisfaction.sum()/df_guest.shape[0]

In [64]:
# Occupancy masks and counts, computed once and reused by the report below.
is_full = df_hotel_matrix['Rooms'] == df_hotel_matrix['Occupied Rooms']
is_empty = df_hotel_matrix['Occupied Rooms'] == 0
n_assigned = df_pref_matrix.shape[0]
n_full = df_hotel_matrix[is_full].shape[0]
n_empty = df_hotel_matrix[is_empty].shape[0]

print ('Total business volume:','%.2f'% df_pref_matrix.price_to_set.sum(),'Euros')
print ('Assigned guests:', n_assigned)
print ('Vacant rooms:', df_hotel.rooms.sum() - n_assigned)
print ('Number of fully booked hotels:', n_full)
print ('Number of completely vacant hotels:', n_empty)
print ('Number of neither full nor vacant hotels:', df_hotel_matrix.shape[0] - n_full - n_empty)
print ('Overall satisfaction is:','%.2f'%(Satifaction_level*100),'%' )

Total business volume: 630956.80 Euros
Assigned guests: 3954
Vacant rooms: 663
Number of fully booked hotels: 266
Number of completely vacant hotels: 38
Number of neither full nor vacant hotels: 96
Overall satisfaction is: 53.78 %


In [65]:
# Display the per-guest assignment table built by the preceding statistics cell.
df_pref_matrix

Unnamed: 0,guest_index,hotel_index,guest,discount,hotel,price,price_to_set,Hotel_pref_index,num_pref,Satisfaction
0,0,128,guest_1,0.09,hotel_129,85,77.35,6,18,0.666667
1,11,128,guest_12,0.17,hotel_129,85,70.55,18,36,0.500000
2,55,128,guest_56,0.19,hotel_129,85,68.85,5,8,0.375000
3,62,128,guest_63,0.10,hotel_129,85,76.50,3,45,0.933333
4,90,128,guest_91,0.03,hotel_129,85,82.45,2,30,0.933333
...,...,...,...,...,...,...,...,...,...,...
3949,3952,176,guest_3953,0.18,hotel_177,79,64.78,2,5,0.600000
3950,3956,214,guest_3957,0.08,hotel_215,133,122.36,11,15,0.266667
3951,3992,214,guest_3993,0.06,hotel_215,133,125.02,5,11,0.545455
3952,3995,214,guest_3996,0.00,hotel_215,133,133.00,28,44,0.363636
