## 1. Import relevant libraries

In [1]:
import pandas as pd
import numpy as np
import os
import random

## 2. Open csv files

In [2]:
df_guest = pd.read_csv("guests.csv")
df_hotel = pd.read_csv("hotels.csv")
df_pref = pd.read_csv("preferences.csv")

In [3]:
df_pref.head()

Unnamed: 0,guest,hotel,priority
0,guest_1,hotel_168,1
1,guest_1,hotel_207,2
2,guest_1,hotel_222,3
3,guest_1,hotel_124,4
4,guest_1,hotel_223,5


In [4]:
df_guest.head()

Unnamed: 0,guest,discount
0,guest_1,0.09
1,guest_2,0.0
2,guest_3,0.07
3,guest_4,0.0
4,guest_5,0.1


In [5]:
df_hotel

Unnamed: 0,hotel,rooms,price
0,hotel_1,13,273
1,hotel_2,18,92
2,hotel_3,12,141
3,hotel_4,18,157
4,hotel_5,7,298
...,...,...,...
395,hotel_396,5,212
396,hotel_397,12,68
397,hotel_398,14,102
398,hotel_399,16,284


#### Reset hotel and guest index 



In [6]:
df_pref['guest_index'] = df_pref.guest.str.split("_").apply(lambda x: int(x[1])-1)
df_pref['hotel_index'] = df_pref.hotel.str.split("_").apply(lambda x: int(x[1])-1)

In [7]:
df_hotel['hotel_index'] = df_hotel.hotel.str.split("_").apply(lambda x: int(x[1])-1)

In [8]:
df_guest['guest_index'] = df_guest.guest.str.split("_").apply(lambda x: int(x[1])-1)

1. Definire un ordinamento di assegnazione basato sulla randomizzazione degli utenti e delle loro preferenze
2. Allocazione guest - hotel. Per ogni guest e fintantoché ho hotel da assegnare:
    - sorteggiare un hotel nelle preferenze
    - se la disponibilità delle camere dell'hotel è > 0 assegnare guest all'hotel
    - altrimenti ripetere 2
    - se non ci sono più hotel per un dato guest. Il guest non va in vacanza! 

## 3. Initializations

#### Define a dictionary where the keys are the guests and the values are the lists of hotels chosen by each guest.  

{"guest_x": 
    
    \["hotel_c", "hotel_a", ..., "hotel_s"\],
    
    ...}
    
Dove  \["hotel_c", "hotel_a", ..., "hotel_s"\] è una lista di alberghi ordinata per preferenza

In [9]:
pref_by_guest = df_pref.groupby("guest_index").agg({"hotel_index":list}).to_dict()["hotel_index"]

#### Initialization matrix of preferences

In [10]:
pref_matrix = np.zeros((len(df_guest), len(df_hotel)))

#### Creating hotel matrix

In [11]:
hotel_matrix = df_hotel.drop(columns=['hotel']).set_index("hotel_index").values

#### Creating discount matrix

In [12]:
discount_matrix = df_guest.drop(columns = ['guest']).set_index('guest_index').values
discount_matrix_compl = 1-discount_matrix
discount_matrix_compl

array([[0.91],
       [1.  ],
       [0.93],
       ...,
       [0.93],
       [0.92],
       [0.93]])

#### Adding third column in hotel_matrix to keep track of booked rooms

In [13]:
hotel_matrix = np.c_[hotel_matrix, np.zeros(len(df_hotel))]
hotel_matrix

array([[ 13., 273.,   0.],
       [ 18.,  92.,   0.],
       [ 12., 141.,   0.],
       ...,
       [ 14., 102.,   0.],
       [ 16., 284.,   0.],
       [ 14., 145.,   0.]])

## 4. Guest distribution algorithms

### 4.1 Casual distribution

- Shuffle randomly the list of guests and the lists of hotels related to each guest. 

In [14]:
# Inizializzo hotel_matrix epref_matrix per permettere più calcoli in serie
hotel_matrix[:,2] = 0
pref_matrix[:,:] = 0 

In [15]:
initial_guest_list = random.sample(list(pref_by_guest.keys()), len(list(pref_by_guest.keys())))

pref_by_guest_random = {guest: random.sample(pref_by_guest[guest], len(pref_by_guest[guest])) for guest in initial_guest_list}


In [16]:
hotel_random = df_hotel.drop(columns = ['hotel']).values
hotel_random_list = list(hotel_random[:,2])

In [17]:
for guest in initial_guest_list:
    hotel_list_per_guest = pref_by_guest_random[guest]  # lista contenente le preferenze del guest i-esimo
    for hotel in hotel_list_per_guest:
         # Verifica se c'è posto
         # if pref_matrix[:,hotel].sum() < hotel_matrix[hotel, 0]: #Soluzione più lenta
        if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
            pref_matrix[guest, hotel] = 1
            hotel_matrix[hotel, 2] += 1
            break

In [18]:
#Moltiplico gli 1 della matrice delle preferenze per lo sconto relativo ad ogni guest. Poi moltiplico 
#il risultato per il prezzo di ogni stanza 
#facendo la somma di tutti i termini, ottengo il volume d'affari complessivo 
pref_disc_matrix = pref_matrix * discount_matrix_compl
pref_amount_matrix = pref_disc_matrix * hotel_matrix[:,1]

x = np.sum(pref_amount_matrix,axis=1)
y = np.sum(pref_amount_matrix,axis=0)
xtot = np.sum(x,axis=0)

HM_sum = (np.sum(hotel_matrix,axis=0))

In [92]:

# Trova il massimo indice (index dell'hotel) per colonna per ogni guest. Se il guset non è stato assegnato viene generato un indice non indicato nelle tabelle degli hotel
df_pref_matrix = pd.DataFrame(np.argmax(np.c_[pref_matrix, np.array([1]*len(df_guest),)], axis=1), columns=['hotel_index'])
df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

# Assegno sconto per ogni cliente
df_pref_matrix = df_pref_matrix.merge(df_guest, on='guest_index')

# Assegno prezzo per ogni hotel e filtra via i guest che non hanno una prenotazione
df_pref_matrix = df_pref_matrix.merge(df_hotel.drop(columns=["rooms"]), on='hotel_index')

# Calcola il prezzo da applicare ad ogni guest
df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1-df_pref_matrix.discount)

df_pref_matrix.price_to_set.sum()

638768.21

- Summary

In [19]:
print ('Occupied rooms:',int(HM_sum[2]))
print('Empty rooms:',int(HM_sum[0]-HM_sum[2]))
print('Assigned guests:',int(HM_sum[2]))
print('Guests staying at home:',int(df_guest.shape[0]-HM_sum[2]))
print('Hotels with no guests:', np.count_nonzero(y==0))
print ('The total business volume amounts to',xtot,'Euros')

Occupied rooms: 3976
Empty rooms: 641
Assigned guests: 3976
Guests staying at home: 24
Hotels with no guests: 0
The total business volume amounts to 638768.21 Euros


### 4.2 Distribution by booking order and priorities declared by the guests

- The distribution of the guests in the hotels follows the booking order (guest list shall be sorted numerically) and the indicated priorities (priority order shall be the one declared in preferences.csv).  

In [893]:
# Inizializzo hotel_matrix epref_matrix per permettere più calcoli in serie
hotel_matrix[:,2] = 0
pref_matrix[:,:] = 0 

In [894]:
initial_guest_list = sorted(list(pref_by_guest.keys()))

pref_by_guest_random = pref_by_guest

In [895]:
hotel_prio = df_hotel.drop(columns = ['hotel']).values
hotel_prio_list = list(hotel_prio[:,2])

In [896]:
for guest in initial_guest_list:
    hotel_list_per_guest = pref_by_guest_random[guest]  # lista contenente le preferenze del guest i-esimo
    for hotel in hotel_list_per_guest:
     # Verifica se c'è posto
         # if pref_matrix[:,hotel].sum() < hotel_matrix[hotel, 0]: #Soluzione più lenta
        if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
            pref_matrix[guest, hotel] = 1
            hotel_matrix[hotel, 2] += 1
            break

In [897]:
#Moltiplico gli 1 della matrice delle preferenze per lo sconto relativo ad ogni guest. Poi moltiplico 
#il risultato per il prezzo di ogni stanza 
#facendo la somma di tutti i termini, ottengo il volume d'affari complessivo 

pref_amount_matrix = pref_disc_matrix * hotel_matrix[:,1]

x = np.sum(pref_amount_matrix,axis=1)
y = np.sum(pref_amount_matrix,axis=0)
xtot = np.sum(x,axis=0)
HM_sum = (np.sum(hotel_matrix,axis=0))

In [924]:
df_disc_matrix = pd.DataFrame(discount_matrix_compl)
df_pref_matrix = pd.DataFrame(pref_matrix)

In [930]:
df_disc_matrix = df_disc_matrix.rename(columns={0:'Discount_compl'})
df_disc_matrix

Unnamed: 0,Discount_compl
0,0.91
1,1.00
2,0.93
3,1.00
4,0.90
...,...
3995,1.00
3996,0.85
3997,0.93
3998,0.92


In [931]:
df_pref_matrix

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,390,391,392,393,394,395,396,397,398,399
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [922]:
(discount_matrix_compl)

array([[0.91],
       [1.  ],
       [0.93],
       ...,
       [0.93],
       [0.92],
       [0.93]])

In [923]:
(pref_matrix)

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

- Summary

In [898]:
print ('Occupied rooms:',int(HM_sum[2]))
print('Empty rooms:',int(HM_sum[0]-HM_sum[2]))
print('Assigned guests:',int(HM_sum[2]))
print('Guests staying at home:',int(df_guest.shape[0]-HM_sum[2]))
print('Hotels with no guests:', np.count_nonzero(y==0))

print ('The total business volume amounts to',xtot,'Euros')

Occupied rooms: 3975
Empty rooms: 642
Assigned guests: 3975
Guests staying at home: 25
Hotels with no guests: 0
The total business volume amounts to 643597.5299999999 Euros


### 4.3 Distribution by room price

- The distribution will start from the cheapest hotel to the most expensive one.

In [899]:
# Inizializzo hotel_matrix epref_matrix per permettere più calcoli in serie
hotel_matrix[:,2] = 0
pref_matrix[:,:] = 0 

In [900]:
initial_guest_list = sorted(list(pref_by_guest.keys()))
pref_by_guest_random = pref_by_guest

In [901]:
hotel_price = df_hotel.sort_values('price').drop(columns = ['hotel']).values
hotel_price_list = list(hotel_price[:,2])

In [902]:
from timeit import default_timer as timer

start= timer()

In [903]:
hotel_matrix

array([[ 13., 273.,   0.],
       [ 18.,  92.,   0.],
       [ 12., 141.,   0.],
       ...,
       [ 14., 102.,   0.],
       [ 16., 284.,   0.],
       [ 14., 145.,   0.]])

In [904]:
for hotel_ch in hotel_price_list:
    for guest in initial_guest_list:
        if hotel_matrix[hotel_ch, 2] == hotel_matrix[hotel_ch, 0]:
            break
        else:
            if pref_matrix[guest,:].sum() == 0:
                hotel_list_per_guest = pref_by_guest_random[guest]  # lista contenente le preferenze del guest i-esimo
                #for hotel in hotel_list_per_guest:
                #    if hotel == hotel_ch:
                #        pref_matrix[guest, hotel_ch] = 1
                #        hotel_matrix[hotel_ch, 2] += 1
                #        break
                if hotel_ch in hotel_list_per_guest:
                    pref_matrix[guest, hotel_ch] = 1
                    hotel_matrix[hotel_ch, 2] += 1
                    break
                    
                

In [905]:
end = timer()

print("Time taken:", end-start)

Time taken: 0.34856730001047254


In [906]:
#Moltiplico gli 1 della matrice delle preferenze per lo sconto relativo ad ogni guest. Poi moltiplico 
#il risultato per il prezzo di ogni stanza 
#facendo la somma di tutti i termini, ottengo il volume d'affari complessivo 
pref_disc_matrix = pref_matrix * discount_matrix_compl
pref_amount_matrix = pref_disc_matrix * hotel_matrix[:,1]

x = np.sum(pref_amount_matrix,axis=1)
y = np.sum(pref_amount_matrix,axis=0)
xtot = np.sum(x,axis=0)

HM_sum = (np.sum(hotel_matrix,axis=0))

- Summary

In [907]:
print ('Occupied rooms:',int(HM_sum[2]))
print('Empty rooms:',int(HM_sum[0]-HM_sum[2]))
print('Assigned guests:',int(HM_sum[2]))
print('Guests staying at home:',int(df_guest.shape[0]-HM_sum[2]))
print('Hotels with no guests:', np.count_nonzero(y==0))

print ('The total business volume amounts to',xtot,'Euros')

Occupied rooms: 400
Empty rooms: 4217
Assigned guests: 400
Guests staying at home: 3600
Hotels with no guests: 0
The total business volume amounts to 64366.31 Euros


### 4.4 Distribution by room availability

- The distribution will start from the largest hotel to the smallest one (big/small in terms of room availability). 

In [908]:
# Inizializzo hotel_matrix epref_matrix per permettere più calcoli in serie
hotel_matrix[:,2] = 0
pref_matrix[:,:] = 0 

In [909]:
initial_guest_list = sorted(list(pref_by_guest.keys()))
pref_by_guest_random = pref_by_guest

In [910]:
hotel_avail = df_hotel.sort_values('rooms', ascending=False).drop(columns = ['hotel']).values
hotel_avail_list = list(hotel_avail[:,2])

In [911]:
for hotel_ch in hotel_avail_list:
    for guest in initial_guest_list:
        #se tutte le stanze dell'hotel sono prenotate, interrompi il ciclo e passa all'hotel successivo
        if hotel_matrix[hotel_ch, 2] == hotel_matrix[hotel_ch, 0]: 
            break
        else:
            # se il guest non è stato ancora assegnato, cerca tra le sue preferenze  
            if pref_matrix[guest,:].sum() == 0:
                hotel_list_per_guest = pref_by_guest_random[guest]  # lista contenente le preferenze del guest i-esimo
                #for hotel in hotel_list_per_guest:
                #    if hotel == hotel_ch:
                #        pref_matrix[guest, hotel_ch] = 1
                #        hotel_matrix[hotel_ch, 2] += 1
                #        break
                if hotel_ch in hotel_list_per_guest:
                    pref_matrix[guest, hotel_ch] = 1
                    hotel_matrix[hotel_ch, 2] += 1
                    break

In [912]:
#Moltiplico gli 1 della matrice delle preferenze per lo sconto relativo ad ogni guest. Poi moltiplico 
#il risultato per il prezzo di ogni stanza 
#facendo la somma di tutti i termini, ottengo il volume d'affari complessivo 
pref_disc_matrix = pref_matrix * discount_matrix_compl
pref_amount_matrix = pref_disc_matrix * hotel_matrix[:,1]

x = np.sum(pref_amount_matrix,axis=1)
y = np.sum(pref_amount_matrix,axis=0)
xtot = np.sum(x,axis=0)

HM_sum = (np.sum(hotel_matrix,axis=0))

- Summary

In [913]:
print ('Occupied rooms:',int(HM_sum[2]))
print('Empty rooms:',int(HM_sum[0]-HM_sum[2]))
print('Assigned guests:',int(HM_sum[2]))
print('Guests staying at home:',int(df_guest.shape[0]-HM_sum[2]))
print('Hotels with no guests:', np.count_nonzero(y==0))

print ('The total business volume amounts to',xtot,'Euros')

Occupied rooms: 400
Empty rooms: 4217
Assigned guests: 400
Guests staying at home: 3600
Hotels with no guests: 0
The total business volume amounts to 64380.649999999994 Euros


In [914]:
df_pref_matrix[df_pref_matrix.sum(1) == 0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,390,391,392,393,394,395,396,397,398,399
728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1703,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3952,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [915]:
hotel_matrix

array([[ 13., 273.,   1.],
       [ 18.,  92.,   1.],
       [ 12., 141.,   1.],
       ...,
       [ 14., 102.,   1.],
       [ 16., 284.,   1.],
       [ 14., 145.,   1.]])

In [916]:
hotel_matrix[:,2] = 0
hotel_matrix

array([[ 13., 273.,   0.],
       [ 18.,  92.,   0.],
       [ 12., 141.,   0.],
       ...,
       [ 14., 102.,   0.],
       [ 16., 284.,   0.],
       [ 14., 145.,   0.]])

### Missing codes
1 Guest satisfaction

2 Number of hotels full

3 Number of hotels not completely full