## 1. Import relevant libraries

In [146]:
import os
import random
from timeit import default_timer as timer

import numpy as np
import pandas as pd

## 2. Open csv files

In [147]:
# Load the three input tables (guests, hotels, preferences) from the working directory.
df_guest, df_hotel, df_pref = (
    pd.read_csv(csv_path)
    for csv_path in ("guests.csv", "hotels.csv", "preferences.csv")
)

In [148]:
# Preview the first rows of the preferences table (guest, hotel, priority).
df_pref.head()

Unnamed: 0,guest,hotel,priority
0,guest_1,hotel_168,1
1,guest_1,hotel_207,2
2,guest_1,hotel_222,3
3,guest_1,hotel_124,4
4,guest_1,hotel_223,5


In [149]:
# Preview the first rows of the guests table (guest, discount).
df_guest.head()

Unnamed: 0,guest,discount
0,guest_1,0.09
1,guest_2,0.0
2,guest_3,0.07
3,guest_4,0.0
4,guest_5,0.1


In [150]:
# Display the hotels table (hotel, rooms, price).
df_hotel

Unnamed: 0,hotel,rooms,price
0,hotel_1,13,273
1,hotel_2,18,92
2,hotel_3,12,141
3,hotel_4,18,157
4,hotel_5,7,298
...,...,...,...
395,hotel_396,5,212
396,hotel_397,12,68
397,hotel_398,14,102
398,hotel_399,16,284


#### Derive zero-based integer indices for hotels and guests



In [151]:
# Labels have the form "<kind>_<number>": keep the part after the first
# underscore, convert it to int and subtract 1 to get a zero-based index.
df_pref['guest_index'] = df_pref['guest'].str.split("_").str[1].map(int) - 1
df_pref['hotel_index'] = df_pref['hotel'].str.split("_").str[1].map(int) - 1

In [152]:
# Same conversion for the hotels table: "hotel_<number>" -> number - 1 (zero-based).
df_hotel['hotel_index'] = df_hotel['hotel'].str.split("_").str[1].map(int) - 1

1. Define an assignment order based on randomizing the guests and their preferences
2. Guest–hotel allocation. For each guest, and as long as there are hotels left to assign:
    - draw a hotel from the guest's preferences
    - if the hotel has more than 0 rooms available, assign the guest to that hotel
    - otherwise repeat step 2
    - if no hotels are left for a given guest, the guest does not go on vacation!

## 3. Initializations

#### Define a dictionary where the keys are the guests and the values are the lists of hotels chosen by each guest.  

{"guest_x": 
    
    \["hotel_c", "hotel_a", ..., "hotel_s"\],
    
    ...}
    
Where \["hotel_c", "hotel_a", ..., "hotel_s"\] is a list of hotels ordered by preference

In [153]:
# Map each guest_index to the list of hotel_index values that guest chose,
# ordered by declared priority. The explicit stable sort makes the ordering
# independent of the row order in preferences.csv (ties keep file order).
pref_by_guest = (
    df_pref.sort_values("priority", kind="stable")
    .groupby("guest_index")["hotel_index"]
    .agg(list)
    .to_dict()
)

#### Initialize the preference (assignment) matrix

In [154]:
# One row per guest and one column per hotel, all zeros to start with;
# the allocation step writes a 1 at [guest, hotel] when a guest is placed.
pref_matrix = np.zeros(shape=(df_guest.shape[0], df_hotel.shape[0]))

#### Creating hotel matrix

In [155]:
# Build a (n_hotels, 2) array with columns [rooms, price].
# Later cells address row i as the hotel with hotel_index i, so sort by
# hotel_index explicitly instead of relying on the CSV row order.
# NOTE(review): this assumes hotel_index values are contiguous 0..n-1 — confirm.
hotel_matrix = (
    df_hotel.set_index("hotel_index")
    .sort_index()
    .loc[:, ["rooms", "price"]]
    .to_numpy()
)

#### Adding third column in hotel_matrix to keep track of booked rooms

In [156]:
# Append a zero-filled third column that counts booked rooms per hotel.
# Only the first two columns (rooms, price) are kept, so re-running this cell
# resets the counter instead of appending yet another column.
hotel_matrix = np.c_[hotel_matrix[:, :2], np.zeros(hotel_matrix.shape[0])]
hotel_matrix

array([[ 13., 273.,   0.],
       [ 18.,  92.,   0.],
       [ 12., 141.,   0.],
       ...,
       [ 14., 102.,   0.],
       [ 16., 284.,   0.],
       [ 14., 145.,   0.]])

## 4. Guest distribution algorithms

### 4.1 Random distribution

- Randomly shuffle the list of guests and the list of hotels chosen by each guest.

In [157]:
# Use a dedicated, seeded generator so that "Restart & Run All" reproduces the
# same random order without touching the global `random` state.
shuffle_rng = random.Random(42)

# Random permutation of all guest indices.
guest_ids = list(pref_by_guest)
initial_guest_list = shuffle_rng.sample(guest_ids, len(guest_ids))

# For every guest, a random permutation of that guest's hotel list.
pref_by_guest_random = {
    guest: shuffle_rng.sample(pref_by_guest[guest], len(pref_by_guest[guest]))
    for guest in initial_guest_list
}


In [158]:
# NOTE: this allocation loop is commented out and does not run.
# for guest in initial_guest_list:
#     hotel_list_per_guest = pref_by_guest_random[guest]  # list with the preferences of the i-th guest
#     for hotel in hotel_list_per_guest:
#          # Check whether there is still room
#          # if pref_matrix[:,hotel].sum() < hotel_matrix[hotel, 0]: # Slower solution
#         if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
#             pref_matrix[guest, hotel] = 1
#             hotel_matrix[hotel, 2] += 1
#             break

### 4.2 Distribution by booking order and priorities declared by the guests

- The distribution of the guests in the hotels follows the booking order (guest list shall be sorted numerically) and the indicated priorities (priority order shall be the one declared in preferences.csv).  

In [159]:
# Serve guests in ascending guest_index order.
initial_guest_list = sorted(pref_by_guest)

# No shuffling here: reuse the preference lists exactly as stored in pref_by_guest.
pref_by_guest_random = pref_by_guest

In [160]:
# NOTE: this allocation loop is commented out and does not run.
# for guest in initial_guest_list:
#     hotel_list_per_guest = pref_by_guest_random[guest]  # list with the preferences of the i-th guest
#     for hotel in hotel_list_per_guest:
#      # Check whether there is still room
#          # if pref_matrix[:,hotel].sum() < hotel_matrix[hotel, 0]: # Slower solution
#         if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
#             pref_matrix[guest, hotel] = 1
#             hotel_matrix[hotel, 2] += 1
#             break

### 4.3 Distribution by room price

- The distribution will start from the cheapest hotel to the most expensive one.

In [161]:
# Guests in ascending guest_index order; preference lists taken as-is from pref_by_guest.
initial_guest_list = sorted(pref_by_guest.keys())
pref_by_guest_random = pref_by_guest

In [162]:
# Hotels sorted by ascending price.
hotel_price_sorted = df_hotel.sort_values('price')

# Array view of the sorted table without the label column (kept for inspection).
hotel_price = hotel_price_sorted.drop(columns=['hotel']).values

# Hotel indices in price order, read by column name rather than by positional
# column 2 so the list stays correct if df_hotel's column layout changes.
hotel_price_list = hotel_price_sorted['hotel_index'].tolist()

In [163]:
# Inspect the price-sorted array built from df_hotel without the 'hotel' column.
hotel_price

array([[ 10,  50, 189],
       [ 19,  50,  48],
       [ 12,  52, 353],
       ...,
       [ 13, 298, 167],
       [  9, 299, 229],
       [ 10, 299,  26]], dtype=int64)

In [164]:
# Record the wall-clock start time; the matching `end = timer()` is taken in a
# later cell, so the measured span is whatever runs in between.
# (`timer` is imported in the notebook's import cell.)
start = timer()

In [165]:
# NOTE: this allocation loop is commented out and does not run.
# for hotel_ch in hotel_price_list:
#     for guest in initial_guest_list:
#         if hotel_matrix[hotel_ch, 2] == hotel_matrix[hotel_ch, 0]:
#             break
#         else:
#             if pref_matrix[guest,:].sum() == 0:
#                 hotel_list_per_guest = pref_by_guest_random[guest]  # list with the preferences of the i-th guest
#                 for hotel in hotel_list_per_guest:
#                     if hotel == hotel_ch:
#                         pref_matrix[guest, hotel_ch] = 1
#                         hotel_matrix[hotel_ch, 2] += 1
#                         break

In [166]:
# Stop the clock started with `start = timer()`.
end = timer()

# Elapsed seconds between the two timer readings.
print("Time taken:", end - start)

Time taken: 0.032124700002896134


### 4.4 Distribution by room availability

- The distribution will start from the largest hotel to the smallest one (big/small in terms of room availability). 

In [167]:
# Ascending guest_index order for serving guests.
initial_guest_list = sorted(pref_by_guest)
# Preference lists are used unchanged (same dict object as pref_by_guest).
pref_by_guest_random = pref_by_guest

In [168]:
# Hotels sorted by number of rooms, largest first.
hotel_avail_sorted = df_hotel.sort_values('rooms', ascending=False)

# Array view of the sorted table without the label column (kept for inspection).
hotel_avail = hotel_avail_sorted.drop(columns=['hotel']).values

# Hotel indices in that order, read by column name rather than by positional
# column 2 so the list stays correct if df_hotel's column layout changes.
hotel_avail_list = hotel_avail_sorted['hotel_index'].tolist()

In [169]:
def allocate_by_hotel_order(hotel_order, guest_order, prefs, pref_matrix, hotel_matrix):
    """Fill hotels one at a time, in ``hotel_order``, with guests that listed them.

    For each hotel, guests are scanned in ``guest_order``. A guest is placed in
    the hotel if they have no hotel yet and the hotel appears in their
    preference list. Scanning stops as soon as the hotel has no free room left.

    Parameters
    ----------
    hotel_order : iterable of int
        Hotel indices (rows of ``hotel_matrix``) in the order they are filled.
    guest_order : iterable of int
        Guest indices (rows of ``pref_matrix``) in the order they are served.
    prefs : dict
        Maps a guest index to the list of hotel indices that guest accepts.
    pref_matrix : numpy.ndarray
        Guest x hotel matrix, updated in place: 1 marks an assignment.
    hotel_matrix : numpy.ndarray
        One row per hotel; column 0 is the room count and column 2 the number
        of booked rooms, which is incremented in place.

    Returns
    -------
    None
        Both matrices are modified in place.
    """
    guest_order = list(guest_order)

    # Track placed guests in a flat boolean array instead of summing a whole
    # row of pref_matrix on every check.
    assigned = pref_matrix.sum(axis=1) != 0

    # Sets turn "did this guest list this hotel?" into a constant-time lookup.
    wanted = {guest: set(prefs[guest]) for guest in guest_order}

    for hotel_ch in hotel_order:
        for guest in guest_order:
            # ">=" rather than "==" so a hotel whose counter is already past
            # its capacity can never be booked further.
            if hotel_matrix[hotel_ch, 2] >= hotel_matrix[hotel_ch, 0]:
                break
            if assigned[guest] or hotel_ch not in wanted[guest]:
                continue
            pref_matrix[guest, hotel_ch] = 1
            hotel_matrix[hotel_ch, 2] += 1
            assigned[guest] = True


# Largest hotels first, guests in ascending index order.
allocate_by_hotel_order(
    hotel_avail_list,
    initial_guest_list,
    pref_by_guest_random,
    pref_matrix,
    hotel_matrix,
)

In [170]:
# Inspect the guest x hotel assignment matrix after allocation.
pref_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [171]:
# Inspect the hotel array after allocation; the third column holds the booked-room counts.
hotel_matrix

array([[ 13., 273.,  13.],
       [ 18.,  92.,  18.],
       [ 12., 141.,  12.],
       ...,
       [ 14., 102.,  14.],
       [ 16., 284.,  16.],
       [ 14., 145.,  14.]])

In [172]:
# Wrap the hotel array in a DataFrame and order it by column 1 (ascending),
# then show the last 30 rows, i.e. the largest values of that column.
hotel_dataframe1 = pd.DataFrame(data=hotel_matrix).sort_values(by=1)
hotel_dataframe1.tail(n=30)

Unnamed: 0,0,1,2
398,16.0,284.0,16.0
230,9.0,285.0,9.0
92,11.0,285.0,11.0
163,5.0,286.0,0.0
311,15.0,286.0,15.0
387,17.0,286.0,17.0
17,8.0,287.0,8.0
201,10.0,288.0,10.0
96,16.0,288.0,16.0
138,8.0,289.0,2.0


In [173]:
# Per-column totals over all hotels (one sum for each of columns 0, 1 and 2).
x = hotel_dataframe1.sum(axis=0)
x

0     4617.0
1    71058.0
2     3954.0
dtype: float64

In [174]:
# `df_pref_matrix` was never defined in the notebook, so this cell failed on a
# fresh kernel. Build it from the assignment matrix before filtering.
df_pref_matrix = pd.DataFrame(pref_matrix)

# Rows whose entries sum to 0, i.e. guests that were not assigned to any hotel.
df_pref_matrix[df_pref_matrix.sum(axis=1) == 0]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,390,391,392,393,394,395,396,397,398,399
728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
773,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1703,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1936,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3935,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3952,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3957,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3969,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
