In [20]:
import pandas as pd
import numpy as np
import os
import random

In [21]:
# Load the three input tables (guests, hotels, guest preferences) from CSV
# files that sit next to the notebook.
df_guest = pd.read_csv(filepath_or_buffer="guests.csv")
df_hotel = pd.read_csv(filepath_or_buffer="hotels.csv")
df_pref = pd.read_csv(filepath_or_buffer="preferences.csv")

In [22]:
# Preview the first five rows of the preference table.
df_pref.head(n=5)

Unnamed: 0,guest,hotel,priority
0,guest_1,hotel_168,1
1,guest_1,hotel_207,2
2,guest_1,hotel_222,3
3,guest_1,hotel_124,4
4,guest_1,hotel_223,5


In [23]:
# Preview the first five rows of the guest table.
df_guest.head(n=5)

Unnamed: 0,guest,discount
0,guest_1,0.09
1,guest_2,0.0
2,guest_3,0.07
3,guest_4,0.0
4,guest_5,0.1


In [24]:
# Display the hotel table (the rendered output elides the middle rows).
df_hotel

Unnamed: 0,hotel,rooms,price
0,hotel_1,13,273
1,hotel_2,18,92
2,hotel_3,12,141
3,hotel_4,18,157
4,hotel_5,7,298
...,...,...,...
395,hotel_396,5,212
396,hotel_397,12,68
397,hotel_398,14,102
398,hotel_399,16,284


In [25]:
# Convert the 1-based numeric suffix of each label ("guest_1", "hotel_168", ...)
# into a 0-based integer index stored next to the original label.
df_pref["guest_index"] = df_pref["guest"].str.split("_").str[1].astype("int64") - 1
df_pref["hotel_index"] = df_pref["hotel"].str.split("_").str[1].astype("int64") - 1

In [26]:
# Same label -> 0-based index conversion for the hotel table ("hotel_1" -> 0).
df_hotel["hotel_index"] = df_hotel["hotel"].str.split("_").str[1].astype("int64") - 1

# Ordinamento Casuale

1. Definire un ordinamento di assegnazione basato sulla randomizzazione degli utenti e delle loro preferenze
2. Allocazione guest - hotel. Per ogni guest e fintantoché ho hotel da assegnare:
    - sorteggiare un hotel nelle preferenze
    - se la disponibilità delle camere dell'hotel è > 0 assegnare guest all'hotel
    - altrimenti ripetere 2
    - se non ci sono più hotel disponibili per un dato guest, il guest non va in vacanza!

## Definire un ordinamento di assegnazione iniziale

{"guest_x": 
    
    \["hotel_c", "hotel_a", ..., "hotel_s"\],
    
    ...}
    
Dove  \["hotel_c", "hotel_a", ..., "hotel_s"\] è una lista di alberghi ordinata per preferenza

In [27]:
# Preview the preference table again, now with the guest_index / hotel_index columns.
df_pref.head(n=5)

Unnamed: 0,guest,hotel,priority,guest_index,hotel_index
0,guest_1,hotel_168,1,0,167
1,guest_1,hotel_207,2,0,206
2,guest_1,hotel_222,3,0,221
3,guest_1,hotel_124,4,0,123
4,guest_1,hotel_223,5,0,222


### Inizializzazione

In [28]:
# Map each guest_index to the list of hotel_index values that guest listed,
# in ascending `priority` order (assumes priority 1 is the top choice, as in
# the previewed rows -- confirm). Sorting explicitly on `priority` means the
# ranking no longer depends on the row order of preferences.csv; the stable
# sort keeps the file order for rows that share the same priority.
pref_by_guest = (
    df_pref
    .sort_values("priority", kind="stable")
    .groupby("guest_index")["hotel_index"]
    .agg(list)
    .to_dict()
)

In [29]:
# Guest-by-hotel matrix (one row per guest, one column per hotel), all zeros
# to start with. The allocation loop writes a 1 at [guest, hotel] for every
# assignment it makes.
pref_matrix = np.zeros(shape=(df_guest.shape[0], df_hotel.shape[0]))

In [30]:
# Numeric view of the hotel table: column 0 is `rooms`, column 1 is `price`.
# Rows are sorted by hotel_index so that positional access
# (hotel_matrix[hotel_index]) reaches the right hotel even if hotels.csv is
# not stored in index order; selecting the columns by name pins their
# positions. Assumes hotel_index values are contiguous from 0 -- confirm.
hotel_matrix = (
    df_hotel
    .set_index("hotel_index")
    .sort_index()
    .loc[:, ["rooms", "price"]]
    .to_numpy()
)

In [31]:
# Third column (index 2) tracks how many rooms are already taken in each
# hotel; it starts at zero. Only the first two columns (rooms, price) are
# carried over, so re-running this cell resets the counter instead of
# appending yet another column.
hotel_matrix = np.c_[hotel_matrix[:, :2], np.zeros(len(df_hotel))]

### Assegnazione

In [13]:
# Random variant of the allocation order: shuffle both the order in which
# guests are served and, for each guest, the order of their listed hotels.
# A dedicated seeded generator keeps the draw reproducible across runs.
# NOTE(review): the following cell rebinds initial_guest_list and
# pref_by_guest_random, so on a top-to-bottom run this random ordering is not
# the one the allocation loop uses -- keep only the variant you want.
RANDOM_ORDER_SEED = 42
shuffle_rng = random.Random(RANDOM_ORDER_SEED)

initial_guest_list = shuffle_rng.sample(list(pref_by_guest.keys()), len(pref_by_guest))

pref_by_guest_random = {
    guest: shuffle_rng.sample(pref_by_guest[guest], len(pref_by_guest[guest]))
    for guest in initial_guest_list
}


In [32]:
# Keep the booking order: serve guests in ascending guest_index order and try
# each guest's hotels in the order stored in pref_by_guest.
initial_guest_list = sorted(pref_by_guest)

# Plain alias (no copy): both names refer to the same dict.
pref_by_guest_random = pref_by_guest

In [None]:
# NOTE(review): "geopanda" looks like a typo for "geopandas"; this cell has no
# execution count and the module is not used in the visible cells -- confirm,
# then fix the name and move it to the import cell, or delete the cell.
import geopanda

In [33]:
# Greedy allocation: visit guests in the chosen order and give each one the
# first hotel in their list that still has a free room.
# Start from a clean state so that re-running this cell does not stack a
# second allocation on top of the previous one.
pref_matrix[:] = 0
hotel_matrix[:, 2] = 0

for guest in initial_guest_list:
    hotel_list_per_guest = pref_by_guest_random[guest]  # hotels to try for this guest, in order
    for hotel in hotel_list_per_guest:
        # Free room check: column 2 counts the rooms already taken, column 0
        # is the number of rooms. The counter equals the number of guests
        # assigned to the hotel, so no column sum over pref_matrix is needed.
        if hotel_matrix[hotel, 2] < hotel_matrix[hotel, 0]:
            pref_matrix[guest, hotel] = 1
            hotel_matrix[hotel, 2] += 1
            break
    # If no listed hotel has room, the guest's row in pref_matrix stays all zero.

In [34]:
# Number of guests in the allocation order.
len(initial_guest_list)

4000

In [35]:
# Wrap the guest-by-hotel assignment matrix in a DataFrame for inspection.
df_pref_matrix = pd.DataFrame(data=pref_matrix)

In [36]:
# Rows that sum to zero belong to guests who were not assigned to any hotel.
df_pref_matrix.loc[df_pref_matrix.sum(axis=1).eq(0)]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,390,391,392,393,394,395,396,397,398,399
1315,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1689,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2651,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2736,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2751,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2793,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2910,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2937,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3009,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3026,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [37]:
# Column totals of hotel_matrix; the third entry is the total of the
# occupied-rooms counter, i.e. the number of assignments made.
hotel_matrix.sum(axis=0)

array([ 4617., 71058.,  3975.])