In [4]:
import pandas as pd
import numpy as np
import os
import random
from tqdm.notebook import tqdm
from collections import defaultdict

In [5]:
# Load the three input tables (guests, hotels, guest preferences) from the
# working directory; the allocation classes below consume them as DataFrames.
df_guests, df_hotels, df_prefs = (
    pd.read_csv(csv_name)
    for csv_name in ("guests.csv", "hotels.csv", "preferences.csv")
)

In [6]:
class BaseAllocation():
    """Greedy guest-to-hotel allocation shared by every strategy in this notebook.

    Subclasses only decide *in which order* guests are served and *in which
    order* each guest tries hotels, by implementing ``_define_guest_order``.
    ``assign`` then gives every guest the first hotel of their list that still
    has a free room and finally computes summary statistics.

    Expected input columns (as used by the code below):
      * ``df_guests``: ``guest``, ``discount``
      * ``df_hotels``: ``hotel``, ``rooms``, ``price``
      * ``df_preferences``: ``guest``, ``hotel``, ``priority``

    Main state:
      * ``pref_matrix``  -- (guests x hotels) array, 1 where a guest is assigned.
      * ``hotel_matrix`` -- (hotels x 3) array: rooms, price, occupied rooms.
      * statistics attributes, filled by ``_compute_stats`` (``flag_stats`` is
        True once they are available).
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all :bool):
        """Index the input tables and initialise the assignment matrices.

        Args:
            df_guests: one row per guest.
            df_hotels: one row per hotel.
            df_preferences: one row per (guest, hotel, priority) preference.
            assign_all: when True, ``assign`` also places guests that got none
                of their preferred hotels into a hotel that still has room.
        """
        self.assign_all = assign_all

        # Positional ids in [0, len(table) - 1]. The index is dropped first so the
        # ids are positional even when the caller passes a filtered/re-indexed frame;
        # they are used directly as row/column indices of the numpy matrices.
        df_guests = df_guests.reset_index(drop=True).reset_index().rename(columns={'index': 'guest_index'})
        df_hotels = df_hotels.reset_index(drop=True).reset_index().rename(columns={'index': 'hotel_index'})

        # Enrich every preference row with the positional ids and the hotel data.
        df_preferences = df_preferences.merge(df_guests[['guest', 'guest_index']], on='guest')
        df_preferences = df_preferences.merge(df_hotels[['hotel', 'hotel_index', 'price', 'rooms']], on='hotel')

        self.guests = df_guests
        self.hotels = df_hotels
        self.preferences = df_preferences

        self.number_hotel = len(df_hotels)
        self.number_guest = len(df_guests)

        # Assignment matrix (# guest, # hotel): 1 if the guest is assigned to the hotel, else 0.
        self.pref_matrix = np.zeros((self.number_guest, self.number_hotel))

        # Hotel matrix: column 0 -> rooms, column 1 -> price, column 2 -> occupied rooms.
        # Columns are selected by name so the layout does not depend on the CSV column order.
        hotel_matrix = df_hotels[['rooms', 'price']].to_numpy()
        self.hotel_matrix = np.c_[hotel_matrix, np.zeros(self.number_hotel)]

        # initialize stats
        self.df_pref_matrix = None
        self.df_hotel_matrix = None

        self.all_guest_assign = False
        self.all_room_assign = False

        self.flag_stats = False  # True if called self._compute_stats()
        self.satifaction_level = None
        self.business_volume = None
        self.number_ass_guest = None
        self.number_vacant_rooms = None
        self.number_fully_book_hotels = None
        self.number_empty_hotels = None
        self.number_not_full_empty_hotel = None
        self.partially_vacant_hotels = list()

    def _define_guest_order(self):
        """Strategy hook: must set ``self.guest_order`` and ``self.pref_by_guest``.

        ``guest_order`` is the sequence of guest indices to serve and
        ``pref_by_guest`` maps each of them to the ordered list of hotel indices
        to try. Subclasses override this method.
        """
        raise NotImplementedError("Subclasses must implement _define_guest_order()")

    def _preprocessing_results(self):
        """Build ``df_pref_matrix`` (one row per assigned guest) and ``df_hotel_matrix``."""
        # Hotel index assigned to each guest. An extra all-ones column is appended so
        # that an unassigned guest resolves to index ``number_hotel``, which matches no
        # real hotel; for an assigned guest argmax returns the first 1, i.e. the hotel.
        df_pref_matrix = pd.DataFrame(np.argmax(np.c_[self.pref_matrix, np.ones(self.number_guest)], axis=1), columns=['hotel_index'])
        df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

        # Attach the guest data (including the discount).
        df_pref_matrix = df_pref_matrix.merge(self.guests, on='guest_index')

        # Attach the hotel price; the inner merge drops guests without a booking
        # because their sentinel hotel index does not exist in the hotel table.
        df_pref_matrix = df_pref_matrix.merge(self.hotels.drop(columns=["rooms"]), on='hotel_index')

        # Price actually paid by each guest.
        df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1 - df_pref_matrix.discount)

        # Hotel matrix as a DataFrame, used to count full/empty hotels.
        self.df_hotel_matrix = pd.DataFrame(self.hotel_matrix, columns=['rooms', 'price', 'occupied_rooms'])

        # Priority the guest gave to the hotel they received. Duplicate (guest, hotel)
        # preference rows are collapsed to the best priority so the left merge cannot
        # duplicate an assigned guest.
        assigned_priority = (
            self.preferences[["guest_index", "hotel_index", "priority"]]
            .sort_values("priority")
            .drop_duplicates(subset=["guest_index", "hotel_index"])
        )
        df_pref_matrix = df_pref_matrix.merge(assigned_priority, on=["guest_index", "hotel_index"], how='left')
        df_pref_matrix['priority'] = df_pref_matrix['priority'] - 1

        number_of_priority = self.preferences.groupby("guest_index", as_index=False)['priority'].count().rename(columns={'priority': 'number_of_priority'})
        # Left merge: an assigned guest without any preference row must stay in the table.
        df_pref_matrix = df_pref_matrix.merge(number_of_priority, on='guest_index', how='left')

        df_pref_matrix['satisfaction'] = (df_pref_matrix.number_of_priority - df_pref_matrix.priority)/df_pref_matrix.number_of_priority

        # A hotel outside the guest's preferences (priority is NaN) counts as zero satisfaction.
        df_pref_matrix['satisfaction'] = df_pref_matrix.satisfaction.fillna(0)

        self.df_pref_matrix = df_pref_matrix

    def _compute_stats(self):
        """Fill the statistics attributes from the current assignment."""
        self._preprocessing_results()

        # Average over *all* guests; unassigned guests contribute zero.
        if self.number_guest:
            self.satifaction_level = self.df_pref_matrix.satisfaction.sum() / self.number_guest
        else:
            self.satifaction_level = 0.0
        self.business_volume = self.df_pref_matrix.price_to_set.sum()
        self.number_ass_guest = len(self.df_pref_matrix)
        self.number_vacant_rooms = self.hotels.rooms.sum() - self.number_ass_guest
        self.number_fully_book_hotels = len(self.df_hotel_matrix[(self.df_hotel_matrix['rooms'] == self.df_hotel_matrix['occupied_rooms'])])
        self.number_empty_hotels = len(self.df_hotel_matrix[(self.df_hotel_matrix['occupied_rooms'] == 0)])
        self.number_not_full_empty_hotel = self.number_hotel - self.number_fully_book_hotels - self.number_empty_hotels

        self.flag_stats = True

    def partial_hotel_booking_ratio(self):
        """Return occupied/total room ratios of hotels that are neither empty nor full.

        The list is rebuilt on every call (and stored in ``partially_vacant_hotels``),
        so calling the method repeatedly does not accumulate duplicates.
        """
        rooms = self.hotel_matrix[:, 0]
        occupied = self.hotel_matrix[:, 2]
        partially_booked = (occupied < rooms) & (occupied != 0)
        self.partially_vacant_hotels = (occupied[partially_booked] / rooms[partially_booked]).tolist()
        return self.partially_vacant_hotels

    def __str__(self):
        """Human-readable summary of the statistics, or a hint if they are not computed yet."""
        if self.flag_stats:
            result = [
                f'Total business volume: {round(self.business_volume, 2):,} €',
                f'Assigned guests: {self.number_ass_guest}',
                f'Vacant rooms: {self.number_vacant_rooms}',
                f'Number of fully booked hotels: {self.number_fully_book_hotels}',
                f'Number of completely vacant hotels: {self.number_empty_hotels}',
                f'Number of neither full nor vacant hotels: {self.number_not_full_empty_hotel}',
                f'Overall satisfaction is: {round(self.satifaction_level*100, 2)}%'
            ]
            return '\n'.join(result)
        else:
            return "Not available statistics. Launch the method assign()"

    def _access_pref_matrix(self, guest, hotel):
        """Record that ``guest`` is assigned to ``hotel`` and occupy one of its rooms."""
        self.pref_matrix[guest, hotel] = 1
        self.hotel_matrix[hotel, 2] += 1

    def assign(self):
        """Run the allocation and compute the statistics.

        Guests are served in ``guest_order``; each one receives the first hotel of
        ``pref_by_guest[guest]`` with a free room. With ``assign_all`` set, guests
        left without a room are then placed in the free hotel whose price is closest
        to one of their preferred hotels, until no free room remains.
        """
        # Start from a clean slate so that calling assign() again does not stack
        # a second round of bookings on top of the previous one.
        self.pref_matrix[:] = 0
        self.hotel_matrix[:, 2] = 0

        self._define_guest_order()
        total_rooms = self.hotel_matrix[:, 0].sum()
        for guest in self.guest_order:
            for hotel in self.pref_by_guest[guest]:
                if self.hotel_matrix[hotel, 2] < self.hotel_matrix[hotel, 0]:
                    self._access_pref_matrix(guest, hotel)
                    break
            # Every room is taken: nobody else can be served.
            if self.hotel_matrix[:, 2].sum() == total_rooms:
                break

        if self.assign_all:

            # Unassigned guests, kept in the strategy's serving order.
            unassigned = set(np.flatnonzero(self.pref_matrix.sum(axis=1) == 0).tolist())
            guest_not_found = [x for x in self.guest_order if x in unassigned]

            for guest in guest_not_found:

                available_hotel = np.flatnonzero((self.hotel_matrix[:, 0] - self.hotel_matrix[:, 2]) > 0).tolist()
                if not available_hotel:
                    # No free room anywhere: the remaining guests cannot be placed.
                    break

                # Cross join (constant key) between the guest's preferred hotels
                # ("recipient") and the hotels that still have room ("donor").
                guest_pref = self.preferences[self.preferences.guest_index == guest].assign(key=1)
                hotel_donors = self.hotels.loc[self.hotels.hotel_index.isin(available_hotel), ['hotel_index', 'price']].assign(key=1)

                guest_pref = guest_pref.merge(hotel_donors, how='outer', on='key', suffixes=('_recipient', '_donor'))
                guest_pref['delta'] = (guest_pref.price_donor - guest_pref.price_recipient).abs()
                # Smallest price gap wins; ties go to the higher-ranked preference.
                hotel_index = int(guest_pref.sort_values(['delta', 'priority'])['hotel_index_donor'].iloc[0])
                self._access_pref_matrix(guest, hotel_index)
        self._compute_stats()


In [7]:
class RandomGuestAllocation(BaseAllocation):
    """Allocation strategy that serves guests in random order.

    ``random_on`` selects how much is randomised:
      * ``'guest'``          -- random guest order, hotels tried by ascending priority;
      * ``'guest_priority'`` -- random guest order, each guest's preferred hotels shuffled;
      * any other value (including the default ``None``) -- random guest order and,
        for every guest, a random permutation of *all* hotels (preferences ignored).

    ``seed`` makes the draws reproducible: when given, every call to ``assign``
    uses a fresh ``random.Random(seed)``. When omitted, the module-level ``random``
    generator is used, exactly as before.
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False, random_on:str =None, seed=None):
        super(RandomGuestAllocation, self).__init__(df_guests, df_hotels, df_preferences, assign_all)
        self.random_on = random_on
        self.seed = seed

    def _define_guest_order(self):
        """Set ``pref_by_guest`` ({guest_index: [hotel_index, ...]}) and a shuffled ``guest_order``."""
        # A dedicated generator when a seed is supplied, otherwise the shared module generator.
        rng = random.Random(self.seed) if self.seed is not None else random

        def shuffled(items):
            # Random permutation of ``items`` returned as a new list.
            items = list(items)
            return rng.sample(items, len(items))

        if self.random_on == 'guest':

            self.preferences = self.preferences.sort_values(['guest_index', 'priority'])
            self.pref_by_guest = self.preferences.groupby("guest_index").agg({"hotel_index":list}).to_dict()["hotel_index"]

        elif self.random_on == 'guest_priority':

            self.pref_by_guest = self.preferences.groupby("guest_index").agg({"hotel_index":list}).to_dict()["hotel_index"]
            self.pref_by_guest = {key: shuffled(value) for key, value in self.pref_by_guest.items()}

        else:

            # Every guest gets a random ordering of all hotels.
            self.pref_by_guest = {guest: shuffled(range(self.number_hotel)) for guest in range(self.number_guest)}

        # The guest order is drawn last in every mode, after the hotel lists.
        self.guest_order = shuffled(self.pref_by_guest.keys())
            

In [8]:
class OrderGustAllocation(BaseAllocation):
    """Serve guests by ascending guest index, each trying hotels by ascending priority."""

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_guest_order(self):
        """Build the {guest_index: [hotel_index, ...]} mapping and the serving order."""
        by_priority = self.preferences.sort_values(['guest_index', 'priority'])
        self.preferences = by_priority

        # One list of hotel indices per guest, e.g. {0: [21, 32, 65, ...], 1: [...], ...}
        hotels_per_guest = by_priority.groupby("guest_index").agg({"hotel_index": list})
        self.pref_by_guest = hotels_per_guest.to_dict()["hotel_index"]

        self.guest_order = sorted(self.pref_by_guest)


In [9]:
class PriceHotelAllocation(BaseAllocation):
    """Serve guests by ascending guest index, each trying their preferred hotels cheapest first."""

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_guest_order(self):
        """Build the {guest_index: [hotel_index, ...]} mapping and the serving order."""
        by_price = self.preferences.sort_values(['guest_index', 'price'])
        self.preferences = by_price

        # One list of hotel indices per guest, e.g. {0: [21, 32, 65, ...], 1: [...], ...}
        hotels_per_guest = by_price.groupby("guest_index").agg({"hotel_index": list})
        self.pref_by_guest = hotels_per_guest.to_dict()["hotel_index"]

        self.guest_order = sorted(self.pref_by_guest)
                 
        
    

In [10]:
class AvailabilityHotelAllocation(BaseAllocation):
    """Serve guests by ascending guest index, each trying their preferred hotels largest first."""

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_guest_order(self):
        """Build the {guest_index: [hotel_index, ...]} mapping and the serving order."""
        # guest_index ascending, then the hotel's total room count descending
        by_rooms = self.preferences.sort_values(['guest_index', 'rooms'], ascending=[True, False])
        self.preferences = by_rooms

        # One list of hotel indices per guest, e.g. {0: [21, 32, 65, ...], 1: [...], ...}
        hotels_per_guest = by_rooms.groupby("guest_index").agg({"hotel_index": list})
        self.pref_by_guest = hotels_per_guest.to_dict()["hotel_index"]

        self.guest_order = sorted(self.pref_by_guest)
    

In [11]:
# Price-ordered strategy with the default assign_all=False: guests only get hotels from their own preference list.
c = PriceHotelAllocation(df_guests=df_guests, df_hotels=df_hotels, df_preferences=df_prefs)
c.assign()

In [12]:
# Merged preference table; assign() left it sorted by guest_index, then hotel price.
c.preferences

Unnamed: 0,guest,hotel,priority,guest_index,hotel_index,price,rooms
3638,guest_1,hotel_22,16,0,21,61,17
1446,guest_1,hotel_129,7,0,128,85,19
2865,guest_1,hotel_90,13,0,89,85,17
3092,guest_1,hotel_269,14,0,268,127,5
238,guest_1,hotel_207,2,0,206,128,8
...,...,...,...,...,...,...,...
84447,guest_4000,hotel_81,3,3999,80,252,16
2114,guest_4000,hotel_1,9,3999,0,273,13
42238,guest_4000,hotel_352,2,3999,351,276,8
84661,guest_4000,hotel_278,12,3999,277,279,18


In [13]:
# Number of guests that received a room under the price-ordered strategy.
c.number_ass_guest

3926

In [14]:
# Number of hotels whose occupied-room count equals their room count.
c.number_fully_book_hotels

321

In [15]:
# Sum over assigned guests of price * (1 - discount).
c.business_volume

564283.0599999999

In [16]:
# Availability-ordered strategy with the default assign_all=False: each guest tries their preferred hotels largest first.
d = AvailabilityHotelAllocation(df_guests=df_guests, df_hotels=df_hotels, df_preferences=df_prefs)
d.assign()

In [17]:
# Number of guests that received a room under the availability-ordered strategy.
d.number_ass_guest

3955