In [7]:
import pandas as pd
import numpy as np
import os
import random
from abc import ABCMeta, abstractmethod
from tqdm.notebook import tqdm

In [20]:
class BaseAllocation(metaclass=ABCMeta):
    """
    Abstract base class for the guest/hotel allocation strategies.

    It stores the input tables, the guest-to-hotel assignment matrix, the hotel
    occupancy matrix and the KPIs computed after an allocation has been run.
    Concrete subclasses must implement ``assign()``.
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool):
        """
        Prepare the shared state used by every allocation strategy.

        Parameters
        ----------
        df_guests : table with (at least) the columns ``guest`` and ``discount``.
        df_hotels : table with (at least) the columns ``hotel``, ``rooms`` and ``price``.
        df_preferences : table with (at least) the columns ``guest``, ``hotel`` and ``priority``
            (priority is presumably 1-based, 1 = favourite -- confirm against the data).
        assign_all : if True the strategy also tries to place guests that could not get
            one of their preferred hotels.
        """

        # If True the allocation strategy will allocate all the available guests
        self.assign_all = assign_all

        # Create a column with the *position* of each guest / hotel in its source table
        # [0, len of table - 1]. The positions are used as row/column indices of the numpy
        # matrices below, so they are built explicitly instead of relying on the incoming
        # index being a default, unnamed RangeIndex.
        df_guests = df_guests.reset_index(drop=True)
        df_guests.insert(0, 'guest_index', range(len(df_guests)))
        df_hotels = df_hotels.reset_index(drop=True)
        df_hotels.insert(0, 'hotel_index', range(len(df_hotels)))

        # Enrich the preferences with the positional indices and the hotel price
        df_preferences = df_preferences.merge(df_guests[['guest', 'guest_index']], on='guest')
        df_preferences = df_preferences.merge(df_hotels[['hotel', 'hotel_index', 'price']], on='hotel')

        self.guests = df_guests
        self.hotels = df_hotels
        self.preferences = df_preferences

        self.number_hotel = len(df_hotels)
        self.number_guest = len(df_guests)

        # Assignment matrix (# guest, # hotel): 1 if the guest is assigned to the hotel, 0 otherwise
        self.pref_matrix = np.zeros((self.number_guest, self.number_hotel))

        # Hotel matrix, one row per hotel:
        # column 0 -> room availability, column 1 -> price, column 2 -> number of rooms assigned.
        # Columns are selected by name so extra / reordered hotel columns cannot shift them.
        hotel_matrix = df_hotels[['rooms', 'price']].values.astype(float)
        self.hotel_matrix = np.c_[hotel_matrix, np.zeros(self.number_hotel)]

        ## Initialize stats
        # Stats are not reported until flag_stats is True (set by self._compute_stats())
        self.flag_stats = False
        self.df_pref_matrix = None
        self.df_hotel_matrix = None
        self.stats_table = None

        self.all_guest_assign = False
        self.all_room_assign = False

        self.satifaction_level = None
        self.business_volume = None
        self.number_ass_guest = None
        self.number_vacant_rooms = None
        self.number_fully_book_hotels = None
        self.number_empty_hotels = None
        self.number_not_full_empty_hotel = None

    def _preprocessing_results(self):
        """
        Format the raw outputs of the allocation (``pref_matrix`` / ``hotel_matrix``) into
        ``self.df_pref_matrix`` (one row per assigned guest) and ``self.df_hotel_matrix``.
        """

        # For each guest find the assigned hotel index.
        ## An additional all-ones column is appended to pref_matrix so that a guest without
        ## any assignment gets the index `number_hotel`, which is not present in the hotels table.
        padded = np.c_[self.pref_matrix, np.ones(self.number_guest)]
        df_pref_matrix = pd.DataFrame(np.argmax(padded, axis=1), columns=['hotel_index'])
        df_pref_matrix = df_pref_matrix.reset_index().rename(columns={'index': 'guest_index'})

        # Retrieve the hotel price. The inner merge drops the guests whose hotel index is not
        # in the hotels table (i.e. the guests with no hotel assigned).
        df_pref_matrix = df_pref_matrix.merge(self.hotels.drop(columns=["rooms"]), on='hotel_index')

        # Retrieve the discount of each guest
        df_pref_matrix = df_pref_matrix.merge(self.guests, on='guest_index')

        # Compute the price after the discount
        df_pref_matrix['price_to_set'] = df_pref_matrix.price * (1 - df_pref_matrix.discount)

        # Resulting table for hotels [rooms available, price per room and occupied rooms]
        self.df_hotel_matrix = pd.DataFrame(self.hotel_matrix, columns=['rooms', 'price', 'occupied_rooms'])

        # Compute the satisfaction index
        ## Retrieve, for each guest, the priority given to the assigned hotel
        ## (NaN when the hotel was not among the guest's preferences)
        df_pref_matrix = df_pref_matrix.merge(self.preferences[["guest_index", "hotel_index", "priority"]],
                                              on=["guest_index", "hotel_index"], how='left')
        df_pref_matrix['priority'] = df_pref_matrix['priority'] - 1

        # Number of preferences declared by each guest
        number_of_priority = (self.preferences
                              .groupby("guest_index", as_index=False)['priority']
                              .count()
                              .rename(columns={'priority': 'number_of_priority'}))
        # Left merge: an assigned guest who declared no preference must stay in the results
        # (satisfaction 0) instead of silently disappearing from every KPI.
        df_pref_matrix = df_pref_matrix.merge(number_of_priority, on='guest_index', how='left')

        # Satisfaction: 1 for the first choice, decreasing linearly; 0 when the hotel was not a preference
        df_pref_matrix['satisfaction'] = (df_pref_matrix.number_of_priority - df_pref_matrix.priority) / df_pref_matrix.number_of_priority
        df_pref_matrix['satisfaction'] = df_pref_matrix.satisfaction.fillna(0)

        self.df_pref_matrix = df_pref_matrix

    def _compute_stats(self):
        """
        Compute the KPIs of the current allocation, store them on the instance and in
        ``self.stats_table`` (a one-row DataFrame), then enable ``__str__`` reporting.
        """

        # Preprocess the resulting data
        self._preprocessing_results()

        # Compute the kpis
        total_satisfaction = self.df_pref_matrix.satisfaction.sum()
        # Averaged over *all* guests (unassigned guests count as 0); guard against an empty guest table
        self.satifaction_level = total_satisfaction / self.number_guest if self.number_guest else 0.0
        self.business_volume = self.df_pref_matrix.price_to_set.sum()
        self.number_ass_guest = len(self.df_pref_matrix)
        self.number_vacant_rooms = self.hotels.rooms.sum() - self.number_ass_guest

        occupied = self.df_hotel_matrix['occupied_rooms']
        self.number_fully_book_hotels = int((self.df_hotel_matrix['rooms'] == occupied).sum())
        self.number_empty_hotels = int((occupied == 0).sum())
        self.number_not_full_empty_hotel = self.number_hotel - self.number_fully_book_hotels - self.number_empty_hotels

        # Dict where the key is the attribute name and the value is the KPI value
        stats_dict = {
            "business_volume": self.business_volume,
            "satifaction_level": self.satifaction_level,
            "number_ass_guest": self.number_ass_guest,
            "number_vacant_rooms": self.number_vacant_rooms,
            "number_fully_book_hotels": self.number_fully_book_hotels,
            "number_empty_hotels": self.number_empty_hotels,
            "number_not_full_empty_hotel": self.number_not_full_empty_hotel,
        }

        # Create the stats table (one row, one column per KPI)
        self.stats_table = pd.DataFrame.from_dict(stats_dict, orient='index').T

        self.flag_stats = True

    def __str__(self):
        """
        Return a human-readable summary of the KPIs, or a hint when they are not computed yet.
        """
        if not self.flag_stats:
            return "Not available statistics. Launch method assign()"

        result = [
            f'Total business volume: {round(self.business_volume, 2):,} €',
            f'Assigned guests: {self.number_ass_guest}',
            f'Vacant rooms: {self.number_vacant_rooms}',
            f'Number of fully booked hotels: {self.number_fully_book_hotels}',
            f'Number of completely vacant hotels: {self.number_empty_hotels}',
            f'Number of neither full nor vacant hotels: {self.number_not_full_empty_hotel}',
            f'Overall satisfaction is: {round(self.satifaction_level*100, 2)}%'
        ]
        return '\n'.join(result)

    def _access_pref_matrix(self, guest, hotel):
        """
        Assign guest to hotel and increase the number of rooms occupied.

        ``guest`` and ``hotel`` are the positional indices (guest_index / hotel_index).
        """

        # Assign guest to hotel
        self.pref_matrix[guest, hotel] = 1

        # Increase room assigned for hotel
        self.hotel_matrix[hotel, 2] += 1

    @abstractmethod
    def assign(self):
        """Run the allocation strategy; implemented by the concrete subclasses."""
        pass

In [21]:
class GuestAllocation(BaseAllocation):
    """
    Guest-driven allocation: guests are processed one at a time, in the order chosen by the
    subclass, and each one gets the first hotel of its preference list that still has a free room.

    Subclasses implement ``_define_guest_order()``, which must set ``self.guest_order``
    (iterable of guest indices) and ``self.pref_by_guest`` (guest index -> ordered hotel indices).
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool):
        super(GuestAllocation, self).__init__(df_guests, df_hotels, df_preferences, assign_all)

    @abstractmethod
    def _define_guest_order(self):
        """Set ``self.guest_order`` and ``self.pref_by_guest``."""
        pass

    def assign(self):
        """
        Run the allocation and compute the statistics.

        Implements the abstract ``BaseAllocation.assign`` (without it no concrete subclass can
        be instantiated) by delegating to ``assign_strategy()``.
        """
        self.assign_strategy()

    def assign_strategy(self):
        """
        Assign every guest, in ``self.guest_order``, to its first preferred hotel with a free
        room; optionally place the leftover guests (``assign_all``); then compute the KPIs.
        """
        self._define_guest_order()
        for guest in self.guest_order:
            for hotel in self.pref_by_guest[guest]:
                # occupied rooms (col 2) < available rooms (col 0) -> the hotel has a free room
                if self.hotel_matrix[hotel, 2] < self.hotel_matrix[hotel, 0]:
                    self._access_pref_matrix(guest, hotel)
                    break
            # Stop as soon as every room of every hotel is taken
            if self.hotel_matrix[:, 0].sum() == self.hotel_matrix[:, 2].sum():
                break

        if self.assign_all:
            self._assign_leftover_guests()
        self._compute_stats()

    def _assign_leftover_guests(self):
        """
        Place guests that got none of their preferred hotels in the free hotel whose price is
        closest to the price of one of their preferred hotels (ties broken by priority).
        """
        # Set for O(1) membership; the processing order is still the one of self.guest_order
        unassigned = set(np.flatnonzero(self.pref_matrix.sum(axis=1) == 0).tolist())
        leftover_guests = [guest for guest in self.guest_order if guest in unassigned]

        for guest in leftover_guests:
            free_rooms = self.hotel_matrix[:, 0] - self.hotel_matrix[:, 2]
            available_hotel = np.flatnonzero(free_rooms > 0).tolist()
            if not available_hotel:
                # No room left anywhere: the remaining guests cannot be placed
                break

            # Cross join (via a constant key) between the guest's preferred hotels ("recipient")
            # and the hotels that still have a free room ("donor")
            guest_pref = self.preferences[self.preferences.guest_index == guest].assign(key=1)
            hotel_donors = self.hotels.loc[self.hotels.hotel_index.isin(available_hotel),
                                           ['hotel_index', 'price']].assign(key=1)

            candidates = guest_pref.merge(hotel_donors, how='outer', on='key', suffixes=('_recipient', '_donor'))
            candidates['delta'] = (candidates.price_donor - candidates.price_recipient).abs()
            hotel_index = candidates.sort_values(['delta', 'priority'])['hotel_index_donor'].iloc[0]
            # int(): the merged column may come back as float and numpy refuses float indices
            self._access_pref_matrix(guest, int(hotel_index))

In [15]:
class HotelAllocation(BaseAllocation):
    """
    Hotel-driven allocation: hotels are processed in the order chosen by the subclass and each
    one is filled with the still-unassigned guests that listed it among their preferences.

    Subclasses implement ``_define_hotel_order()``, which must set ``self.hotel_order``
    (iterable of hotel indices) and ``self.guest_by_hotel`` (dict: hotel index -> guest indices).
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool):
        super(HotelAllocation, self).__init__(df_guests, df_hotels, df_preferences, assign_all)

    @abstractmethod
    def _define_hotel_order(self):
        """Set ``self.hotel_order`` and ``self.guest_by_hotel``."""
        pass

    def assign(self):
        """
        Fill the hotels in ``self.hotel_order``; optionally place the leftover guests in the
        cheapest hotel with a free room (``assign_all``); then compute the KPIs.
        """
        self._define_hotel_order()
        for hotel in self.hotel_order:
            # A hotel nobody listed as a preference has no entry in guest_by_hotel: skip it
            for guest in self.guest_by_hotel.get(hotel, ()):
                if self.hotel_matrix[hotel, 2] == self.hotel_matrix[hotel, 0]:
                    # Hotel is full
                    break
                if self.pref_matrix[guest, :].sum() == 0:
                    # Guest not assigned yet
                    self._access_pref_matrix(guest, hotel)
            # Stop as soon as every guest has a hotel
            if self.pref_matrix.sum() == self.number_guest:
                break

        if self.assign_all:
            # flatnonzero returns the guest indices already in ascending order
            guest_not_found = np.flatnonzero(self.pref_matrix.sum(axis=1) == 0).tolist()

            for guest in guest_not_found:
                free_rooms = self.hotel_matrix[:, 0] - self.hotel_matrix[:, 2]
                available_hotel = np.flatnonzero(free_rooms > 0).tolist()
                candidates = self.hotels[self.hotels.hotel_index.isin(available_hotel)]
                if candidates.empty:
                    # No room left anywhere: the remaining guests cannot be placed
                    break

                # Cheapest hotel among those with a free room
                hotel_index = self.hotels.loc[candidates.price.idxmin(), 'hotel_index']
                self._access_pref_matrix(guest, int(hotel_index))

        self._compute_stats()

In [16]:
class RandomGuestAllocation(GuestAllocation):
    """
    Guest-driven allocation where the guests are processed in random order.

    ``random_on`` selects how much is randomised:
    - ``'guest'``: random guest order, hotels tried in the declared priority order;
    - ``'guest_priority'``: random guest order and each guest's preferred hotels shuffled;
    - anything else: random guest order and, for every guest, a random permutation of
      all the hotels (the declared preferences are ignored).
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False, random_on:str =None):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)
        self.random_on = random_on

    def _define_guest_order(self):
        """Build ``self.pref_by_guest`` ({guest index: [hotel index, ...]}) and ``self.guest_order``."""
        mode = self.random_on

        if mode == 'guest':
            # Keep each guest's hotels in priority order
            self.preferences = self.preferences.sort_values(['guest_index', 'priority'])
            hotels_per_guest = self.preferences.groupby("guest_index")["hotel_index"].agg(list).to_dict()

        elif mode == 'guest_priority':
            # Shuffle the preferred hotels of every guest
            declared = self.preferences.groupby("guest_index")["hotel_index"].agg(list).to_dict()
            hotels_per_guest = {
                guest: random.sample(hotels, len(hotels))
                for guest, hotels in declared.items()
            }

        else:
            # Every guest gets a random permutation of all the hotels
            every_hotel = range(self.number_hotel)
            hotels_per_guest = {
                guest: random.sample(every_hotel, len(every_hotel))
                for guest in range(self.number_guest)
            }

        self.pref_by_guest = hotels_per_guest
        # The guest order is drawn last, after any per-guest shuffling
        self.guest_order = random.sample(list(hotels_per_guest), len(hotels_per_guest))
            

In [17]:
class OrderGustAllocation(GuestAllocation):
    """
    Guest-driven allocation where the guests are processed in ascending guest-index order
    and their hotels are tried in the declared priority order.
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_guest_order(self):
        """Build ``self.pref_by_guest`` ({guest index: [hotel index, ...]}) and ``self.guest_order``."""
        # Sort first so that each guest's hotel list follows the priority order
        by_priority = self.preferences.sort_values(['guest_index', 'priority'])
        self.preferences = by_priority

        # e.g. {0: [21, 32, 65, ...], 1: [...], ...}
        hotels_per_guest = by_priority.groupby("guest_index")["hotel_index"].agg(list).to_dict()
        self.pref_by_guest = hotels_per_guest
        self.guest_order = sorted(hotels_per_guest)


In [18]:
class PriceHotelAllocation(HotelAllocation):
    """
    Hotel-driven allocation where the hotels are filled from the cheapest to the most expensive.
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_hotel_order(self):
        """Build ``self.guest_by_hotel`` ({hotel index: [guest index, ...]}) and ``self.hotel_order``."""
        # Guests interested in each hotel, e.g. {21: [0, 2, 5, ...], 65: [...], ...},
        # each list in ascending guest-index order
        interested = self.preferences.groupby("hotel_index")["guest_index"].agg(list).to_dict()
        self.guest_by_hotel = {hotel: sorted(guests) for hotel, guests in interested.items()}

        # Hotels by ascending price
        by_price = self.hotels.sort_values('price')
        self.hotel_order = by_price['hotel_index'].tolist()
    

In [19]:
class AvailabilityHotelAllocation(HotelAllocation):
    """
    Hotel-driven allocation where the hotels are filled from the one with the most rooms
    to the one with the fewest.
    """

    def __init__(self, df_guests: pd.DataFrame, df_hotels: pd.DataFrame, df_preferences: pd.DataFrame, assign_all: bool=False):
        super().__init__(df_guests, df_hotels, df_preferences, assign_all)

    def _define_hotel_order(self):
        """Build ``self.guest_by_hotel`` ({hotel index: [guest index, ...]}) and ``self.hotel_order``."""
        # Guests interested in each hotel, e.g. {21: [0, 2, 5, ...], 65: [...], ...},
        # each list in ascending guest-index order
        interested = self.preferences.groupby("hotel_index")["guest_index"].agg(list).to_dict()
        self.guest_by_hotel = {hotel: sorted(guests) for hotel, guests in interested.items()}

        # Hotels by descending number of rooms
        by_rooms = self.hotels.sort_values('rooms', ascending=False)
        self.hotel_order = by_rooms['hotel_index'].tolist()
    