In [31]:
import pandas as pd
import numpy as np
import warnings
from datetime import datetime, timedelta

# Install a catch-all "ignore" filter: every warning category is silenced for
# the remainder of the session.
warnings.simplefilter('ignore')

# Historical listing table used both to enumerate categorical levels and to
# derive comparable-price ranges further down.
df_clean = pd.read_csv(filepath_or_buffer='data/cleanData.csv')

In [33]:
import pickle
# Load the fitted classifier that is handed to createModel below.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load model files that come from a trusted source.
# The path is a plain literal - the previous f-string had no placeholders.
with open('rf.sav', 'rb') as f:
    rf = pickle.load(f)

In [432]:
class createModel():
    """Suggest nightly prices for a listing from a fitted booking classifier.

    Row 0 of ``self.testBase`` holds the listing/date currently being priced.
    The remaining rows enumerate every combination of categorical levels found
    in the historical data, so one-hot encoding the whole frame always yields
    a dummy column for every level. Candidate prices are then scanned for the
    one maximising ``price * P(booked)``.

    Parameters
    ----------
    model :
        Fitted classifier exposing ``predict_proba``; the last column of its
        output is used as the booking probability.
    df_clean : pandas.DataFrame
        Historical rows. The code reads the categorical columns listed in
        ``_CATEGORICAL``, the bookkeeping columns dropped in ``testBaseInit``,
        the columns compared in ``get_range`` and ``price_daily``.
    """

    # Columns that are one-hot encoded before being handed to the model.
    _CATEGORICAL = ['zipcode', 'property_type', 'room_type', 'bed_type', 'peak_month']
    # Request keys that steer whole_process and are never listing features.
    _CONTROL_KEYS = ('date_start', 'date_end', 'dynamic')
    # Listing fields that arrive as strings and are stored as floats.
    _NUMERIC_KEYS = ('zipcode', 'accommodates', 'guests_included', 'extra_people',
                     'bathrooms', 'bedrooms', 'beds', 'security_deposit', 'cleaning_fee')

    def __init__(self, model, df_clean):
        self.model = model
        self.df = df_clean
        self.testBase = self.testBaseInit()
        self.testDict = self.testDictInit()

    def testBaseInit(self):
        """Build the template frame whose row 0 will hold the test case.

        Returns
        -------
        pandas.DataFrame
            Row 0 is an all-zero placeholder; rows 1.. are the cartesian
            product of the categorical levels with all other feature columns
            set to 0.
        """
        df = self.df  # use the frame given to the constructor, not a notebook global
        to_categorical = list(self._CATEGORICAL)
        # Missing zipcodes are removed with pandas: a NumPy array has no .isna().
        list_zipcode = list(set(df['zipcode'].dropna()))
        list_property_type = list(set(df.property_type))
        list_room_type = list(set(df.room_type))
        list_bed_type = list(set(df.bed_type))
        list_peak_month = list(set(df.peak_month))
        testBase = pd.MultiIndex.from_product(
            [list_zipcode, list_property_type, list_room_type, list_bed_type, list_peak_month],
            names=to_categorical)
        testBase = pd.DataFrame(index=testBase).reset_index()
        # add continuous and binary columns (everything that is not bookkeeping
        # and not categorical), initialised to 0
        not_features = ['listing_id', 'date', 'dayWeek', 'month', 'host_since', 'city',
                        'year', 'day', 'unavailable'] + to_categorical
        contvar = df.drop(not_features, axis=1).columns
        for var in contvar:
            testBase[var] = 0
        # add an empty row and move it to position/index 0
        testBase.loc[-1] = 0
        testBase.index = testBase.index + 1  # shifting index
        testBase = testBase.sort_index()  # sorting by index
        return testBase

    def testDictInit(self):
        """Return the default listing description used for fields left blank."""
        testDict = {'date': '2019-11-11',
                    'zipcode': '02138',
                    'property_type': 'Apartment',
                    'room_type': 'Entire home/apt',
                    'bed_type': 'Real Bed',
                    'accommodates': '2',
                    'guests_included': '2',
                    'extra_people': '0',
                    'bathrooms': '1',
                    'bedrooms': '1',
                    'beds': '1',
                    'security_deposit': '0',
                    'cleaning_fee': '75',
                    'prob_lower': '0'}
        # amenities default to "not offered"
        amenities = ['Wifi', 'Heating', 'Smoke detector', 'Essentials', 'Kitchen',
                     'Carbon monoxide detector', 'Hangers', 'Air conditioning', 'Shampoo', 'Hair dryer',
                     'Iron', 'Laptop friendly workspace', 'TV', 'Washer', 'Dryer',
                     'Hot water', 'Fire extinguisher', 'Refrigerator', 'Microwave', 'Self check-in']
        for a in amenities:
            testDict[a] = 0
        return testDict

    def generate_testCase(self, input_dict):
        """Write the requested listing into row 0 of ``self.testBase``.

        Blank (empty or ``None``) entries of ``input_dict`` keep the defaults
        from ``testDictInit``; the control keys in ``_CONTROL_KEYS`` are
        skipped. The ``date`` entry additionally sets the ``weekend`` and
        ``peak_month`` columns.

        Returns
        -------
        pandas.DataFrame
            The one-row frame ``self.testBase.loc[[0]]`` (also stored as
            ``self.testCase``).
        """
        # start from defaults, then overlay the non-blank user inputs
        self.testDict = self.testDictInit()
        for k, v in input_dict.items():
            if k in self._CONTROL_KEYS or v is None:
                continue
            if hasattr(v, '__len__') and len(v) == 0:
                continue
            self.testDict[k] = v
        # insert into testBase
        for k, v in self.testDict.items():
            if k in self._NUMERIC_KEYS:
                # convert before the single write instead of storing the raw
                # string first and overwriting it afterwards
                v = float(v)
            self.testBase.loc[0, k] = v
            # date feature engineering
            if k == 'date':
                parsed = datetime.strptime(v, '%Y-%m-%d')
                self.testBase.loc[0, 'weekend'] = parsed.strftime("%A") in ('Friday', 'Saturday')
                self.testBase.loc[0, 'peak_month'] = self.classify_month(parsed.month)
        self.testCase = self.testBase.loc[[0]]
        return self.testCase

    def _match_mask(self, columns):
        """Flag rows of ``self.df`` equal to the current test case on ``columns``."""
        mask = pd.Series(True, index=self.df.index)
        for col in columns:
            mask = mask & (self.df[col] == self.testCase[col].values[0])
        return mask

    def get_range(self):
        """Return the 5th and 95th percentile of ``price_daily`` of comparable rows.

        Comparables are searched with progressively looser criteria until at
        least 10 rows match: (1) equality on all of ``columns_needed``;
        (2) zipcode within 3 of the test case plus equality on the rest;
        (3) equality on property/room type, bathrooms, bedrooms and beds only.

        Raises
        ------
        ValueError
            If even the loosest criterion matches no row.
        """
        columns_needed = ['zipcode', 'property_type', 'room_type', 'bathrooms', 'bedrooms', 'beds',
                          'guests_included', 'weekend', 'peak_month']
        # standard 1
        index_Flag = self._match_mask(columns_needed)
        # standard 2
        if index_Flag.sum() < 10:
            # Computed locally so the shared historical frame is not mutated;
            # abs() keeps only neighbours on either side (a signed difference
            # would accept every zipcode larger than the test case).
            zipcode_diff = (self.testCase['zipcode'].values[0] - self.df['zipcode']).abs()
            index_Flag = (zipcode_diff <= 3) & self._match_mask(columns_needed[1:])
        # standard 3
        if index_Flag.sum() < 10:
            index_Flag = self._match_mask(columns_needed[1:-3])
        prices = self.df.loc[index_Flag, 'price_daily']
        if prices.empty:
            raise ValueError('No comparable listings found to derive a price range.')
        lb, ub = np.quantile(prices, (0.05, 0.95))
        return lb, ub

    def dataProcessing(self, df_orig):
        """One-hot encode the categorical columns and drop non-feature columns.

        ``df_orig`` is not modified; a processed copy is returned.
        """
        df = df_orig.copy()
        df['zipcode'] = df['zipcode'].astype('category')
        to_categorical = list(self._CATEGORICAL)
        dummies = pd.get_dummies(df[to_categorical],
                                 prefix=['zipcode_', 'propertyType_', 'roomType_', 'bedType_', 'peakMonth_'],
                                 drop_first=True)
        df = pd.concat([df, dummies], axis=1)
        not_features = ['date', 'zipcode_test', 'zipcode_diff', 'prob_lower'] + to_categorical
        # errors='ignore': the helper columns are not guaranteed to be present.
        df = df.drop(columns=not_features, errors='ignore')
        return df

    def get_prob(self, testCase, price):
        """Return the booking probability of ``testCase`` at ``price``.

        Note: ``testCase`` is modified in place (its ``price_daily`` column is
        overwritten with ``price``).
        """
        testCase.loc[:, 'price_daily'] = price
        prob = self.model.predict_proba(testCase)[0][-1]  # use the model here
        return prob

    def optimization(self, testCase, lb, ub, prob_lower):
        """Scan prices ``lb..ub`` in steps of 1 for the best expected return.

        Only prices whose booking probability is at least ``prob_lower`` are
        eligible. Returns (and stores in ``self.optima_sol``) a dict with the
        keys 'Suggested Price', 'Probability of Getting Booked' and
        'Expected Return'.
        """
        self.optima_sol = {'Suggested Price': 0, 'Probability of Getting Booked': 0, 'Expected Return': 0}
        for p in np.arange(lb, ub + 1, 1):
            prob = self.get_prob(testCase, p)
            if prob >= prob_lower and p * prob >= self.optima_sol['Expected Return']:
                self.optima_sol = {'Suggested Price': p,
                                   'Probability of Getting Booked': prob,
                                   'Expected Return': p * prob}
        if self.optima_sol['Suggested Price'] == 0:  # interpolate where we have little data points
            # Fit prob(p) = 1 - a * p**2 through (lb, prob(lb)) and solve for
            # the price at which prob equals prob_lower. get_prob needs the
            # test case as well as the price.
            a = (1 - self.get_prob(testCase, lb)) / lb ** 2
            p = int(((1 - prob_lower) / a) ** (1 / 2))
            self.optima_sol = {'Suggested Price': p,
                               'Probability of Getting Booked': prob_lower,
                               'Expected Return': p * prob_lower}
        return self.optima_sol

    @staticmethod
    def generate_dates(date_start, date_end):
        """List every day from ``date_start`` to ``date_end`` inclusive.

        Both arguments and all returned items are 'YYYY-MM-DD' strings.

        Raises
        ------
        ValueError
            If ``date_end`` is earlier than ``date_start`` (or a date cannot
            be parsed).
        """
        d = datetime.strptime(date_start, '%Y-%m-%d')
        d_end = datetime.strptime(date_end, '%Y-%m-%d')
        if d_end < d:
            raise ValueError('date_end must not be earlier than date_start')
        dates = [date_start]
        while d < d_end:
            d += timedelta(days=1)
            dates.append(datetime.strftime(d, '%Y-%m-%d'))
        return dates

    def whole_process(self, input_dict, prob_lower = 0):
        """Price every night in the requested date range and print a summary.

        ``input_dict['dynamic'] == '1'`` optimises each night separately and
        returns a per-date DataFrame of prices; any other value picks the
        single price maximising the summed expected return over all nights.
        The probability floor comes from the ``prob_lower`` argument (the
        'prob_lower' entry of ``input_dict`` is not consulted here).

        Returns
        -------
        tuple
            ``(final_earning, final_pricing)``.
        """
        # Work on a copy so the per-date 'date' key does not leak to the caller.
        input_dict = dict(input_dict)
        dynamic = input_dict['dynamic']
        date_start = input_dict['date_start']
        date_end = input_dict['date_end']
        dates = self.generate_dates(date_start, date_end)
        if dynamic == '1':
            prices, book_rates, earnings = [], [], []
            for date in dates:
                input_dict['date'] = date
                # update based on input_dict
                self.generate_testCase(input_dict)
                # process data
                self.testCase_X = self.dataProcessing(self.testBase).loc[[0]]
                # get range
                lb, ub = self.get_range()
                # run optimization
                result = self.optimization(self.testCase_X, lb, ub, prob_lower)
                # organize output
                prices.append(result['Suggested Price'])
                book_rates.append(result['Probability of Getting Booked'])
                earnings.append(result['Expected Return'])
            df_pricing = pd.DataFrame([prices]).T
            df_pricing.columns = ['Suggested Price']
            df_pricing.index = dates
            final_earning, final_pricing = np.sum(earnings), df_pricing
        else:  # static
            # One pass per date: collect the price bounds and keep the encoded
            # test case so it does not have to be rebuilt for the scan below.
            lbs, ubs, encoded_cases = [], [], []
            for d in dates:
                input_dict['date'] = d
                self.generate_testCase(input_dict)
                lb, ub = self.get_range()
                lbs.append(lb)
                ubs.append(ub)
                encoded_cases.append(self.dataProcessing(self.testBase).loc[[0]])
            lb = np.min(lbs)
            ub = np.max(ubs)
            prices_cand = np.arange(lb, ub + 1, 1)
            # calculate matrix of book_rates (dates x candidate prices)
            book_rates = np.zeros((len(dates), len(prices_cand)))
            for i, testCase_X in enumerate(encoded_cases):
                for j, p in enumerate(prices_cand):
                    book_rates[i, j] = self.get_prob(testCase_X, p)
            earnings = np.sum(book_rates * np.array(prices_cand), axis=0)
            suggested_i = np.argmax(earnings)
            final_earning, final_pricing = earnings[suggested_i], prices_cand[suggested_i]
        print('You are expected to earn ${0:4.2f} over the {1} days with the following suggested pricing:'.
              format(final_earning, len(dates)))
        print(final_pricing)
        return final_earning, final_pricing

    def classify_month(self, x):
        """Map a month number (1-12) to 'Peak', 'Middle' or 'Slack'."""
        if x in [5, 6, 7, 8, 9, 10]:
            return 'Peak'
        elif x in [3, 4, 11]:
            return 'Middle'
        else:
            return 'Slack'
        

In [438]:
# Example pricing request. Every value is a string; an empty string means
# "not specified" and is skipped by createModel.generate_testCase, which then
# falls back to the defaults from createModel.testDictInit.
input_dict = {
    # run control
    'dynamic': '1',
    'date_start': '2019-11-07',
    'date_end': '2019-11-10',
    # listing fields supplied explicitly
    'zipcode': '02128',
    'property_type': 'Apartment',
    'room_type': 'Entire home/apt',
    'bed_type': 'Real Bed',
    'accommodates': '1',
    # listing fields left blank
    **dict.fromkeys(
        ['guests_included', 'extra_people', 'bathrooms', 'bedrooms', 'beds',
         'security_deposit', 'cleaning_fee', 'prob_lower'],
        ''),
    # amenities left blank
    **dict.fromkeys(
        ['Wifi', 'Heating', 'Smoke detector', 'Essentials', 'Kitchen',
         'Carbon monoxide detector', 'Hangers', 'Air conditioning', 'Shampoo',
         'Hair dryer', 'Iron', 'Laptop friendly workspace', 'TV', 'Washer',
         'Dryer', 'Hot water', 'Fire extinguisher', 'Refrigerator',
         'Microwave', 'Self check-in'],
        ''),
}

In [439]:
# Wire the loaded classifier and the historical listings into the pricer.
a = createModel(model=rf, df_clean=df_clean)

In [440]:
# Run the optimization for the request above; the returned
# (expected earning, pricing) pair is displayed as the cell result.
a.whole_process(input_dict=input_dict)

You are expected to earn $342.00 over the 4 days with the following suggested pricing:
            Suggested Price
2019-11-07            135.0
2019-11-08            150.0
2019-11-09            150.0
2019-11-10            135.0


(342.0,             Suggested Price
 2019-11-07            135.0
 2019-11-08            150.0
 2019-11-09            150.0
 2019-11-10            135.0)