## Predicting Airbnb Prices


## Model 1

In [34]:
# Import gurobi and numpy
from gurobipy import *
import numpy as np
import pandas as pd

In [35]:
# Load training and test data
train = pd.read_csv('AirbnbTrain.csv')
test = pd.read_csv('AirbnbTest.csv')

In [36]:
shape_train = train.shape
# number of listings i 
listings = shape_train[0]
# number of features j (indepdent variables)
features = shape_train[1] - 1 
# price y
price = train['price']
# all x (independent variables)
X = train.drop('price', axis=1)

In [37]:
# Define Model
mod = Model()


# Define variables - beta coefficient (we are optimizing beta)
# allow beta to go negative.
beta = mod.addVars(features, lb=-GRB.INFINITY, vtype=GRB.CONTINUOUS)
intercept = mod.addVar(vtype=GRB.CONTINUOUS)

# dummy variable for least absolute deviations
abs_dev = mod.addVars(listings, lb=0, vtype=GRB.CONTINUOUS)


# Add Constraints 
# absolute value pair
for i in range(listings):
    estimate = intercept + sum(beta[j] * X.iloc[i, j] for j in range(features))
    mod.addConstr(-(price[i] - estimate) <= abs_dev[i])
    mod.addConstr((price[i] - estimate) <= abs_dev[i])


# Objective function
abs_dev_sum = sum(abs_dev[i] for i in range(listings))
mod.setObjective(abs_dev_sum / listings, GRB.MINIMIZE)

mod.update()

mod.optimize()

Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (mac64[arm] - Darwin 22.6.0 22G320)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 3400 rows, 1713 columns and 44772 nonzeros
Model fingerprint: 0xa312114c
Coefficient statistics:
  Matrix range     [5e-01, 5e+02]
  Objective range  [6e-04, 6e-04]
  Bounds range     [0e+00, 0e+00]
  RHS range        [1e+01, 2e+03]
Presolve time: 0.01s
Presolved: 3400 rows, 1713 columns, 44772 nonzeros

Concurrent LP optimizer: primal simplex, dual simplex, and barrier
Showing barrier log only...

Ordering time: 0.00s

Barrier statistics:
 Dense cols : 13
 Free vars  : 12
 AA' NZ     : 3.252e+04
 Factor NZ  : 3.519e+04 (roughly 2 MB of memory)
 Factor Ops : 4.819e+05 (less than 1 second per iteration)
 Threads    : 1

                  Objective                Residual
Iter       Primal          Dual         Primal    Dual     Compl     Time
   0   2.17619797e+03  0.00000000e+

### Model 1 Coefficients

In [38]:
if mod.status == GRB.OPTIMAL:
    for j in range(features):
        print(f'{X.columns[j]:<21}:{beta[j].X:>10.5f}')
else:
    print('No optimal solution found')


latitude             : 292.92731
longitude            :  84.73530
Entire home          :  33.22413
accommodates         :  10.58708
bathrooms            :  28.74156
bedrooms             :  20.21744
beds                 :  -2.81048
cleaning_fee         :   0.41825
minimum_nights       :  -1.81668
number_of_reviews    :  -0.02919
review_scores_rating :   0.27330
instant_bookable     :   3.95364


### Model 1 Prediction Error

In [39]:
# Prediction error for each observation in the test set
test['prediction'] = sum(beta[j].X * test.iloc[:, j] for j in range(features))

# Prediction error in $/night
pred_error = (test['price'] - test['prediction']).abs().mean()

print(f'$ {pred_error:.2f}/night')

$ 34.61/night


## Model 2

In [40]:
# a dummy variable z for selection (1 if selected, else 0)
z = mod.addVars(features, vtype=GRB.BINARY)

# when not selected, force beta = 0
for j in range(features):
    mod.addConstr(beta[j] <= 10000 * z[j])
    mod.addConstr(beta[j] >= -10000 * z[j])

# maximum of 3 features selected
mod.addConstr(z.sum() <= 3)

mod.update()

mod.optimize()

Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (mac64[arm] - Darwin 22.6.0 22G320)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 3425 rows, 1725 columns and 44832 nonzeros
Variable types: 1713 continuous, 12 integer (12 binary)
Coefficient statistics:
  Matrix range     [5e-01, 1e+04]
  Objective range  [6e-04, 6e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [3e+00, 2e+03]
Found heuristic solution: objective 144.9682353
Presolve removed 828 rows and 414 columns
Presolve time: 0.03s
Presolved: 2597 rows, 1311 columns, 33870 nonzeros
Variable types: 1299 continuous, 12 integer (12 binary)

Root relaxation: objective 3.573002e+01, 1516 iterations, 0.12 seconds (0.40 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0   35.73002    0    9  144.96824   35.73002  75.4%     - 

### Three Variables Selected

In [41]:
if mod.status == GRB.OPTIMAL:
    for j in range(features):
        if z[j].X != 0:
            print(f'{X.columns[j]:<21}:{beta[j].X:>10.5f}')
else:
    print('No optimal solution found')

Entire home          :  49.16667
accommodates         :  13.33333
bedrooms             :  32.50000


### New Prediction Error ($/night)

In [42]:
# Prediction error for each observation in the test set
test['prediction'] = sum(beta[j].X * test.iloc[:, j] for j in range(features))

# Prediction error in $/night
pred_error = (test['price'] - test['prediction']).abs().mean()

print(f'$ {pred_error:.2f}/night')

$ 38.19/night


## Model 3

In [43]:
# Define Model
mod = Model()


# Define variables - beta coefficient (we are optimizing beta)
# allow beta to go negative.
beta = mod.addVars(features, lb=-GRB.INFINITY, vtype=GRB.CONTINUOUS)

# dummy variable for least absolute deviations
abs_dev = mod.addVars(listings, lb=0, vtype=GRB.CONTINUOUS)


# Add Constraints 
# absolute value pair
for i in range(listings):
    estimate = sum(beta[j] * X.iloc[i, j] for j in range(features))
    mod.addConstr(-(price[i] - estimate) <= abs_dev[i])
    mod.addConstr((price[i] - estimate) <= abs_dev[i])



# a dummy variable z for selection (1 if selected, else 0)
z = mod.addVars(features, vtype=GRB.BINARY)


# for beds, z  = 1
mod.addConstr(z[6] == 1)

# all other variables: when not selected, force beta = 0
for j in range(features):
    mod.addConstr(beta[j] <= 10000 * z[j])
    mod.addConstr(beta[j] >= -10000 * z[j])

# maximum of 3 features selected
mod.addConstr(z.sum() == 3)


# Objective function
abs_dev_sum = sum(abs_dev[i] for i in range(listings))
mod.setObjective(abs_dev_sum / listings, GRB.MINIMIZE)

mod.update()

mod.optimize()

Gurobi Optimizer version 11.0.0 build v11.0.0rc2 (mac64[arm] - Darwin 22.6.0 22G320)

CPU model: Apple M2 Pro
Thread count: 10 physical cores, 10 logical processors, using up to 10 threads

Optimize a model with 3426 rows, 1724 columns and 41433 nonzeros
Model fingerprint: 0x3b102675
Variable types: 1712 continuous, 12 integer (12 binary)
Coefficient statistics:
  Matrix range     [5e-01, 1e+04]
  Objective range  [6e-04, 6e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+03]
Found heuristic solution: objective 144.9682353
Presolve removed 831 rows and 415 columns
Presolve time: 0.02s
Presolved: 2595 rows, 1309 columns, 31293 nonzeros
Variable types: 1298 continuous, 11 integer (11 binary)

Root relaxation: objective 3.573002e+01, 1503 iterations, 0.12 seconds (0.40 work units)

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0   35.73002    0    9  144.

### Variables selected

In [44]:
if mod.status == GRB.OPTIMAL:
    for j in range(features):
        if z[j].X != 0:
            print(f'{X.columns[j]:<21}:{beta[j].X:>10.5f}')
else:
    print('No optimal solution found')

Entire home          :  67.87500
bedrooms             :  47.37500
beds                 :  12.12500


### New Prediction Error

In [45]:
# Prediction error for each observation in the test set
test['prediction'] = sum(beta[j].X * test.iloc[:, j] for j in range(features))

# Prediction error in $/night
pred_error = (test['price'] - test['prediction']).abs().mean()

print(f'$ {pred_error:.2f}/night')

$ 38.60/night
