# Analysis of Airbnb data for Amsterdam, Athens, Berlin
##### Data sourced from:
Gyódi, K., & Nawaro, Ł. (2021). Determinants of Airbnb prices in European cities: A spatial econometrics approach (Supplementary Material) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.4446043

In [None]:
library(tidyverse)
library(repr)
library(infer)
library(cowplot)
library(broom)
library(GGally)
library(AER)

In [None]:
## Initial loading and wrangling. Ensure directory matches.

# Read one listings CSV, drop its `X` column, and tag every row with the city
# and day type that the file represents.
read_listings <- function(path, city_label, day_label) {
  listings <- as_tibble(read.csv(path))
  listings <- select(listings, -X)
  mutate(listings, city = city_label, day_type = day_label)
}

amsterdam_weekdays <- read_listings("amsterdam_weekdays.csv", "amsterdam", "weekday")
amsterdam_weekends <- read_listings("amsterdam_weekends.csv", "amsterdam", "weekend")

athens_weekdays <- read_listings("athens_weekdays.csv", "athens", "weekday")
athens_weekends <- read_listings("athens_weekends.csv", "athens", "weekend")

berlin_weekdays <- read_listings("berlin_weekdays.csv", "berlin", "weekday")
berlin_weekends <- read_listings("berlin_weekends.csv", "berlin", "weekend")

# Stack the six tables row-wise, then turn the categorical columns into
# factors in a single pass.
airbnb <- bind_rows(
  amsterdam_weekdays, amsterdam_weekends,
  athens_weekdays, athens_weekends,
  berlin_weekdays, berlin_weekends
) %>%
  mutate(across(
    c(
      room_type, room_shared, multi, biz,
      room_private, host_is_superhost, city, day_type
    ),
    as.factor
  ))
head(airbnb)

In [None]:
# Main developer: Zhuo Liu

# Packages needed for LASSO

library(tidymodels)
library(glmnet)

In [None]:
# Main developer: Zhuo Liu

# Use LASSO: build a numeric design matrix, split into training and test sets,
# choose lambda by cross-validation, and predict on the test set.

set.seed(5033)

# Turn a flag column into a 0/1 numeric vector. Matching is case-insensitive
# and also accepts "1", because read.csv() may parse "True"/"False" text as
# logical (giving factor levels "TRUE"/"FALSE" after as.factor()), while other
# flags may be stored as 0/1.
flag_to_numeric <- function(flag) {
  if_else(toupper(as.character(flag)) %in% c("TRUE", "1"), 1, 0)
}

# convert categorical variables to dummy variables manually
airbnb_clean <- airbnb |>
  na.omit() |>
  mutate(
    room_sharedYes = flag_to_numeric(room_shared),
    room_privateYes = flag_to_numeric(room_private),
    host_is_superhostYes = flag_to_numeric(host_is_superhost),
    cityathens = if_else(city == "athens", 1, 0),
    cityberlin = if_else(city == "berlin", 1, 0),
    day_typeweekend = if_else(day_type == "weekend", 1, 0),
    # multi and biz are factors at this point; left as factors they would make
    # as.matrix() below return a character matrix instead of a numeric one.
    # NOTE(review): assumes both are binary indicators -- confirm against the
    # CSV files.
    multi = flag_to_numeric(multi),
    biz = flag_to_numeric(biz)
  ) |>
  select(
    -room_type, -room_shared, -room_private,
    -host_is_superhost, -city, -day_type
  )

head(airbnb_clean)

# 70% of the rows go to training, the rest to testing.
split <- initial_split(data = airbnb_clean, prop = 0.7)
training_df <- training(split)
testing_df <- testing(split)

# The first column is used as the response; all remaining columns are the
# predictors.
X_train <- as.matrix(training_df[, -1])
Y_train <- as.matrix(training_df[, 1])

X_test <- as.matrix(testing_df[, -1])
Y_test <- as.matrix(testing_df[, 1])

# alpha = 1 selects the LASSO penalty; lambda is searched over a log-spaced
# grid.
cv_LASSO <- cv.glmnet(
  x = X_train, y = Y_train,
  alpha = 1,
  lambda = exp(seq(-21, 21, 0.1))
)

lambda <- cv_LASSO$lambda.min
lambda

coef(cv_LASSO, s = "lambda.min")

test_pred_LASSO_min <- predict(
  cv_LASSO,
  newx = X_test,
  s = "lambda.min"
)

# Keep every test row in `prediction` (so later metrics cover the whole test
# set) and only preview the first rows here.
prediction <- tibble(
  Y_test = as.vector(Y_test),
  LASSO_prediction = as.vector(test_pred_LASSO_min)
)
head(prediction)

In [None]:
# Calculate test_RMSE of LASSO
# Root mean squared error between the LASSO predictions and the observed
# test-set response, returned as a one-row tibble with a single RMSE column.
# The full test-set vectors are used directly so the metric does not depend on
# how many rows `prediction` retains. (Base summary() would only print
# per-column summaries and never evaluate an RMSE.)
test_RMSE_LASSO <- tibble(
  RMSE = sqrt(mean((as.vector(test_pred_LASSO_min) - as.vector(Y_test))^2))
)
test_RMSE_LASSO