# Analysis of Airbnb data for Amsterdam, Athens, Berlin
##### Data sourced from:
Gyódi, K., & Nawaro, Ł. (2021). Determinants of Airbnb prices in European cities: A spatial econometrics approach (Supplementary Material) [Data set]. Zenodo. https://doi.org/10.5281/zenodo.4446043

In [None]:
library(tidyverse)
library(repr)
library(infer)
library(cowplot)
library(broom)
library(GGally)
library(AER)

In [None]:
## Initial loading and wrangling.
## The six CSV files are read by relative path, so they must sit in the
## working directory. Each table drops its `X` column (presumably the row
## index written by the CSV export) and is tagged with its city and day type.
amsterdam_weekdays <- read.csv("amsterdam_weekdays.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "amsterdam", day_type = "weekday")
amsterdam_weekends <- read.csv("amsterdam_weekends.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "amsterdam", day_type = "weekend")

athens_weekdays <- read.csv("athens_weekdays.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "athens", day_type = "weekday")
athens_weekends <- read.csv("athens_weekends.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "athens", day_type = "weekend")

berlin_weekdays <- read.csv("berlin_weekdays.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "berlin", day_type = "weekday")
berlin_weekends <- read.csv("berlin_weekends.csv") %>%
  as_tibble() %>%
  select(-X) %>%
  mutate(city = "berlin", day_type = "weekend")

## Stack the six tables into one and turn the categorical columns into
## factors in a single pass.
airbnb <- bind_rows(
  amsterdam_weekdays, amsterdam_weekends,
  athens_weekdays, athens_weekends,
  berlin_weekdays, berlin_weekends
) %>%
  mutate(across(
    c(
      room_type, room_shared, multi, biz,
      room_private, host_is_superhost, city, day_type
    ),
    as.factor
  ))

In [None]:
library(tidymodels)
library(glmnet)

In [None]:
# Use LASSO to model the listing price from all other columns of `airbnb`.
# The `Housing_` prefix on the objects below is kept so that any later code
# referring to them keeps working.
set.seed(5033)

# prop = 0.3 places 30% of the rows in the training set; the remaining rows
# are held out for testing.
split <- initial_split(data = airbnb, prop = 0.3)
training_df <- training(split)
testing_df <- testing(split)

# NOTE(review): `realSum` is assumed to be the price column of the Zenodo
# data set -- confirm against the CSV headers. Selecting the response by name
# replaces the previous positional index (column 20).
response_col <- "realSum"
lasso_formula <- as.formula(paste(response_col, "~ ."))

# glmnet needs a numeric design matrix. model.matrix() dummy-codes the factor
# columns; its intercept column is dropped because glmnet fits its own.
Housing_X_train <- model.matrix(lasso_formula, data = training_df)[, -1]
Housing_Y_train <- as.matrix(training_df[, response_col])

Housing_X_test <- model.matrix(lasso_formula, data = testing_df)[, -1]
Housing_Y_test <- as.matrix(testing_df[, response_col])

# model.matrix() silently drops rows containing NA, which would misalign the
# predictors and the response; fail loudly instead.
stopifnot(
  nrow(Housing_X_train) == nrow(Housing_Y_train),
  nrow(Housing_X_test) == nrow(Housing_Y_test)
)

# One penalty grid shared by both fits.
# NOTE(review): the grid runs from exp(5) to exp(12); this range may be too
# large for the scale of the response -- confirm, or drop `lambda` to let
# glmnet choose its own path.
lambda_grid <- exp(seq(5, 12, 0.1))

# Full regularisation path on the training data (alpha = 1 selects LASSO).
Housing_LASSO <- glmnet(
  x = Housing_X_train, y = Housing_Y_train,
  alpha = 1,
  lambda = lambda_grid
)

# Cross-validated fit over the same grid, used to pick the penalty.
Housing_cv_LASSO <- cv.glmnet(
  x = Housing_X_train, y = Housing_Y_train,
  alpha = 1,
  lambda = lambda_grid
)

# Coefficients at the penalty with the lowest cross-validated error.
coef(Housing_cv_LASSO, s = "lambda.min")

# Predict the held-out rows at that penalty and show the first few observed
# values next to their predictions.
Housing_test_pred_LASSO_min <- predict(
  Housing_cv_LASSO,
  newx = Housing_X_test,
  s = "lambda.min"
)
tibble(Housing_Y_test, LASSO_prediction = Housing_test_pred_LASSO_min) %>%
  head()