<div >
<img src = "../banner.jpg" />
</div>

# Spatial Data

In [None]:
# Attach pacman with library() so a missing installation stops the notebook
# here; require() would only return FALSE (with a warning) and the p_load()
# call below would then fail with a less obvious error.
library("pacman")
# p_load() attaches every listed package (installing missing ones by default).
p_load("tidyverse", "sf", "modeldata", "geojsonio")

In [None]:
# Load the `ames` dataset from the modeldata package into the workspace.
data("ames", package = "modeldata")

In [None]:
# Show the number of rows and columns of ames.
dim(ames)

In [None]:
# Inspect the class of ames before it is converted to an sf object.
class(ames)

![](figs/mercator.gif)

In [None]:
# Turn the ames table into an sf point layer.
# `coords` is read in x/y order, so Longitude must come before Latitude.
# crs = 4326 selects EPSG:4326, the standard WGS84 geodetic reference system.
ames_sf <- ames %>%
  sf::st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326)

In [None]:
# Inspect the class of the converted object.
class(ames_sf)

In [None]:
# Preview the first rows of the sf object.
head(ames_sf)

In [None]:
# Static map of every point in ames_sf on a black-and-white theme.
ggplot(data = ames_sf) +
  geom_sf() +
  theme_bw()

In [None]:
p_load("leaflet")

In [None]:
# Interactive map: default tile layer with one circle marker per point.
map1 <- leaflet(data = ames_sf) %>%
  addTiles() %>%
  addCircleMarkers()

In [None]:
# Workaround for displaying the widget in a Jupyter notebook (not needed in
# RStudio): save the map as an HTML page and embed that page in an iframe.
p_load("htmlwidgets", "IRdisplay")

saveWidget(widget = map1, file = 'demo1.html', selfcontained = FALSE)
display_html(
  data = '<iframe src="demo1.html" width="800" height="800"></iframe>'
)

In [None]:
# Same points on a different tile provider, drawn as circles.
# NOTE(review): Stamen-hosted tiles have reportedly moved to Stadia Maps --
# confirm that `providers$Stamen.Toner` still resolves with the installed
# leaflet version.
map2 <- leaflet(data = ames_sf) %>%
  addProviderTiles(providers$Stamen.Toner) %>%
  addCircles()

In [None]:
# Workaround for displaying the widget in a Jupyter notebook (not needed in
# RStudio): save the map as an HTML page and embed that page in an iframe.
p_load("htmlwidgets", "IRdisplay")

saveWidget(widget = map2, file = 'demo2.html', selfcontained = FALSE)
display_html(
  data = '<iframe src="demo2.html" width="800" height="800"></iframe>'
)

# Clustering

In [None]:
# Draw a reproducible random third of the points and keep only the geometry
# column in `db`.
set.seed(101011)
ames_sample <- sample_frac(ames_sf, size = 1 / 3)
db <- select(ames_sample, geometry)
head(db)

In [None]:
# Overwrite db with the pairwise distances between the sampled points
# (st_distance() with a single argument compares the layer with itself).
db <- sf::st_distance(db)
head(db)

In [None]:
# Strip the units metadata so db is a plain numeric matrix.
db <- db %>% units::drop_units()

In [None]:
# k-means with 3 clusters on the rows of db; nstart = 25 tries 25 random
# initialisations and keeps the best one.
k3 <- kmeans(x = db, centers = 3, nstart = 25)
str(k3)

In [None]:
# Attach the k3 cluster labels to the sampled points as a factor column.
ames_sample <- mutate(ames_sample, clusters = factor(k3$cluster))

In [None]:
# Map the sampled points coloured by their assigned cluster.
ggplot(data = ames_sample) +
  geom_sf(aes(colour = clusters)) +
  theme_bw()

## ¿Cuántos K (clusters) debemos elegir?

### Método del codo

In [None]:
# Total within-cluster sum of squares of a k-means fit on `db`.
#   k: number of clusters.
# Returns the `tot.withinss` element of the fit (25 random starts).
# Reads `db` from the enclosing environment.
wss <- function(k) {
  fit <- kmeans(db, centers = k, nstart = 25)
  fit$tot.withinss
}

# Elbow method: compute the total within-cluster sum of squares for
# k = 1 to 12 and plot it against k.
# vapply() guarantees one numeric value per k, whereas sapply() would
# silently change its return type on unexpected output.
wss_values <- vapply(1:12, wss, numeric(1))

# `frame.plot` is spelled out instead of relying on partial matching of
# `frame`.
plot(1:12, wss_values,
     type = "b", pch = 19, frame.plot = FALSE,
     xlab = "Número de clusters (K)",
     ylab = "SSR within-clusters total")

### Coeficiente de Silhouette

In [None]:
p_load("cluster")
# Average silhouette width of a k-means fit on `db`.
#   k: number of clusters.
# Returns the mean of the silhouette widths, computed from the cluster labels
# and the Euclidean distances between the rows of `db`.
# Reads `db` from the enclosing environment.
avg_sil <- function(k) {
  fit <- kmeans(db, centers = k, nstart = 25)
  sil <- cluster::silhouette(fit$cluster, dist(db))
  # The third column of the silhouette matrix is named "sil_width".
  mean(sil[, "sil_width"])
}


# Compute the average silhouette coefficient for k = 2 to 12 and plot it
# against k.
# vapply() guarantees one numeric value per k, whereas sapply() would
# silently change its return type on unexpected output.
valores_sil <- vapply(2:12, avg_sil, numeric(1))

# `frame.plot` is spelled out instead of relying on partial matching of
# `frame`.
plot(2:12, valores_sil,
     type = "b", pch = 19, frame.plot = FALSE,
     xlab = "Número de clusters (K)",
     ylab = "Coeficiente de Silhouette")

In [None]:
# Refit k-means with 4 clusters (25 random starts) and overwrite the
# `clusters` column with the new labels.
k4 <- kmeans(x = db, centers = 4, nstart = 25)

ames_sample <- mutate(ames_sample, clusters = factor(k4$cluster))

In [None]:
# Map the sampled points coloured by their assigned cluster.
ggplot(data = ames_sample) +
  geom_sf(aes(colour = clusters)) +
  theme_bw()

# Super learner

In [None]:
# Add the natural log of the sale price as the modelling target.
ames <- mutate(ames, logprice = log(Sale_Price))


In [None]:
p_load("caret")
set.seed(1011)

# Partition on the dependent (target) variable, placing 70% of the rows in
# the training set; list = FALSE returns the row indices as a matrix.
inTrain <- createDataPartition(y = ames$logprice, p = 0.7, list = FALSE)

# Rows selected by inTrain form the training set; the rest form the test set.
train <- ames[inTrain, ]
test <- ames[-inTrain, ]
colnames(train)

In [None]:
p_load("SuperLearner")


In [None]:
# Review the available models: print the prediction and screening wrappers
# that SuperLearner provides.
listWrappers()

In [None]:
# Outcome vector and the three predictors used by the learners.
y <- train[["logprice"]]
X <- select(train, Year_Built, Bldg_Type, Gr_Liv_Area)

In [None]:
# Candidate learners: a random forest and a linear model.
sl.lib <- c("SL.randomForest", "SL.lm")

# Fit the ensemble; method.NNLS combines the learners with non-negative
# least squares weights.
fitY <- SuperLearner(
  Y = y,
  X = data.frame(X),
  SL.library = sl.lib,
  method = "method.NNLS"
)

fitY

In [None]:
# Predict on the held-out set with the fitted ensemble.
# newdata is limited to the predictors the ensemble was fitted on
# (Year_Built, Bldg_Type, Gr_Liv_Area) so that learners that expect exactly
# the training columns are not handed the rest of `test`.
# onlySL is spelled TRUE because `T` is an ordinary, reassignable variable.
test_X <- test %>% select(Year_Built, Bldg_Type, Gr_Liv_Area)
yhat_Sup <- predict(fitY, newdata = data.frame(test_X), onlySL = TRUE)$pred
head(yhat_Sup)

In [None]:
# Customize the random forest defaults: a ranger learner with 1000 trees.
custon_ranger <- create.Learner("SL.ranger", params = list(num.trees = 1000))

# Names of the generated learner(s), for use in SL.library.
custon_ranger$names


In [None]:
# randomForest learners tuned over mtry, one learner per value.
# round() maps c(1, sqrt(3), 3) to the integers 1, 2 and 3.
custom_rf <- create.Learner(
  "SL.randomForest",
  tune = list(mtry = round(c(1, sqrt(3), 3)))
)
# Names of the generated learners, for use in SL.library.
custom_rf$names

In [None]:
# Customize glmnet: one learner per alpha on a five-point grid from 0 to 1.
custon_glmnet <- create.Learner(
  "SL.glmnet",
  tune = list(alpha = seq(0, 1, length.out = 5))
)

# Names of the generated learners, for use in SL.library.
custon_glmnet$names

In [None]:

# Extend the library with the customised ranger, glmnet and randomForest
# learners created above, alongside the two default wrappers.
sl.lib <- c(
  "SL.randomForest",
  "SL.lm",
  custon_ranger$names,
  custon_glmnet$names,
  custom_rf$names
)

# Refit the ensemble on the larger library, again weighting the learners
# with non-negative least squares.
fitY <- SuperLearner(
  Y = y,
  X = data.frame(X),
  SL.library = sl.lib,
  method = "method.NNLS"
)

fitY