<div >
<img src = "figs/banner.png" />
</div>

# Clase 04: Preprocesamiento de un conjunto de datos

##  Instalar/llamar las librerías de la clase

In [None]:
# Attach pacman with library() so a missing installation stops right here with
# a clear error (require() would only return FALSE and emit a warning).
library(pacman)

# p_load() attaches every package used in this class.
p_load(
  tidyverse, rio, skimr, viridis,
  gstat,     # variogram
  sf,        # read/write/manipulate spatial data
  leaflet,   # interactive visualisations
  tmaptools, # geocode_OSM()
  nngeo,     # st_nn function
  spdep,     # construct neighbours list from polygon list
  osmdata    # get OSM's data
)

## Cargando los Datos


### Cargando Datos de Propiedades

In [None]:
# Read the property listings from the serialized R object on disk.
houses <- rio::import(file = "input/house_prices.rds")

In [None]:
# Check the class of the object that was just imported.
class(houses)

In [None]:
# Summary overview of every column in houses.
skim(houses)

#### To spatial

##### La Tierra no es plana.

![](figs/Projections.gif)

![](figs/mercator.gif)

![](figs/Mexico_Greenland.gif)

In [None]:
# Convert the data frame into an sf object.
# `coords` is given in x/y order, so longitude goes before latitude.
# EPSG:4326 is the standard WGS84 geodetic coordinate reference system, and
# `remove = FALSE` keeps the lon/lat columns next to the new geometry.
houses <- st_as_sf(
  houses,
  coords = c("lon", "lat"),
  crs = 4326,
  remove = FALSE
)

In [None]:
# Preview map with (at most) the first 100 properties.
# head() replaces houses[1:100, ]: indexing past the last row would pad the
# subset with NA rows when fewer than 100 observations are available.
map1 <- leaflet() %>%
  addTiles() %>%
  addCircleMarkers(data = head(houses, 100))

In [None]:
# Workaround to display the widget inside a Jupyter notebook (not needed in
# RStudio): save the map as an HTML file and embed it through an iframe.
p_load("htmlwidgets", "IRdisplay")

saveWidget(widget = map1, file = "demo1.html", selfcontained = FALSE)
display_html('<iframe src="demo1.html" width="800" height="800"></iframe>')

### Cargando Datos de Ciclovías

In [None]:
# Read the bike-lane layer from the input folder.
ciclovias <- st_read(dsn = "input/Ciclovia")

In [None]:
# Look at the first rows of the bike-lane layer.
head(ciclovias)

In [None]:
# Static map of the bike lanes: black-and-white theme, no axis titles,
# no grid lines and small axis labels.
ggplot() +
  geom_sf(data = ciclovias) +
  theme_bw() +
  theme(
    axis.title = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 6)
  )

### Cargando Datos del Censo

In [None]:
# Read the census blocks from the serialized R object on disk.
mnz <- rio::import(file = "input/mgn_censo_2018.rds")

In [None]:
# Preview map with (at most) the first 100 census blocks.
# head() replaces mnz[1:100, ]: indexing past the last row would pad the
# subset with NA rows when fewer than 100 blocks are available.
map2 <- leaflet() %>%
  addTiles() %>%
  addPolygons(data = head(mnz, 100))

In [None]:
# Workaround to display the widget inside a Jupyter notebook (not needed in
# RStudio): save the map as an HTML file and embed it through an iframe.
saveWidget(widget = map2, file = "demo2.html", selfcontained = FALSE)
display_html('<iframe src="demo2.html" width="800" height="800"></iframe>')

### Cargando Datos de Colegios

In [None]:
# Schools: open the source data set page in the browser.
browseURL(
  url = "https://datosabiertos.bogota.gov.co/dataset/resultados-pruebas-saber-11-bogota-d-c"
)

In [None]:
# Read the schools shapefile and keep only the three listed columns
# (COD_DANE12, COLEGIO_SE, P_PUNTAJE).
colegio <- st_read("input/col_saber_11.shp") %>%
  select(COD_DANE12, COLEGIO_SE, P_PUNTAJE)

# Print the layer.
colegio

# Distribution of the P_PUNTAJE column.
summary(colegio$P_PUNTAJE)

In [None]:
# Keep only the schools at or above the 90th percentile of P_PUNTAJE.
# na.rm = TRUE lets quantile() compute the cut-off even when some scores are
# missing (it stops with an error otherwise); subset() already drops the rows
# whose condition evaluates to NA.
colegio <- colegio %>%
  subset(P_PUNTAJE >= quantile(P_PUNTAJE, 0.9, na.rm = TRUE))

### CBD de Bogotá

In [None]:

# Geocode the central business district and get the result back as an sf
# object. TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
cbd <- geocode_OSM("Centro Internacional, Bogotá", as.sf = TRUE)

cbd

## Descargar datos de OSM

In [None]:
# Parks: query OSM for leisure = park inside the bounding box returned for
# "Bogota Colombia", then keep the id and name of the polygon layer.
parques <- opq(bbox = getbb(place_name = "Bogota Colombia")) %>%
  add_osm_feature(key = "leisure", value = "park") %>%
  osmdata_sf() %>%
  getElement("osm_polygons") %>%
  select(osm_id, name)




In [None]:
# Interactive map of the park polygons.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = parques)

In [None]:

# Restaurants: query OSM for amenity = restaurant inside the bounding box
# returned for "Bogota Colombia", then keep the id and name of the point layer.
restaurantes <- opq(bbox = getbb(place_name = "Bogota Colombia")) %>%
  add_osm_feature(key = "amenity", value = "restaurant") %>%
  osmdata_sf() %>%
  getElement("osm_points") %>%
  select(osm_id, name)



In [None]:
# Interactive map of the restaurant points.
leaflet() %>%
  addTiles() %>%
  addCircles(data = restaurantes)

In [None]:
# Banks: query OSM for amenity = bank inside the bounding box returned for
# "Bogota Colombia", then keep the id and name of the point layer.
bancos <- opq(bbox = getbb(place_name = "Bogota Colombia")) %>%
  add_osm_feature(key = "amenity", value = "bank") %>%
  osmdata_sf() %>%
  getElement("osm_points") %>%
  select(osm_id, name)



In [None]:
# Interactive map of the bank points.
leaflet() %>%
  addTiles() %>%
  addCircles(data = bancos)

In [None]:

# Malls: query OSM for shop = mall inside the bounding box returned for
# "Bogota Colombia", then keep the id and name of the polygon layer.
mall <- opq(bbox = getbb(place_name = "Bogota Colombia")) %>%
  add_osm_feature(key = "shop", value = "mall") %>%
  osmdata_sf() %>%
  getElement("osm_polygons") %>%
  select(osm_id, name)



In [None]:
# Interactive map of the mall polygons.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = mall)

## Operaciones con geometrías

Puede acceder a las viñetas de la librería [sf](https://github.com/r-spatial/sf) 

###  Filtrar datos

In [None]:
# Filter using the values of a variable: keep the listings whose l3 is
# "Bogotá D.C" and whose l4 is "Zona Chapinero".
houses1 <- subset(houses, l3 == "Bogotá D.C" & l4 == "Zona Chapinero")

In [None]:
# Print the filtered listings.
houses1

In [None]:
# Interactive map of the filtered listings.
leaflet() %>%
  addTiles() %>%
  addCircles(data = houses1)

In [None]:
# Filter using the geometry: request the administrative boundary of
# "UPZ Chapinero, Bogota" from OSM and take its `multipolygon` element.
chapinero <- getbb(
  place_name = "UPZ Chapinero, Bogota",
  featuretype = "boundary:administrative",
  format_out = "sf_polygon"
)$multipolygon

# Draw the boundary on an interactive map.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = chapinero)


In [None]:
# Crop points with a polygon (option 1): st_crop().
house_chapi <- st_crop(x = houses, y = chapinero)

# Boundary in red with the selected listings on top. `color` is spelled out
# in full instead of relying on partial matching of `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = chapinero, color = "red") %>%
  addCircles(data = house_chapi)



In [None]:
# Crop points with a polygon (option 2): st_intersection().
house_chapi <- st_intersection(x = houses, y = chapinero)

# Boundary in red with the selected listings on top. `color` is spelled out
# in full instead of relying on partial matching of `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = chapinero, color = "red") %>%
  addCircles(data = house_chapi)


In [None]:
# Crop points with a polygon (option 3): spatial subsetting with `[`.
house_chapi <- houses[chapinero, ]

# Boundary in red with the selected listings on top. `color` is spelled out
# in full instead of relying on partial matching of `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = chapinero, color = "red") %>%
  addCircles(data = house_chapi)


In [None]:
# Crop polygons with a polygon: spatial subsetting of the census blocks.
mnz_chapi <- mnz[chapinero, ]

# Boundary in red with the selected blocks on top. `color` is spelled out in
# full instead of relying on partial matching of `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = chapinero, color = "red") %>%
  addPolygons(data = mnz_chapi)

### Midiendo distancias



In [None]:
# Two reference places with their coordinates.
db <- data.frame(
  place = c("Uniandes", "Banco de La Republica"),
  lat = c(4.601590, 4.602151),
  long = c(-74.066391, -74.072350),
  nudge_y = c(-0.001, 0.001)
)

# Convert to sf (x/y order: longitude first) using EPSG:4326, then print.
db <- st_as_sf(db, coords = c("long", "lat"), crs = 4326)
db



In [None]:

# Re-project the reference points to EPSG:4686 and show the resulting CRS.
db <- st_transform(db, crs = 4686)
st_crs(db)


In [None]:
# Pairwise distances between the points in db.
st_distance(db)

In [None]:
# Distances from each point in db to each bike-lane feature.
# NOTE(review): ciclovias is only transformed to EPSG:4686 in a later cell; if
# its CRS differs from db's at this point the call may fail -- confirm intent.
st_distance(db,ciclovias)

In [None]:
# Put both layers in the same CRS (EPSG:4686) before measuring distances.
ciclovias <- st_transform(ciclovias, crs = 4686)
db <- st_transform(db, crs = 4686)

In [None]:
# Distances from each point in db to each bike-lane feature.
st_distance(db,ciclovias)

#### Distancia a amenities

##### Distancia a un punto

In [None]:

# Distance from every Chapinero listing to the CBD, stored as a new column.
house_chapi$dist_cbd <- st_distance(house_chapi, cbd)

# Histogram of those distances.
house_chapi$dist_cbd %>%
  hist()



##### Distancia a muchos puntos

In [None]:

# Distance matrix: one row per listing, one column per school.
matrix_dist_cole <- st_distance(x = house_chapi, y = colegio)

head(matrix_dist_cole)

# Row-wise minimum: distance from each listing to its nearest school.
min_dist_cole <- apply(matrix_dist_cole, 1, min)

min_dist_cole %>%
  hist()

# Store the result as a new column (`<-` rather than `=` for assignment).
house_chapi$dist_cole <- min_dist_cole


##### Distancia a muchos polígonos

In [None]:


# Distance matrix: one row per listing, one column per park polygon.
matrix_dist_parque <- st_distance(x = house_chapi, y = parques)

head(matrix_dist_parque)

# Row-wise mean: average distance from each listing to all the parks.
mean_dist_parque <- apply(matrix_dist_parque, 1, mean)

mean_dist_parque %>%
  hist()

# Store the result as a new column (`<-` rather than `=` for assignment).
house_chapi$dist_parque <- mean_dist_parque



### Unir objetos usando la geometría

In [None]:
# Define a sub-sample: the listings that fall inside a buffer of distance 200
# drawn around the listing in row 100.
new_chapi <- house_chapi[st_buffer(house_chapi[100, ], dist = 200), ]


In [None]:

# Census blocks touched by the sub-sample (red) with the listings on top.
# `color` is spelled out in full instead of relying on partial matching of
# `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(data = mnz_chapi[new_chapi, ], color = "red") %>%
  addCircles(data = new_chapi)


In [None]:

# Join two data sets based on distance: attach to each listing the attributes
# of its nearest census block (k = 1), searching up to maxdist = 20.
new_chapi <- st_join(
  x = new_chapi,
  y = mnz_chapi[new_chapi, ],
  join = st_nn,
  maxdist = 20,
  k = 1
)

new_chapi


In [None]:

# Blocks (red) and listings, both labelled with MANZ_CCNCT so the match made by
# the join can be checked visually. `color` is spelled out in full instead of
# relying on partial matching of `col`.
leaflet() %>%
  addTiles() %>%
  addPolygons(
    data = mnz_chapi[new_chapi, ],
    color = "red",
    label = mnz_chapi[new_chapi, ]$MANZ_CCNCT
  ) %>%
  addCircles(data = new_chapi, label = new_chapi$MANZ_CCNCT)


In [None]:

# Join two data sets based on geometry: st_join() with its default join
# attaches the census-block attributes to each listing.
house_chapi <- st_join(house_chapi, mnz_chapi)

head(house_chapi)
