In [22]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
#pd.set_option('display.max_colwidth', -1)
import numpy as np

In [23]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style = "whitegrid")

In [24]:
import geopandas
from shapely.geometry import LineString, Point, Polygon
import shapely.wkt
import folium

### Lectura del dataset

In [25]:
df = pd.read_csv('palermo_deptos.csv')
df.drop(['Unnamed: 0'], axis=1, inplace=True)

In [26]:
df.head()

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,parrilla,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad
0,19,104.0,96.0,3365.384615,,,Excelente semipiso al contra frente en Bulnes ...,3,0,0,0,1,0,POINT (-58.4058744847 -34.580503566),719.300376,991.604946,794.459912,1500.489635,usado
1,209,68.0,48.0,3455.882353,,,"Excelente 2 Ambientes orientacion Este, Muy lu...",2,1,0,0,0,0,POINT (-58.4372870022 -34.56457657510001),980.716124,906.891789,260.100239,876.358223,estrenar
2,220,69.0,53.0,3447.753623,,,El edificio está situado en el barrio de las ...,2,1,0,0,0,1,POINT (-58.43823280199999 -34.5652650561),894.771967,791.276622,371.547893,766.818862,usado
3,241,39.0,35.0,3776.923077,,,IMPECABLE TORRE COY III – DEPARTAMENTO DE UN A...,1,0,1,1,1,0,POINT (-58.41230720000001 -34.5888617),354.987173,115.947579,1737.834489,1285.978768,estrenar
4,266,47.0,41.0,2872.340426,,1800.0,CODIGO: ubicado en: Avenida Santa Fe 5000 - ...,2,0,0,0,1,0,POINT (-58.4314684478 -34.5765042465),773.123345,357.007275,326.555566,190.245217,estrenar


### Transformacion en geodataframe using crs epsg 900913 - Web Merkator Projection

In [27]:
geometry = df['geometry'].map(shapely.wkt.loads)
crs = {'init': 'epsg:900913'}
deptos_gdf = geopandas.GeoDataFrame(df, crs=crs, geometry=geometry)

In [28]:
deptos_gdf.head()

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,parrilla,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad
0,19,104.0,96.0,3365.384615,,,Excelente semipiso al contra frente en Bulnes ...,3,0,0,0,1,0,POINT (-58.4058744847 -34.580503566),719.300376,991.604946,794.459912,1500.489635,usado
1,209,68.0,48.0,3455.882353,,,"Excelente 2 Ambientes orientacion Este, Muy lu...",2,1,0,0,0,0,POINT (-58.4372870022 -34.56457657510001),980.716124,906.891789,260.100239,876.358223,estrenar
2,220,69.0,53.0,3447.753623,,,El edificio está situado en el barrio de las ...,2,1,0,0,0,1,POINT (-58.43823280199999 -34.5652650561),894.771967,791.276622,371.547893,766.818862,usado
3,241,39.0,35.0,3776.923077,,,IMPECABLE TORRE COY III – DEPARTAMENTO DE UN A...,1,0,1,1,1,0,POINT (-58.41230720000001 -34.5888617),354.987173,115.947579,1737.834489,1285.978768,estrenar
4,266,47.0,41.0,2872.340426,,1800.0,CODIGO: ubicado en: Avenida Santa Fe 5000 - ...,2,0,0,0,1,0,POINT (-58.4314684478 -34.5765042465),773.123345,357.007275,326.555566,190.245217,estrenar


### Lectura de dataframe de subbarrios de Palermo y transformación en geodataframe

In [29]:
barrios = pd.read_csv('palermos_map.csv')

In [30]:
geometry = barrios['geometry'].map(shapely.wkt.loads)
crs = {'init': 'epsg:900913'}
barrios_gdf = geopandas.GeoDataFrame(barrios, crs=crs, geometry=geometry)

`sjoin` para ubicar los POINTS (deptos) de deptos_gdf en los POLYGONS (barrios) de barrios_gdf

In [31]:
gdf = geopandas.sjoin(deptos_gdf, barrios_gdf, op='within', how='left')

In [32]:
gdf.head()

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,...,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad,index_right,barrio
0,19,104.0,96.0,3365.384615,,,Excelente semipiso al contra frente en Bulnes ...,3,0,0,...,1,0,POINT (-58.4058744847 -34.580503566),719.300376,991.604946,794.459912,1500.489635,usado,2.0,Norte
1,209,68.0,48.0,3455.882353,,,"Excelente 2 Ambientes orientacion Este, Muy lu...",2,1,0,...,0,0,POINT (-58.4372870022 -34.56457657510001),980.716124,906.891789,260.100239,876.358223,estrenar,6.0,Las Cañitas
2,220,69.0,53.0,3447.753623,,,El edificio está situado en el barrio de las ...,2,1,0,...,0,1,POINT (-58.43823280199999 -34.5652650561),894.771967,791.276622,371.547893,766.818862,usado,6.0,Las Cañitas
3,241,39.0,35.0,3776.923077,,,IMPECABLE TORRE COY III – DEPARTAMENTO DE UN A...,1,0,1,...,1,0,POINT (-58.41230720000001 -34.5888617),354.987173,115.947579,1737.834489,1285.978768,estrenar,11.0,Alto
4,266,47.0,41.0,2872.340426,,1800.0,CODIGO: ubicado en: Avenida Santa Fe 5000 - ...,2,0,0,...,1,0,POINT (-58.4314684478 -34.5765042465),773.123345,357.007275,326.555566,190.245217,estrenar,7.0,Pacifico


Eliminar columna 'index_right' y renombra columna 'nombre' con 'barrio'

In [33]:
gdf.drop(['index_right'], axis=1, inplace=True)

In [34]:
gdf.barrio.unique()

array(['Norte', 'Las Cañitas', 'Alto', 'Pacifico', 'Botanico', 'Nuevo',
       'Zoo', 'Freud', 'Falso Belgrano', 'Soho', 'Hollywood', 'Chico',
       'Costa', nan], dtype=object)

### Revisar los casos con barrio == 'Costa'

In [35]:
gdf_costa = gdf[gdf.barrio == 'Costa']
gdf_costa

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,parrilla,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad,barrio
305,20192,90.0,,2290.222222,,,SEMIPISOS DE CATEGORIA A ESTRENAR FRENTE AL MA...,5,0,0,1,1,1,POINT (-58.4170088 -34.5580305),2508.92945,2398.975126,1620.22259,2337.956588,estrenar,Costa


In [36]:
m = folium.Map([-34.5712, -58.4233], zoom_start=13.4, tiles="OpenStreetMap") 
# se crean los poligonos y se agrega al mapa
folium.GeoJson(barrios_gdf.to_json()).add_to(m)
folium.GeoJson(gdf_costa.to_json()).add_to(m);
m

<folium.features.GeoJson at 0x10c076470>

<folium.features.GeoJson at 0x10d756c18>

Eliminar la ID 20192 por tener malas coordenadas.  
Reasignar la ID 60050 a Las Canitas

In [37]:
gdf.drop(gdf[gdf.id == 20192].index, inplace=True)
gdf.loc[gdf.id == 60050, 'barrio'] = 'Las Cañitas'

### Revisar los casos con barrio == nan

In [38]:
gdf_nan = gdf[gdf.barrio.isnull()]
gdf_nan

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,parrilla,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad,barrio
1729,119362,147.0,147.0,2210.884354,,,6° Piso: Superficie cubierta y total 50.84 m27...,5,0,0,0,0,0,POINT (-58.4004389 -34.5841248),1074.580897,473.796205,887.770435,2035.445437,usado,


In [39]:
m = folium.Map([-34.5712, -58.4233], zoom_start=13.4, tiles="OpenStreetMap") 
# se crean los poligonos y se agrega al mapa
folium.GeoJson(barrios_gdf.to_json()).add_to(m)
folium.GeoJson(gdf_nan.to_json()).add_to(m);
m

<folium.features.GeoJson at 0x10cdc99e8>

<folium.features.GeoJson at 0x10cdc9908>

Reasignar la ID 119362 a barrio 'Norte'

In [40]:
gdf.loc[gdf.id == 119362, 'barrio'] = 'Norte'

In [41]:
gdf.shape
gdf.head()

(1770, 20)

Unnamed: 0,id,superficie,cubierta,precio,piso,expensas,descripcion,ambientes,pileta,seguridad,parrilla,balcon,cochera,geometry,dist_comisarias,dist_subte,dist_trenes,dist_metrobus,antiguedad,barrio
0,19,104.0,96.0,3365.384615,,,Excelente semipiso al contra frente en Bulnes ...,3,0,0,0,1,0,POINT (-58.4058744847 -34.580503566),719.300376,991.604946,794.459912,1500.489635,usado,Norte
1,209,68.0,48.0,3455.882353,,,"Excelente 2 Ambientes orientacion Este, Muy lu...",2,1,0,0,0,0,POINT (-58.4372870022 -34.56457657510001),980.716124,906.891789,260.100239,876.358223,estrenar,Las Cañitas
2,220,69.0,53.0,3447.753623,,,El edificio está situado en el barrio de las ...,2,1,0,0,0,1,POINT (-58.43823280199999 -34.5652650561),894.771967,791.276622,371.547893,766.818862,usado,Las Cañitas
3,241,39.0,35.0,3776.923077,,,IMPECABLE TORRE COY III – DEPARTAMENTO DE UN A...,1,0,1,1,1,0,POINT (-58.41230720000001 -34.5888617),354.987173,115.947579,1737.834489,1285.978768,estrenar,Alto
4,266,47.0,41.0,2872.340426,,1800.0,CODIGO: ubicado en: Avenida Santa Fe 5000 - ...,2,0,0,0,1,0,POINT (-58.4314684478 -34.5765042465),773.123345,357.007275,326.555566,190.245217,estrenar,Pacifico


### Guardar archivo para correr modelos

In [42]:
#gdf.to_csv('palermo_deptos_final.csv')

In [43]:
gdf.ambientes.unique()

array([3, 2, 1, 4, 5, 7, 6])