In [60]:
# standard python packages
import pandas as pd
import ast
import numpy as np
import sys
import json


# geospatial manipulation
import geopandas
from geopy.distance import geodesic
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.ops import nearest_points

#### Import the Properati and Census data 

In [61]:
# Load the rental listings and the cleaned census table; both files are
# decoded as ISO-8859-1.
properati = pd.read_csv(
    'properati_data/rent-combined.csv',
    encoding="ISO-8859-1",
)
census = pd.read_csv(
    'census/cleaned_data2.csv',
    encoding="ISO-8859-1",
)

#### Change the format of the geojson file to something that can be imported into Pandas

In [62]:
# Load the GeoJSON file and flatten every feature into one table row.
# Read-only mode is sufficient because the file is never written to; it also
# lets the cell run when the file (or its directory) is not writable.
with open('geojson/baires.json', 'r') as fd:
    ba_dict = json.load(fd)

ba_polygon = ba_dict['features']
header = ['type', 'id', 'coordinates', 'barrio', 'perimeter', 'area', 'commune']

# One row per feature, in the same order as `header`.
ba_data_list = []
for feature in ba_polygon:
    properties = feature['properties']
    ba_data_list.append([
        feature['type'],
        feature['id'],
        feature['geometry']['coordinates'],  # raw nested coordinate lists
        properties['barrios'],
        properties['perimetro'],
        properties['area'],
        properties['comunas'],
    ])

ba_data = pd.DataFrame(ba_data_list, columns=header)

    
    
    

In [63]:
def modify_poly(data):
    """Convert an iterable of coordinate sequences into a list of tuples.

    Parameters
    ----------
    data : iterable
        Each element is itself an iterable (for example ``[x, y]``).

    Returns
    -------
    list of tuple
        One tuple per element of ``data``, in the original order.
    """
    return [tuple(vertex) for vertex in data]

In [64]:
# Give every listing a shapely Point built from its (lon, lat) pair and use
# that column as the geometry of a GeoDataFrame.
properati['coordinates'] = list(zip(properati['lon'], properati['lat']))
properati['coordinates'] = properati['coordinates'].apply(Point)
properati_gdf = geopandas.GeoDataFrame(properati, geometry='coordinates')

# Each census 'coordinates' cell is a Python-literal string: parse it, take
# its first element, turn the vertices into tuples and build a Polygon.
census['coordinates'] = census['coordinates'].apply(
    lambda raw: Polygon(modify_poly(ast.literal_eval(raw)[0]))
)
census_gdf = geopandas.GeoDataFrame(census, geometry='coordinates')

# Keep only the listings whose point lies within a census polygon.
properati_census_gdf = geopandas.sjoin(
    properati_gdf,
    census_gdf,
    how="inner",
    op='within',
)

# Report the share of listings that survived the join, then the row count.
matched_share = len(properati_census_gdf) / len(properati_gdf)
print('{:.2f}%'.format(matched_share * 100))

print(len(properati_census_gdf))

24.73%
65574


In [65]:
def modify_json_coordinates(coordinates):
    """Return the points found at ``coordinates[0][0]`` as a list of tuples.

    Only the first element of the first element of ``coordinates`` is read;
    anything else in the nested structure is ignored.
    (Presumably the outer ring of the first polygon of a GeoJSON
    MultiPolygon; confirm against the input file.)

    Parameters
    ----------
    coordinates : sequence
        Nested sequence whose ``[0][0]`` entry is an iterable of points.

    Returns
    -------
    list of tuple
        One tuple per point, in the original order.
    """
    first_ring = coordinates[0][0]
    return [tuple(point) for point in first_ring]
        

In [67]:
# Replace each raw coordinate structure with a shapely Polygon built from the
# point list returned by modify_json_coordinates.
ba_data['coordinates'] = ba_data['coordinates'].apply(
    lambda raw: Polygon(modify_json_coordinates(raw))
)

# Use the polygon column as the geometry of a GeoDataFrame.
ba_data_gdf = geopandas.GeoDataFrame(ba_data, geometry='coordinates')

In [73]:
# Remove the id/index bookkeeping columns left over from the previous join
# before joining again.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone and drop() would otherwise raise KeyError.
properati_census_gdf = properati_census_gdf.drop(
    columns=['id_left', 'id_right', 'index_right'],
    errors='ignore',
)
print (properati_census_gdf.columns.values)

# Attach the barrio polygon attributes to every listing that lies within one.
final_gdf = geopandas.sjoin(properati_census_gdf , ba_data_gdf, how="inner", op='within')
print (final_gdf.columns.values)


['dataset_date' 'created_on' 'operation' 'property_type' 'place_name'
 'place_with_parent_names' 'country_name' 'state_name' 'geonames_id'
 'lat_lon' 'lat' 'lon' 'price' 'currency' 'price_aprox_local_currency'
 'price_aprox_usd' 'properati_url' 'description' 'title' 'image_thumbnail'
 'coordinates' 'AREA' 'Commune' 'DEPTO' 'FRAC' 'Computer Percent'
 'Computer Quantile' 'Cellular Percent' 'Cellular Quantile' 'Rent Percent'
 'Rent Quantile' 'LINK' 'Immigration Percent' 'Immigration Quantile'
 'Education Percent' 'Education Quantile' 'PERIMETER' 'Owner Percent'
 'Owner Quantile' 'PROV' 'RADIO' 'RADPAIS_' 'REDCODE' 'Regular Percent'
 'Regular Quantile' 'TIPO' 'Uninhabited Percent' 'Uninhabited Quantile'
 'type']
['dataset_date' 'created_on' 'operation' 'property_type' 'place_name'
 'place_with_parent_names' 'country_name' 'state_name' 'geonames_id'
 'lat_lon' 'lat' 'lon' 'price' 'currency' 'price_aprox_local_currency'
 'price_aprox_usd' 'properati_url' 'description' 'title' 'image_thumbnai

In [78]:
# Show the barrio GeoDataFrame, then write its geometry column to a shapefile.
print(ba_data_gdf)
ba_data_gdf.coordinates.to_file(
    filename="shape files/ba_data.shp",
    driver='ESRI Shapefile',
)

# Keep only the listing-level columns of the joined result.
filtered_gdf = final_gdf[[
    'dataset_date',
    'created_on',
    'operation',
    'property_type',
    'place_name',
    'place_with_parent_names',
    'state_name',
    'price',
    'currency',
    'price_aprox_local_currency',
    'price_aprox_usd',
]]


       type                 id  \
0   Feature   nyu_2451_34198.1   
1   Feature   nyu_2451_34198.2   
2   Feature   nyu_2451_34198.3   
3   Feature   nyu_2451_34198.4   
4   Feature   nyu_2451_34198.5   
5   Feature   nyu_2451_34198.6   
6   Feature   nyu_2451_34198.7   
7   Feature   nyu_2451_34198.8   
8   Feature   nyu_2451_34198.9   
9   Feature  nyu_2451_34198.10   
10  Feature  nyu_2451_34198.11   
11  Feature  nyu_2451_34198.12   
12  Feature  nyu_2451_34198.13   
13  Feature  nyu_2451_34198.14   
14  Feature  nyu_2451_34198.15   

                                          coordinates  \
0   POLYGON ((-58.39520579705124 -34.5721906912082...   
1   POLYGON ((-58.41287003130885 -34.6141162515853...   
2   POLYGON ((-58.43060582833087 -34.6070470885962...   
3   POLYGON ((-58.45199524545651 -34.6297454449785...   
4   POLYGON ((-58.51925405323249 -34.6330100952682...   
5   POLYGON ((-58.48833736732458 -34.6201603572931...   
6   POLYGON ((-58.49838483988304 -34.5963676568872...   