In [143]:
import pandas as pd
import ast
import numpy as np
import sys
print(sys.executable)
import geopandas
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

 

/anaconda3/envs/geopandas/bin/python


### Import data

In [144]:
# Load the three source tables. Each CSV is read with an explicit
# ISO-8859-1 (Latin-1) encoding instead of pandas' default.
properati = pd.read_csv(
    'properati_data/rent-2015.csv',
    encoding="ISO-8859-1",
)
prop_data = pd.read_csv(
    'prop_data/cleaned_data.csv',
    encoding="ISO-8859-1",
)
census = pd.read_csv(
    'census/cleaned_data2.csv',
    encoding="ISO-8859-1",
)

### Column names per dataframe

In [145]:
# Show the column names of each table so the available fields are visible
# before any processing. Each entry pairs a report template with its frame.
column_reports = (
    ('Properati Columns:\n {}\n', properati),
    ('Property Values Columns:\n {}\n', prop_data),
    ('Census Columns:\n {}\n', census),
)
for template, frame in column_reports:
    print (template.format(frame.columns.values))

Properati Columns:
 ['dataset_date' 'id' 'created_on' 'operation' 'property_type' 'place_name'
 'place_with_parent_names' 'country_name' 'state_name' 'geonames_id'
 'lat_lon' 'lat' 'lon' 'price' 'currency' 'price_aprox_local_currency'
 'price_aprox_usd' 'properati_url' 'description' 'title' 'image_thumbnail']

Property Values Columns:
 ['Unnamed: 0' 'Location' 'Commune' 'Longitude' 'Latitude' 'Date'
 'Value in US Dollars' 'Value of m2 (US Dollars)' 'm2']

Census Columns:
 ['id' 'AREA' 'Commune' 'DEPTO' 'FRAC' 'Computer Percent'
 'Computer Quantile' 'Cellular Percent' 'Cellular Quantile' 'Rent Percent'
 'Rent Quantile' 'LINK' 'Immigration Percent' 'Immigration Quantile'
 'Education Percent' 'Education Quantile' 'PERIMETER' 'Owner Percent'
 'Owner Quantile' 'PROV' 'RADIO' 'RADPAIS_' 'REDCODE' 'Regular Percent'
 'Regular Quantile' 'TIPO' 'Uninhabited Percent' 'Uninhabited Quantile'
 'coordinates' 'type']



### Explore the dataframes

In [146]:
# Preview the first five rows of each table. Each entry pairs a report
# template with the frame whose head is substituted into it.
head_reports = (
    ('Properati:\n {}\n', properati),
    ('Property Values:\n {}\n', prop_data),
    ('Census:\n {}\n', census),
)
for template, frame in head_reports:
    print (template.format(frame.head(5)))

Properati:
    dataset_date                                        id created_on  \
0        201501  6052eff893a6541b80eff7c92e2a84244a4a52f7  1/22/2015   
1        201501  2a07484bec751cdfe11ed4d66965a8db29aec855   1/8/2015   
2        201501  a07be7abc0c1e556eee42315752b6a852069a937  1/22/2015   
3        201501  30a6877645bcd6365369c4f7d8dd4cc2ef346686  1/27/2015   
4        201501  63c85f1bec3242055bf83e3f93ff4fa1208a4e39  1/10/2015   

  operation property_type       place_name      place_with_parent_names  \
0      rent     apartment  Capital Federal  |Argentina|Capital Federal|   
1      rent         store  Capital Federal  |Argentina|Capital Federal|   
2      rent     apartment  Capital Federal  |Argentina|Capital Federal|   
3      rent         store  Capital Federal  |Argentina|Capital Federal|   
4      rent         store  Capital Federal  |Argentina|Capital Federal|   

  country_name       state_name  geonames_id  ...        lat        lon  \
0    Argentina  Capital Feder

### Create GeoDataFrames
This section follows the GeoPandas gallery example on creating a GeoDataFrame from a pandas DataFrame, found [here](http://geopandas.org/gallery/create_geopandas_from_pandas.html#sphx-glr-gallery-create-geopandas-from-pandas-py).

In [147]:
def modify_poly(data):
    """Convert every element of ``data`` to a tuple.

    ``data`` is any iterable whose elements are themselves iterable (for
    example a list of ``[x, y]`` pairs). The result is a new list holding
    ``tuple(element)`` for each element, in the original order. An empty
    input yields an empty list.
    """
    return [tuple(vertex) for vertex in data]

In [148]:
# Build one GeoDataFrame per source table. In all three, the geometry lives in
# a column named 'coordinates'.


def _census_polygon(raw):
    """Return a shapely Polygon for one cell of the census 'coordinates' column.

    ``raw`` is normally the text read from the CSV: a Python literal whose
    element ``[0]`` is a sequence of coordinate pairs (presumably the outer
    ring of the polygon -- TODO confirm against the census export). That
    sequence is converted to tuples with ``modify_poly`` and wrapped in a
    Polygon.

    If ``raw`` is already a Polygon (this cell has been run before in the same
    kernel) it is returned unchanged, so re-running the cell no longer fails
    when ``ast.literal_eval`` is handed a geometry object instead of text.
    """
    if isinstance(raw, Polygon):
        return raw
    first_ring = ast.literal_eval(raw)[0]
    return Polygon(modify_poly(first_ring))


# --- Properati listings: one Point per row -----------------------------------
# shapely Points are built from (x, y) pairs, so longitude goes first and
# latitude second.
properati['coordinates'] = list(zip(properati.lon, properati.lat))
properati['coordinates'] = properati['coordinates'].apply(Point)
properati_gdf = geopandas.GeoDataFrame(properati, geometry='coordinates')

# Quick checks: is any geometry entry null, and how many listings are there?
print (properati_gdf.coordinates.isnull().values.any())
print (len(properati_gdf))

# --- Property values: one Point per row --------------------------------------
# Longitude/latitude in the property values dataframe are coerced to numeric
# first; entries that cannot be parsed become NaN (errors='coerce').
prop_data['Latitude'] = pd.to_numeric(prop_data['Latitude'], errors='coerce')
prop_data['Longitude'] = pd.to_numeric(prop_data['Longitude'], errors='coerce')

# Same (longitude, latitude) ordering as for the Properati points above.
prop_data['coordinates'] = list(zip(prop_data.Longitude, prop_data.Latitude))
prop_data['coordinates'] = prop_data['coordinates'].apply(Point)
prop_data_gdf = geopandas.GeoDataFrame(prop_data, geometry='coordinates')

# --- Census tracts: one Polygon per row --------------------------------------
# The 'coordinates' column is overwritten in place with Polygon objects.
census['coordinates'] = census['coordinates'].apply(_census_polygon)
census_gdf = geopandas.GeoDataFrame(census, geometry='coordinates')

# Quick checks: number of census rows, and whether any geometry entry is null.
print (len(census_gdf))
print (census_gdf.coordinates.isnull().values.any())

False
51411
3552
False


In [149]:
# Spatial join of listings against census polygons: an inner join that keeps
# the listings whose point is 'within' a census polygon.
# NOTE(review): newer geopandas releases replace the `op` keyword with
# `predicate`; confirm the installed version before renaming it.
properati_census_gdf = geopandas.sjoin(
    properati_gdf,
    census_gdf,
    how="inner",
    op='within',
)

# Number of rows produced by the join.
matched_rows = len(properati_census_gdf)
print (matched_rows)

7107


In [150]:
# Select the census rows whose Commune equals 2 and list the distinct FRAC
# values among them.
# NOTE(review): the name `filter` shadows Python's built-in filter(); it is
# left unchanged here because other cells may refer to it.
filter = census.loc[census['Commune'] == 2]
print (filter.FRAC.unique())

# First rows of the raw 'lat_lon' text column of the listings.
print (properati['lat_lon'].head())

# The bare string below is not a comment: as the last expression of the cell
# it is echoed as the cell's output. It lists census column names.
'''
Rent Percent
Immigration Percent
Education Percent
Owner Percent
Uninhabited Percent
'''

[ 1.  2. 11. 12. 10.  3.  4. 23. 13.  9.  5. 24. 22.  8. 14. 21. 25. 15.
  7.  6. 16. 20. 19. 18. 17.]
0    -34.5815242,-58.4332607
1     -34.602282,-58.3761509
2    -34.5815242,-58.4332607
3     -34.622528,-58.4307416
4    -34.6032291,-58.3955202
Name: lat_lon, dtype: object


'\nRent Percent\nImmigration Percent\nEducation Percent\nOwner Percent\nUninhabited Percent\n'

# Brute-force point-in-polygon lookup: test every Properati listing against
# every census polygon and print the Commune of each polygon containing it.
#
# NOTE(review): this cell parses census.coordinates with ast.literal_eval, so
# it needs that column to still hold the raw text from the CSV. The
# GeoDataFrame cell earlier in the notebook overwrites the column with Polygon
# objects -- confirm the intended execution order before running this.

# Parse each census polygon once, up front. It does not depend on the listing
# being tested, so rebuilding it inside the listing loop was repeated work.
census_polygons = []
for _, census_row in census.iterrows():
    ring = ast.literal_eval(census_row.coordinates)[0]
    # Each coordinate pair is reversed so that polygon vertices use the same
    # axis order as the point parsed from the 'lat_lon' text below.
    vertices = [tuple(reversed(vertex)) for vertex in ring]
    census_polygons.append((census_row.Commune, vertices, Polygon(vertices)))

# Distinct loop names are used throughout; the earlier version reused `index`
# and `row` in the inner loop, rebinding the outer loop's variables.
for _, listing in properati.iterrows():
    print ('Properati Coord: {}'.format(listing.lat_lon))
    # 'lat_lon' is text such as "-34.58,-58.43"; literal_eval turns it into a
    # tuple of two numbers, which becomes the point.
    point = Point(ast.literal_eval(listing.lat_lon))

    for commune, vertices, polygon in census_polygons:
        print ('Census polygon: {}'.format(vertices))
        # Evaluate containment once and reuse the result for both the printed
        # flag and the branch.
        is_inside = polygon.contains(point)
        print (is_inside)
        if is_inside:
            print (commune)

In [151]:
# Hand-written sanity check: one point and one small closed polygon (its first
# and last vertices are identical), all given as (x, y) pairs.
point_example = Point((-58.43326070000001, -34.5815242))
polygon_example = Polygon([
    (-58.37828, -34.59891),
    (-58.37826, -34.59911),
    (-58.37822, -34.59989),
    (-58.37821, -34.60008),
    (-58.37833, -34.60009),
    (-58.37967, -34.60013),
    (-58.37973, -34.59916),
    (-58.37974, -34.59897),
    (-58.37828, -34.59891),
])

# Report whether the polygon contains the point.
print ('Done' if polygon_example.contains(point_example) else 'No.')



No.
