### Installing Packages

#### Conda environnment
You can create the same conda environment that was used to run this notebook using the following command:

```
conda env create -f requirements.yml
```

In [2]:
import pandas as pd
import ast
import numpy as np
import sys
import geopandas
from geopy.distance import geodesic
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from shapely.ops import nearest_points

 

### Import data

In [4]:
properati = pd.read_csv('properati_data/rent/rent-combined.csv', encoding= "ISO-8859-1")
prop_data = pd.read_csv('prop_data/cleaned_data.csv', encoding= "ISO-8859-1")
census = pd.read_csv('census/cleaned_data2.csv',encoding= "ISO-8859-1")

### Basic stats per dataframe

In [5]:
print ('Properati Columns:\n {}\n'.format(properati.columns.values))
print ('Property Values Columns:\n {}\n'.format(prop_data.columns.values))
print ('Census Columns:\n {}\n'.format(census.columns.values))

Properati Columns:
 ['dataset_date' 'id' 'created_on' 'operation' 'property_type' 'place_name'
 'place_with_parent_names' 'country_name' 'state_name' 'geonames_id'
 'lat_lon' 'lat' 'lon' 'price' 'currency' 'price_aprox_local_currency'
 'price_aprox_usd' 'properati_url' 'description' 'title' 'image_thumbnail']

Property Values Columns:
 ['Commune' 'Longitude' 'Latitude' 'Date' 'Value in US Dollars'
 'Value of m2 (US Dollars)' 'm2' 'b_id']

Census Columns:
 ['id' 'AREA' 'Commune' 'DEPTO' 'FRAC' 'Computer Percent'
 'Computer Quantile' 'Cellular Percent' 'Cellular Quantile' 'Rent Percent'
 'Rent Quantile' 'LINK' 'Immigration Percent' 'Immigration Quantile'
 'Education Percent' 'Education Quantile' 'PERIMETER' 'Owner Percent'
 'Owner Quantile' 'PROV' 'RADIO' 'RADPAIS_' 'REDCODE' 'Regular Percent'
 'Regular Quantile' 'TIPO' 'Uninhabited Percent' 'Uninhabited Quantile'
 'coordinates' 'type']



### Explore the dataframe

In [6]:
prop_data.head(10)

Unnamed: 0,Commune,Longitude,Latitude,Date,Value in US Dollars,Value of m2 (US Dollars),m2,b_id
0,14,-58.4413,-34.5695,2001/12/10,720000.0,2400.0,300.0,0
1,13,-58.4521,-34.5579,2001/12/10,729975.0,973.3,750.0,1
2,13,-58.4523,-34.5577,2001/12/10,650032.5,847.5,767.0,1
3,13,-58.4677,-34.5374,2001/12/10,369991.2,994.6,372.0,2
4,15,-58.462,-34.5783,2001/12/10,94005.7,676.3,139.0,3
5,15,-58.4634,-34.5766,2001/12/10,94006.5,639.5,147.0,3
6,3,-58.4115,-34.6118,2001/12/10,220012.0,423.1,520.0,4
7,3,-58.4115,-34.6137,2001/12/10,45000.9,238.1,189.0,4
8,3,-58.4111,-34.6196,2001/12/10,128990.7,297.9,433.0,4
9,3,-58.4102,-34.6248,2001/12/10,90001.6,432.7,208.0,5


### Create GeoDataFrames
Following the documentation found [here](http://geopandas.org/gallery/create_geopandas_from_pandas.html#sphx-glr-gallery-create-geopandas-from-pandas-py)

In [5]:
def modify_poly(data):
    coord_list = []
    #row = ast.literal_eval(data)
    for coord in data:
        coord_list.append(tuple(coord))
    return coord_list

In [6]:
# create a new column called coordinates on properati data frame
properati['coordinates'] = list(zip(properati.lon, properati.lat))
properati['coordinates'] = properati['coordinates'].apply(Point)

# convert properati data into geopandas dataframe
properati_gdf = geopandas.GeoDataFrame(properati, geometry='coordinates')

# convert the longitudes and latitudes in the properaty values dataframe into float (they were originally in string)
prop_data.Latitude = pd.to_numeric(prop_data.Latitude, errors='coerce')
prop_data.Longitude = pd.to_numeric(prop_data.Longitude, errors='coerce')

#print (properati_gdf.coordinates.head(5))
print (properati_gdf.coordinates.isnull().values.any())
print (len(properati_gdf))

# convert property values data into geopandas dataframe
prop_data['coordinates'] = list(zip(prop_data.Longitude, prop_data.Latitude))
#prop_data['coordinates'] = list(zip(prop_data.Latitude, prop_data.Longitude))
prop_data['coordinates'] = prop_data['coordinates'].apply(Point)
prop_data_gdf = geopandas.GeoDataFrame(prop_data, geometry='coordinates')


# convert census polygon data into tuples and polygon
census.coordinates = census.coordinates.apply(lambda x: ast.literal_eval(x)[0])
census.coordinates = census.coordinates.apply(lambda x: modify_poly(x))
census.coordinates = census.coordinates.apply(Polygon)

census_gdf = geopandas.GeoDataFrame(census, geometry='coordinates')
print (len(census_gdf))
#print (census_gdf.coordinates[0])
print (census_gdf.coordinates.isnull().values.any())

False
265181
3552
False


In [10]:
#properati_prop_gdf = geopandas.sjoin(properati_gdf, prop_data_gdf, how="inner", op="within")
#print (len(properati_prop_gdf))

properati_census_gdf = geopandas.sjoin(properati_gdf, census_gdf, how="inner", op='within')
print ('{:.2f}%'.format(len(properati_census_gdf)/len(properati_gdf)))

print (len(properati_census_gdf))

'''property_join = geopandas.sjoin(properati_gdf, prop_data_gdf, how="inner", op='within')
print (property_join.head(5))
print ('Done.')'''



0.25%
65574


'property_join = geopandas.sjoin(properati_gdf, prop_data_gdf, how="inner", op=\'within\')\nprint (property_join.head(5))\nprint (\'Done.\')'

### [References](https://automating-gis-processes.github.io/2017/lessons/L3/nearest-neighbour.html)

In [26]:
def nearest(point, df):
    dist = 100000000
    fin_index = 0
    index = 0
    coord_list =  df.coordinates.to_list()
    for prop_point in coord_list:
        try:
            new_dist = geodesic((point.x, point.y), (prop_point.x, prop_point.y)).km
            if new_dist < dist:
                dist = new_dist
                fin_index = index
            index += 1
        except ValueError as e:
            pass
    
    print (dist)
    return dist, fin_index
        

In [27]:
properati_census_gdf['property_value_distance'],properati_census_gdf['prop_index'] = properati_census_gdf.coordinates.apply(lambda x: nearest(x, prop_data_gdf))





0.03567278198498203
0.03567278198498203
0.06882108362665816
0.06882108362665816
0.02361655291852966
0.02361655291852966
0.06882108362665816
0.02361655291852966
0.02361655291852966
0.07664338873053113
0.005837561136944921
0.03568425109550813
0.01874476940902667
0.07664338873053113
0.005837561136944921
0.03568425109550813
0.01874476940902667
0.07664338873053113
0.005837561136944921
0.03568425109550813
0.01874476940902667
0.07664338873053113
0.005837561136944921
0.03568425109550813
0.03286924664301781
0.005837561136944921
0.03568425109550813


KeyboardInterrupt: 

In [None]:
filter = census.query('Commune == 2')
print (filter['FRAC'].unique())
print (properati.lat_lon.head())
'''
Rent Percent
Immigration Percent
Education Percent
Owner Percent
Uninhabited Percent
'''

for index, row in properati.iterrows():
    # create polygon
    print ('Properati Coord: {}'.format(row.lat_lon))
    point = Point(ast.literal_eval(row.lat_lon)) # create point
    
    
    for index, census_row in census.iterrows():
        coord_list = []
        row = ast.literal_eval(census_row.coordinates)[0]
        for coord in row:
            coord_list.append(tuple(reversed(coord)))
        print ('Census polygon: {}'.format(coord_list))    
        polygon = Polygon(coord_list) # create polygon
        #print ('Running')
        print (polygon.contains(point))
        if polygon.contains(point):
            print (census_row.Commune)# check if polygon contains point

In [None]:
point_example = Point((-58.43326070000001,-34.5815242))
polygon_example = Polygon([(-58.37828,-34.59891), (-58.37826,-34.59911), (-58.37822,-34.59989), (-58.37821,-34.60008), (-58.37833,-34.60009), (-58.37967,-34.60013), (-58.37973,-34.59916), (-58.37974,-34.59897), (-58.37828,-34.59891)])

if polygon_example.contains(point_example):
    print ('Done')
else:
    print ('No.')



In [None]:
#properati_gdf.coordinates.to_file(driver = 'ESRI Shapefile', filename= "properati_large.shp")
print (len(properati_gdf))
#census_gdf.coordinates.to_file(driver = 'ESRI Shapefile', filename= "census.shp")
#prop_data_gdf.coordinates.to_file(driver = 'ESRI Shapefile', filename= "prop_data.shp")



