In [7]:
from osgeo import ogr
from shapely.geometry import MultiLineString
from shapely import wkt
import numpy as np
import sys
import pandas as pd
import geopandas as gpd

In [2]:
local_url  = 'https://raw.githubusercontent.com/guillermodangelo/tesis/master/capas/ine_localidades.gpkg'
censo_url  = 'https://raw.githubusercontent.com/guillermodangelo/tesis/master/tablas/personas_censo_2011.zip'

In [5]:
# Datos censales
censo = pd.read_csv(censo_url, compression='zip', header=0, sep=',', quotechar='"')
# carga capa localidades INE pt
localidad = gpd.read_file(local_url)
localidad.CODLOC = localidad.CODLOC.astype(int)

In [32]:
censo.head()

Unnamed: 0,DPTO,LOC,SECC,SEGM,PERPH02,PERNA01,PERNA02,PERMI01,PERMI01_1,PERMI01_2,...,PERMI06,PERMI06_1,PERMI06_2,PERMI06_3,PERMI06_4,PERMI07,PERMI07_1,PERMI07_2,PERMI07_3,PERMI07_4
0,1,20,1,1,2,22,1988-11-01,1,,,...,0,,,,0,0,,,,0
1,1,20,1,1,2,84,1927-07-01,1,,,...,0,,,,0,0,,,,0
2,1,20,1,1,1,21,1990-07-01,1,,,...,0,,,,0,0,,,,0
3,1,20,1,1,2,0,2011-08-01,1,,,...,0,,,,0,0,,,,0
4,1,20,1,1,2,31,1980-02-01,3,,6.0,...,2,,6.0,421.0,0,0,,,,0


In [35]:
# calcula la población de cada segmento censal
poblacion = censo[['DPTO', 'LOC', 'SECC', 'SEGM','PERPH02']].groupby(by=['DPTO','LOC', 'SECC', 'SEGM']).count()
poblacion.rename(columns={'PERPH02': 'poblacion'}, inplace=True)
                 
poblacion.reset_index(inplace=True)

# genera codloc
poblacion['CODSEGM'] = poblacion.DPTO.astype(str) + poblacion.SECC.astype(str).str.zfill(3) + poblacion.SEGM.astype(str).str.zfill(3)
                        
poblacion.head()

Unnamed: 0,DPTO,LOC,SECC,SEGM,poblacion,CODSEGM
0,1,20,1,1,696,1001001
1,1,20,1,2,660,1001002
2,1,20,1,3,786,1001003
3,1,20,1,104,882,1001104
4,1,20,1,105,818,1001105


In [None]:
pd.merge(localidad, poblacion pm)

ValueError: cannot insert level_0, already exists

In [20]:
poblacion.head(50)

Unnamed: 0,DPTO,LOC,poblacion
0,1,20,1298649
1,1,621,936
2,1,622,1954
3,1,721,3189
4,1,900,14027
5,2,220,40657
6,2,521,12200
7,2,522,2659
8,2,621,2531
9,2,721,380


In [None]:
## set the driver for the data
driver = ogr.GetDriverByName("ESRI Shapefile")
## folder where the shapefile resides
folder = r"C:\Users\glen.bambrick\Documents\GDAL\shp\\"
## name of the shapefile concatenated with folder
shp = "{0}Census2011_Small_Areas_generalised20m.shp".format(folder)
## open the shapefile
ds = driver.Open(shp, 0)
## reference the only layer in the shapefile
lyr = ds.GetLayer(0)


In [None]:

## create an output data source
out_ds = driver.CreateDataSource("{0}{1}_wgt_mean_center.shp".format(folder,lyr.GetName()))

## output mean center weighted filename
output_fc = "{0}{1}_wgt_mean_center".format(folder,lyr.GetName())

## field that has numerical weight
weight_fld = "TOTAL2011"

try:
    first_feat = lyr.GetFeature(1)
    xy_arr = np.ndarray((len(lyr), 2), dtype=np.float)
    wgt_arr = np.ndarray((len(lyr), 1), dtype=np.float)
    ## use the centroid for points and polygons
    if first_feat.geometry().GetGeometryName() in ["POINT", "MULTIPOINT", "POLYGON", "MULTIPOLYGON"]:
        for i, pt in enumerate(lyr):
            ft_geom = pt.geometry()
            weight = pt.GetField(weight_fld)
            xy_arr[i] = (ft_geom.Centroid().GetX() * weight, ft_geom.Centroid().GetY() * weight)
            wgt_arr[i] = weight
    ## midpoint of lines
    elif first_feat.geometry().GetGeometryName() in ["LINESTRING", "MULTILINESTRING"]:
        for i, ln in enumerate(lyr):
            line_geom = ln.geometry().ExportToWkt()
            weight = ln.GetField(weight_fld)
            shapely_line = MultiLineString(wkt.loads(line_geom))
            midpoint = shapely_line.interpolate(shapely_line.length/2)
            xy_arr[i] = (midpoint.x * weight, midpoint.y * weight)
            wgt_arr[i] = weight

except Exception:
    print "Unknown geometry or Incorrect field name for {}".format(input_lyr_name)
    sys.exit()

## do the maths
sum_x, sum_y = np.sum(xy_arr, axis=0)
sum_wgt = np.sum(wgt_arr)
weighted_x, weighted_y = sum_x/sum_wgt, sum_y/sum_wgt

print "Weighted Mean Center: {0}, {1}".format(weighted_x, weighted_y)

## create a new point layer with the same spatial ref as lyr
out_lyr = out_ds.CreateLayer(output_fc, lyr.GetSpatialRef(), ogr.wkbPoint)

## define and create new fields
x_fld = ogr.FieldDefn("X", ogr.OFTReal)
y_fld = ogr.FieldDefn("Y", ogr.OFTReal)
out_lyr.CreateField(x_fld)
out_lyr.CreateField(y_fld)

## create a new point for the mean center weighted
pnt = ogr.Geometry(ogr.wkbPoint)
pnt.AddPoint(weighted_x, weighted_y)

## add the mean center weighted to the new layer
feat_dfn = out_lyr.GetLayerDefn()
feat = ogr.Feature(feat_dfn)
feat.SetGeometry(pnt)
feat.SetField("X", weighted_x)
feat.SetField("Y", weighted_y)
out_lyr.CreateFeature(feat)

print "Created: {0}.shp".format(output_fc)

## free up resources
del ds, out_ds, lyr, first_feat, feat, out_lyr