In [94]:
from osgeo import ogr
from shapely.geometry import MultiLineString
from shapely.geometry import Polygon
from shapely.geometry import MultiPolygon
from shapely import wkt
import numpy as np
import sys
import pandas as pd
import geopandas as gpd

In [68]:
segm_url   = 'https://raw.githubusercontent.com/guillermodangelo/tesis/master/capas/ine_segm_11.gpkg'
censo_url  = 'https://raw.githubusercontent.com/guillermodangelo/tesis/master/tablas/personas_censo_2011.zip'

In [5]:
# Datos censales
censo = pd.read_csv(censo_url, compression='zip', header=0, sep=',', quotechar='"')

In [69]:
# carga capa de segmentos censales
seg = gpd.read_file(segm_url)

In [70]:
seg['CODSEG'] = seg['CODSEG'].astype(str)
seg.drop_duplicates('CODSEG', inplace=True)
seg.shape

(4302, 12)

In [71]:
seg.head()

Unnamed: 0,DEPTO,SECCION,SEGMENTO,LOCALIDAD,CODSEC,CODSEG,CODLOC,NOMBDEPTO,NOMBLOC,CDEPTO_ISO,CLOC_ISO,geometry
0,1,0,0,20,100,100000,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((579196.989 6136293.228, 579199..."
1,1,1,1,20,101,101001,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((572677.242 6137361.564, 572642..."
2,1,1,2,20,101,101002,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573108.805 6137660.924, 573101..."
3,1,1,3,20,101,101003,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573127.720 6137351.005, 573136..."
4,1,1,104,20,101,101104,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573238.898 6137252.750, 573136..."


In [72]:
# calcula la población de cada segmento censal
poblacion = censo[['DPTO', 'LOC', 'SECC', 'SEGM','PERPH02']].groupby(by=['DPTO','LOC', 'SECC', 'SEGM']).count()
poblacion.rename(columns={'PERPH02': 'poblacion'}, inplace=True)
                 
poblacion.reset_index(inplace=True)

# genera codloc
poblacion['CODSEG'] = poblacion.DPTO.astype(str) + poblacion.SECC.astype(str).str.zfill(2) + poblacion.SEGM.astype(str).str.zfill(3)

print(poblacion.shape)
poblacion.head()

(4283, 6)


Unnamed: 0,DPTO,LOC,SECC,SEGM,poblacion,CODSEG
0,1,20,1,1,696,101001
1,1,20,1,2,660,101002
2,1,20,1,3,786,101003
3,1,20,1,104,882,101104
4,1,20,1,105,818,101105


In [73]:
poblacion.drop_duplicates('CODSEG', inplace=True)

In [74]:
print(poblacion.shape)


(4268, 6)


In [75]:
pobl_seg = pd.merge(seg, poblacion, on='CODSEG')

In [86]:
pobl_seg.head()

Unnamed: 0,DEPTO,SECCION,SEGMENTO,LOCALIDAD,CODSEC,CODSEG,CODLOC,NOMBDEPTO,NOMBLOC,CDEPTO_ISO,CLOC_ISO,geometry,DPTO,LOC,SECC,SEGM,poblacion
0,1,1,1,20,101,101001,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((572677.242 6137361.564, 572642...",1,20,1,1,696
1,1,1,2,20,101,101002,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573108.805 6137660.924, 573101...",1,20,1,2,660
2,1,1,3,20,101,101003,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573127.720 6137351.005, 573136...",1,20,1,3,786
3,1,1,104,20,101,101104,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573238.898 6137252.750, 573136...",1,20,1,104,882
4,1,1,105,20,101,101105,1020,MONTEVIDEO,MONTEVIDEO,UYMO,UYMOMON,"MULTIPOLYGON (((573256.084 6137040.674, 573211...",1,20,1,105,818


In [96]:
centroides = pobl_seg[['DEPTO', 'poblacion']]
centroides['geometry'] = pobl_seg.centroid
centroides.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  centroides['geometry'] = pobl_seg.centroid


Unnamed: 0,DEPTO,poblacion,geometry
0,1,696,POINT (572879.280 6137427.379)
1,1,660,POINT (573378.925 6137624.396)
2,1,786,POINT (572978.099 6137196.515)
3,1,882,POINT (573261.100 6137350.570)
4,1,818,POINT (573252.634 6137152.083)


In [None]:
from shapely.geometry import MultiPoint

# read the csv file containing borderline longitudes and latitudes for all Indian districts
df = pd.read_csv(r"C:\Users\sam\IndiaMap\Ind_adm2_Points.csv")

district = ""
geoList = []
result_df = pd.DataFrame(data=None,columns=['State','District','Latitude','Longitude'])

for index, row in df.iterrows():
    # check if this is anew district value
    if district and (district!=df.iloc[index]['District']):
        # calculate centroid for previous district
        points = MultiPoint(geoList)
        # save the state, district, long-lat and centroid to new dataframe
        result_df = result_df.append({'State':df['State'].iloc[index-1],'District':df['District'].iloc[index-1],'Latitude':points.centroid.x,'Longitude':points.centroid.y}, ignore_index=True)
        # clear old geoList (APPEND NEW LONG-LAT ALSO)
        del geoList[:]
    # save this new district's name
    district = df.iloc[index]['District']
    # add this long lat info to later calculate centroid
    geoList.append((df.iloc[index]['Latitude'],df.iloc[index]['Longitude']))

# add last district's centroid
if geoList:
    points = MultiPoint(geoList)
    result_df = result_df.append({'State':df['State'].iloc[-1],'District':df['District'].iloc[-1],'Latitude':points.centroid.x,'Longitude':points.centroid.y}, ignore_index=True)
    del geoList[:]

result_df.to_csv("centroids.csv",index=False)