# Manual verification of unauthorized colonies

* Import colonies shapefile
* Randomly select 5% of the USO IDs for unauthorized colonies
* Find unauthorized colonies shapefile with map number and registration number. Ideally this would also have USO ID.
* Do a spatial join (or merge based on USO ID) of unauthorized colonies with map number and registration number.
* Export a shapefile containing the 5% sample of unauthorized colonies. It should include the map number and registration number.

In [1]:
import os
import random
import math
import pandas as pd
import geopandas as gpd

## Import final colonies shapefile (including UACs)

In [2]:
# Relative path to the final colonies shapefile (includes unauthorized colonies)
colonies_fp = (
    '../spatial_index_python/shapefiles/Spatial_Index_GIS/'
    'Colony_Shapefile/USO23Aug2020.shp'
)

In [3]:
# Load the colonies shapefile into a GeoDataFrame
colonies = gpd.read_file(filename=colonies_fp)

In [4]:
# Preview the first rows to check which columns were loaded
colonies.head()

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry
0,NEW DELHI 36,5584,,Planned,"POLYGON Z ((1020282.788 996796.773 0.000, 1020..."
1,NEW DELHI 35,5585,,Planned,"POLYGON Z ((1019724.475 994932.797 0.000, 1019..."
2,NEW DELHI 34,5586,,Planned,"POLYGON Z ((1019571.955 994876.019 0.000, 1019..."
3,NEW DELHI 33,5587,,Planned,"POLYGON Z ((1019352.702 994352.546 0.000, 1019..."
4,NEW DELHI 32,5588,,Planned,"POLYGON Z ((1018793.292 994224.182 0.000, 1018..."


## Get the total count of UACs and randomly select 5% of them for manual verification

In [5]:
# Keep only the rows whose USO_FINAL category is 'UAC' (unauthorized colonies)
is_uac = colonies['USO_FINAL'].eq('UAC')
uac = colonies.loc[is_uac]

In [29]:
# Size of the unauthorized-colony subset
num_uacs = uac.shape[0]
print('total number of unauthorized colonies is', num_uacs)

# Sample size: 5% of that count, rounded up to a whole colony
five_percent = math.ceil(0.05 * num_uacs)
print('five percent of unauthorized colonies is', five_percent)

total number of unauthorized colonies is 1684
five percent of unauthorized colonies is 85


In [48]:
# Index labels of all unauthorized colonies. These are the row labels carried
# over from `colonies`, not USO_AREA_U values.
uac.index

Int64Index([  79,   87,   98,  100,  134,  136,  143,  147,  152,  154,
            ...
            4342, 4343, 4344, 4345, 4346, 4347, 4348, 4349, 4350, 4351],
           dtype='int64', length=1684)

In [51]:
# Randomly select 5% of the unauthorized colonies for manual verification.
# NOTE: the sampled values are DataFrame index labels (they are passed to
# `uac.loc` afterwards), not USO_AREA_U values.
# A dedicated, seeded generator makes the draw reproducible, so the exported
# shapefile and CSV can be regenerated and audited. Change SAMPLE_SEED to
# draw a different sample.
SAMPLE_SEED = 20200823
uac_uso_idx_five_percent = random.Random(SAMPLE_SEED).sample(list(uac.index), five_percent)

In [52]:
# Peek at the first five sampled index labels
uac_uso_idx_five_percent[:5]

[4105, 3095, 856, 1695, 3873]

In [55]:
# Pull the sampled rows (all columns) out of the unauthorized-colony subset,
# in the order the labels were drawn
uac_five_percent = uac.loc[uac_uso_idx_five_percent]

In [56]:
# Display the sampled unauthorized colonies
uac_five_percent

Unnamed: 0,AREA,USO_AREA_U,HOUSETAX_C,USO_FINAL,geometry
4105,"VIJAY NAGAR, BAWANA, DELHI-37",5311,,UAC,"POLYGON Z ((1002412.709 1001915.576 0.000, 100..."
3095,"RAM VIHAR, LONI ROAD, JOHARIPUR",4301,,UAC,"POLYGON Z ((1022233.626 1008151.185 0.000, 102..."
856,"Chander Vihar, Nilothi",1324,G,UAC,"POLYGON Z ((1006650.762 1003369.507 0.000, 100..."
1695,"Hira Park, Najafgarh",1727,G,UAC,"MULTIPOLYGON Z (((997966.120 999075.507 0.000,..."
3873,RANI GARDEN ( AMBEDKAR PARK) SASTRI NAGAR DELH...,5079,,UAC,"POLYGON Z ((1027800.169 1010162.584 0.000, 102..."
...,...,...,...,...,...
3212,"DICHAON KALAN EXTN, MAIN DICHAON KALAN ROAD, N...",4418,,UAC,"POLYGON Z ((1028460.016 998347.856 0.000, 1028..."
2944,SABOLI BAGH PT-I NAND NAGARI DELHI-93,4150,,UAC,"POLYGON Z ((1003183.693 999614.709 0.000, 1003..."
3396,"DEENDARPUR EXTN.NAJAFGARH, DELHI",4602,,UAC,"POLYGON Z ((1031597.864 1009254.509 0.000, 103..."
3027,"VASHISHT ENCLAVE, BABA COLONY, BURARI, DELHI-84",4233,,UAC,"POLYGON Z ((1003805.303 1001674.888 0.000, 100..."


## Load the unauthorized colonies shapefile with map numbers and join it to the sample

In [57]:
# Shapefile of unauthorized colonies that carries the map number
uac_map_fp = 'near_perfect_uac_ch.shp'
uac_with_map_number = gpd.read_file(uac_map_fp)

In [58]:
# Bring the map-number layer into the CRS of the sampled colonies so the two
# layers can be joined spatially
target_crs = uac_five_percent.crs
uac_with_map_number = uac_with_map_number.to_crs(crs=target_crs)

In [60]:
# Sanity check: after reprojection both layers should report the same CRS
uac_with_map_number.crs == uac_five_percent.crs

True

In [72]:
# Attach the attributes of the map-number layer to the sampled colonies.
# how='left' keeps every sampled colony, even when nothing in the map-number
# layer matches it; the spatial predicate is left at geopandas' default.
# NOTE(review): a sampled colony that matches more than one feature in
# `uac_with_map_number` would presumably appear once per match, so the result
# may have more rows than the sample -- TODO confirm and de-duplicate if needed.
uac_for_verification = gpd.sjoin(uac_five_percent, uac_with_map_number, how='left')

## Export to shapefile

In [73]:
# Write the joined verification sample to a shapefile in the working directory
uac_for_verification.to_file(filename='uac_verify.shp')

In [76]:
# Save the USO_AREA_U column of the sample (the USO IDs, going by the output
# file name) to CSV; the DataFrame index is written alongside it
sampled_uso_ids = uac_five_percent['USO_AREA_U']
sampled_uso_ids.to_csv('uac_uso_ids.csv')