# Examine Invalid WKT
A brief notebook that allows users to use shapely to examine a wkt to check if it's invalid 

In [1]:
import shapely.wkt
from shapely.validation import explain_validity, make_valid
from shapely.geometry import MultiPoint
from digital_land.datatype.wkt import make_multipolygon
from digital_land.log import IssueLog
from digital_land.datatype.wkt import WktDataType,parse_wkt,dump_wkt, normalise_geometry
from old_wkt import WktDataType as OldWktDataType
from new_wkt import WktDataType as NewWktDataType
import geopandas as gpd
from pyproj.transformer import TransformerGroup
import logging

from plotting import plot_map

### Check right transformations are being used

When transforming our points we need to make sure the best available methods are used the code below does this.

In [2]:
if not TransformerGroup("epsg:27700", "epsg:4326").best_available:
    print("not using the best available OSGB correction tables")

### Investigating the invalidity

load in the invalid wkt, this is taken from expectation results for entity 44001120. The result on the site isn't the same wkt as we run validation on points entering the site. These need to be correct in the datasets as they're avaliable for download.

In [3]:
wkt = 'MULTIPOLYGON(((-1.111304 52.583072000000016,-1.111295 52.583072000000016,-1.111295 52.583072000000016,-1.111304 52.583072000000016)),((-1.103706 52.58558099999999,-1.103812 52.58561,-1.10403 52.58564000000004,-1.104045 52.585616000000016,-1.104088 52.585488,-1.104109 52.585454,-1.104177 52.585376999999994,-1.104222 52.585368999999986,-1.104336 52.58524,-1.104467 52.58527699999999,-1.104607 52.58521999999999,-1.10516 52.585189000000014,-1.105491 52.58518599999999,-1.105494 52.585171,-1.106129 52.58520199999998,-1.106118 52.585319,-1.10647 52.585342000000026,-1.106471 52.58533699999998,-1.106675 52.58537899999999,-1.106549 52.58586300000002,-1.107244 52.585931999999985,-1.107259 52.585848,-1.107353 52.58586199999999,-1.10738 52.58579499999999,-1.107372 52.585792999999995,-1.107396 52.58574999999999,-1.107404 52.585752000000014,-1.107577 52.58538200000001,-1.107598 52.58538600000003,-1.107688 52.58520300000001,-1.107916 52.585252,-1.108009 52.585092,-1.107499 52.584982,-1.107564 52.584869,-1.107359 52.584835,-1.107293 52.584810000000004,-1.106945 52.584761000000015,-1.107004 52.584688,-1.107202 52.58453700000001,-1.10722 52.58451500000001,-1.107738 52.584164999999985,-1.108504 52.58443,-1.108721 52.584203,-1.108732 52.584204,-1.108965 52.584261999999995,-1.109556 52.584438000000006,-1.109562 52.58443,-1.109731 52.584475,-1.109846 52.584541,-1.10985 52.58453,-1.109969 52.584569999999985,-1.110568 52.58465400000003,-1.111481 52.584768,-1.111333 52.58524,-1.111702 52.58527800000002,-1.111812 52.58497599999998,-1.112428 52.58501899999999,-1.112688 52.58447000000004,-1.112941 52.583774000000005,-1.112882 52.58357799999999,-1.112742 52.58332999999999,-1.112689 52.583259,-1.112666 52.58326199999996,-1.11258 52.583219999999955,-1.112565 52.58322800000002,-1.112237 52.583245000000005,-1.111869 52.583218999999985,-1.111735 52.58319999999995,-1.111558 52.58316099999999,-1.111399 52.583113,-1.111295 52.583072000000016,-1.111221 52.58307300000001,-1.111064 52.58302499999999,-1.110991 52.583106999999984,-1.111 52.58310999999998,-1.110944 52.58317900000003,-1.110967 52.58318600000001,-1.110838 52.58332100000001,-1.110682 52.583530999999994,-1.110448 52.583496,-1.110369 52.583451,-1.110323 52.58350899999999,-1.109712 52.58333300000001,-1.109605 52.58346599999996,-1.109388 52.58340000000001,-1.109302 52.58334900000003,-1.109105 52.583211000000006,-1.109013 52.58310900000001,-1.109033 52.583083999999985,-1.109038 52.58305200000001,-1.109006 52.58300200000002,-1.108981 52.582986999999974,-1.107971 52.58268799999999,-1.107966 52.58269999999999,-1.107482 52.582595,-1.107458 52.582599000000016,-1.107373 52.582583,-1.107546 52.582291,-1.10762 52.58213699999999,-1.107612 52.58213599999999,-1.10762 52.582115999999985,-1.105032 52.581647000000004,-1.104801 52.58163300000001,-1.104604 52.58164099999999,-1.104393 52.581676000000016,-1.104294 52.58170799999999,-1.104176 52.58168399999997,-1.104152 52.581668000000036,-1.10412 52.581661,-1.103745 52.58164099999999,-1.103565 52.58162299999998,-1.103562 52.581856999999985,-1.103466 52.581883000000005,-1.103502 52.58201300000002,-1.103617 52.58212499999999,-1.103668 52.58215799999999,-1.103091 52.582357,-1.103334 52.583292,-1.10338 52.58352099999999,-1.10342 52.583854,-1.103494 52.58392599999999,-1.103512 52.58417700000001,-1.103461 52.584655999999995,-1.103437 52.58477700000003,-1.103484 52.58477700000003,-1.103513 52.584742000000006,-1.103569 52.584711,-1.103667 52.58469600000001,-1.104922 52.58480500000002,-1.104939 52.58482699999999,-1.104903 52.58496400000004,-1.10488 52.584979000000004,-1.104398 52.58501700000002,-1.103929 52.58498399999999,-1.103856 52.585262,-1.103838 52.585260000000005,-1.103828 52.585286999999965,-1.103813 52.585350000000005,-1.10382 52.585351,-1.103813 52.58539999999999,-1.103764 52.585508000000004,-1.103723 52.585543,-1.103706 52.58558099999999)))'
# remove initial polygon which seems invalid
# wkt = 'MULTIPOLYGON(((-1.103706 52.58558099999999,-1.103812 52.58561,-1.10403 52.58564000000004,-1.104045 52.585616000000016,-1.104088 52.585488,-1.104109 52.585454,-1.104177 52.585376999999994,-1.104222 52.585368999999986,-1.104336 52.58524,-1.104467 52.58527699999999,-1.104607 52.58521999999999,-1.10516 52.585189000000014,-1.105491 52.58518599999999,-1.105494 52.585171,-1.106129 52.58520199999998,-1.106118 52.585319,-1.10647 52.585342000000026,-1.106471 52.58533699999998,-1.106675 52.58537899999999,-1.106549 52.58586300000002,-1.107244 52.585931999999985,-1.107259 52.585848,-1.107353 52.58586199999999,-1.10738 52.58579499999999,-1.107372 52.585792999999995,-1.107396 52.58574999999999,-1.107404 52.585752000000014,-1.107577 52.58538200000001,-1.107598 52.58538600000003,-1.107688 52.58520300000001,-1.107916 52.585252,-1.108009 52.585092,-1.107499 52.584982,-1.107564 52.584869,-1.107359 52.584835,-1.107293 52.584810000000004,-1.106945 52.584761000000015,-1.107004 52.584688,-1.107202 52.58453700000001,-1.10722 52.58451500000001,-1.107738 52.584164999999985,-1.108504 52.58443,-1.108721 52.584203,-1.108732 52.584204,-1.108965 52.584261999999995,-1.109556 52.584438000000006,-1.109562 52.58443,-1.109731 52.584475,-1.109846 52.584541,-1.10985 52.58453,-1.109969 52.584569999999985,-1.110568 52.58465400000003,-1.111481 52.584768,-1.111333 52.58524,-1.111702 52.58527800000002,-1.111812 52.58497599999998,-1.112428 52.58501899999999,-1.112688 52.58447000000004,-1.112941 52.583774000000005,-1.112882 52.58357799999999,-1.112742 52.58332999999999,-1.112689 52.583259,-1.112666 52.58326199999996,-1.11258 52.583219999999955,-1.112565 52.58322800000002,-1.112237 52.583245000000005,-1.111869 52.583218999999985,-1.111735 52.58319999999995,-1.111558 52.58316099999999,-1.111399 52.583113,-1.111295 52.583072000000016,-1.111221 52.58307300000001,-1.111064 52.58302499999999,-1.110991 52.583106999999984,-1.111 52.58310999999998,-1.110944 52.58317900000003,-1.110967 52.58318600000001,-1.110838 52.58332100000001,-1.110682 52.583530999999994,-1.110448 52.583496,-1.110369 52.583451,-1.110323 52.58350899999999,-1.109712 52.58333300000001,-1.109605 52.58346599999996,-1.109388 52.58340000000001,-1.109302 52.58334900000003,-1.109105 52.583211000000006,-1.109013 52.58310900000001,-1.109033 52.583083999999985,-1.109038 52.58305200000001,-1.109006 52.58300200000002,-1.108981 52.582986999999974,-1.107971 52.58268799999999,-1.107966 52.58269999999999,-1.107482 52.582595,-1.107458 52.582599000000016,-1.107373 52.582583,-1.107546 52.582291,-1.10762 52.58213699999999,-1.107612 52.58213599999999,-1.10762 52.582115999999985,-1.105032 52.581647000000004,-1.104801 52.58163300000001,-1.104604 52.58164099999999,-1.104393 52.581676000000016,-1.104294 52.58170799999999,-1.104176 52.58168399999997,-1.104152 52.581668000000036,-1.10412 52.581661,-1.103745 52.58164099999999,-1.103565 52.58162299999998,-1.103562 52.581856999999985,-1.103466 52.581883000000005,-1.103502 52.58201300000002,-1.103617 52.58212499999999,-1.103668 52.58215799999999,-1.103091 52.582357,-1.103334 52.583292,-1.10338 52.58352099999999,-1.10342 52.583854,-1.103494 52.58392599999999,-1.103512 52.58417700000001,-1.103461 52.584655999999995,-1.103437 52.58477700000003,-1.103484 52.58477700000003,-1.103513 52.584742000000006,-1.103569 52.584711,-1.103667 52.58469600000001,-1.104922 52.58480500000002,-1.104939 52.58482699999999,-1.104903 52.58496400000004,-1.10488 52.584979000000004,-1.104398 52.58501700000002,-1.103929 52.58498399999999,-1.103856 52.585262,-1.103838 52.585260000000005,-1.103828 52.585286999999965,-1.103813 52.585350000000005,-1.10382 52.585351,-1.103813 52.58539999999999,-1.103764 52.585508000000004,-1.103723 52.585543,-1.103706 52.58558099999999)))'
# Polygon formed after make_valid
# wkt = 'POLYGON ((-1.1113990000000000 52.5831129999999973, -1.1115580000000000 52.5831609999999898, -1.1117349999999999 52.5831999999999482, -1.1118690000000000 52.5832189999999855, -1.1122369999999999 52.5832450000000051, -1.1125650000000000 52.5832280000000196, -1.1125799999999999 52.5832199999999546, -1.1126659999999999 52.5832619999999622, -1.1126890000000000 52.5832589999999982, -1.1127419999999999 52.5833299999999895, -1.1128819999999999 52.5835779999999886, -1.1129410000000000 52.5837740000000053, -1.1126879999999999 52.5844700000000387, -1.1124280000000000 52.5850189999999884, -1.1118120000000000 52.5849759999999833, -1.1117020000000000 52.5852780000000166, -1.1113329999999999 52.5852399999999989, -1.1114809999999999 52.5847679999999968, -1.1105680000000000 52.5846540000000289, -1.1099690000000000 52.5845699999999852, -1.1098500000000000 52.5845300000000009, -1.1098460000000001 52.5845410000000015, -1.1097310000000000 52.5844749999999976, -1.1095619999999999 52.5844299999999976, -1.1095560000000000 52.5844380000000058, -1.1089650000000000 52.5842619999999954, -1.1087320000000001 52.5842039999999997, -1.1087210000000001 52.5842030000000022, -1.1085039999999999 52.5844299999999976, -1.1077379999999999 52.5841649999999845, -1.1072200000000001 52.5845150000000103, -1.1072020000000000 52.5845370000000116, -1.1070040000000001 52.5846879999999999, -1.1069450000000001 52.5847610000000145, -1.1072930000000001 52.5848100000000045, -1.1073590000000000 52.5848349999999982, -1.1075640000000000 52.5848689999999976, -1.1074990000000000 52.5849819999999966, -1.1080090000000000 52.5850920000000031, -1.1079159999999999 52.5852519999999970, -1.1076880000000000 52.5852030000000070, -1.1075980000000001 52.5853860000000282, -1.1075770000000000 52.5853820000000098, -1.1074040000000001 52.5857520000000136, -1.1073960000000000 52.5857499999999902, -1.1073720000000000 52.5857929999999953, -1.1073800000000000 52.5857949999999903, -1.1073530000000000 52.5858619999999917, -1.1072590000000000 52.5858479999999986, -1.1072439999999999 52.5859319999999855, -1.1065490000000000 52.5858630000000176, -1.1066750000000001 52.5853789999999890, -1.1064710000000000 52.5853369999999813, -1.1064700000000001 52.5853420000000256, -1.1061179999999999 52.5853189999999984, -1.1061289999999999 52.5852019999999811, -1.1054940000000000 52.5851710000000026, -1.1054910000000000 52.5851859999999931, -1.1051599999999999 52.5851890000000139, -1.1046069999999999 52.5852199999999925, -1.1044670000000001 52.5852769999999907, -1.1043360000000000 52.5852399999999989, -1.1042220000000000 52.5853689999999858, -1.1041770000000000 52.5853769999999940, -1.1041090000000000 52.5854539999999986, -1.1040880000000000 52.5854879999999980, -1.1040449999999999 52.5856160000000159, -1.1040300000000001 52.5856400000000406, -1.1038120000000000 52.5856100000000026, -1.1037060000000001 52.5855809999999906, -1.1037230000000000 52.5855430000000013, -1.1037640000000000 52.5855080000000044, -1.1038129999999999 52.5853999999999928, -1.1038200000000000 52.5853510000000028, -1.1038129999999999 52.5853500000000054, -1.1038280000000000 52.5852869999999655, -1.1038380000000001 52.5852600000000052, -1.1038559999999999 52.5852620000000002, -1.1039289999999999 52.5849839999999915, -1.1043980000000000 52.5850170000000219, -1.1048800000000001 52.5849790000000041, -1.1049030000000000 52.5849640000000420, -1.1049389999999999 52.5848269999999900, -1.1049220000000000 52.5848050000000171, -1.1036670000000000 52.5846960000000081, -1.1035690000000000 52.5847109999999986, -1.1035130000000000 52.5847420000000056, -1.1034839999999999 52.5847770000000310, -1.1034370000000000 52.5847770000000310, -1.1034610000000000 52.5846559999999954, -1.1035120000000000 52.5841770000000110, -1.1034940000000000 52.5839259999999911, -1.1034200000000001 52.5838540000000023, -1.1033800000000000 52.5835209999999904, -1.1033340000000000 52.5832920000000001, -1.1030910000000000 52.5823570000000018, -1.1036680000000001 52.5821579999999926, -1.1036170000000001 52.5821249999999907, -1.1035020000000000 52.5820130000000177, -1.1034660000000001 52.5818830000000048, -1.1035619999999999 52.5818569999999852, -1.1035649999999999 52.5816229999999791, -1.1037450000000000 52.5816409999999905, -1.1041200000000000 52.5816609999999969, -1.1041520000000000 52.5816680000000360, -1.1041760000000000 52.5816839999999672, -1.1042940000000001 52.5817079999999919, -1.1043930000000000 52.5816760000000158, -1.1046039999999999 52.5816409999999905, -1.1048009999999999 52.5816330000000107, -1.1050320000000000 52.5816470000000038, -1.1076200000000000 52.5821159999999850, -1.1076120000000000 52.5821359999999913, -1.1076200000000000 52.5821369999999888, -1.1075459999999999 52.5822909999999979, -1.1073729999999999 52.5825829999999996, -1.1074580000000001 52.5825990000000161, -1.1074820000000001 52.5825949999999978, -1.1079660000000000 52.5826999999999884, -1.1079710000000000 52.5826879999999903, -1.1089810000000000 52.5829869999999744, -1.1090059999999999 52.5830020000000218, -1.1090380000000000 52.5830520000000092, -1.1090329999999999 52.5830839999999853, -1.1090130000000000 52.5831090000000074, -1.1091050000000000 52.5832110000000057, -1.1093020000000000 52.5833490000000268, -1.1093880000000000 52.5834000000000117, -1.1096050000000000 52.5834659999999587, -1.1097120000000000 52.5833330000000103, -1.1103229999999999 52.5835089999999923, -1.1103689999999999 52.5834509999999966, -1.1104480000000001 52.5834959999999967, -1.1106819999999999 52.5835309999999936, -1.1108380000000000 52.5833210000000122, -1.1109670000000000 52.5831860000000120, -1.1109439999999999 52.5831790000000296, -1.1110000000000000 52.5831099999999765, -1.1109910000000001 52.5831069999999841, -1.1110640000000001 52.5830249999999921, -1.1112210000000000 52.5830730000000131, -1.1112949999999999 52.5830720000000156, -1.1113990000000000 52.5831129999999973))'
# Polygon made after applying make_multipolygon to valid one
# wkt = 'MULTIPOLYGON (((-1.1113990000000000 52.5831129999999973, -1.1115580000000000 52.5831609999999898, -1.1117349999999999 52.5831999999999482, -1.1118690000000000 52.5832189999999855, -1.1122369999999999 52.5832450000000051, -1.1125650000000000 52.5832280000000196, -1.1125799999999999 52.5832199999999546, -1.1126659999999999 52.5832619999999622, -1.1126890000000000 52.5832589999999982, -1.1127419999999999 52.5833299999999895, -1.1128819999999999 52.5835779999999886, -1.1129410000000000 52.5837740000000053, -1.1126879999999999 52.5844700000000387, -1.1124280000000000 52.5850189999999884, -1.1118120000000000 52.5849759999999833, -1.1117020000000000 52.5852780000000166, -1.1113329999999999 52.5852399999999989, -1.1114809999999999 52.5847679999999968, -1.1105680000000000 52.5846540000000289, -1.1099690000000000 52.5845699999999852, -1.1098500000000000 52.5845300000000009, -1.1098460000000001 52.5845410000000015, -1.1097310000000000 52.5844749999999976, -1.1095619999999999 52.5844299999999976, -1.1095560000000000 52.5844380000000058, -1.1089650000000000 52.5842619999999954, -1.1087320000000001 52.5842039999999997, -1.1087210000000001 52.5842030000000022, -1.1085039999999999 52.5844299999999976, -1.1077379999999999 52.5841649999999845, -1.1072200000000001 52.5845150000000103, -1.1072020000000000 52.5845370000000116, -1.1070040000000001 52.5846879999999999, -1.1069450000000001 52.5847610000000145, -1.1072930000000001 52.5848100000000045, -1.1073590000000000 52.5848349999999982, -1.1075640000000000 52.5848689999999976, -1.1074990000000000 52.5849819999999966, -1.1080090000000000 52.5850920000000031, -1.1079159999999999 52.5852519999999970, -1.1076880000000000 52.5852030000000070, -1.1075980000000001 52.5853860000000282, -1.1075770000000000 52.5853820000000098, -1.1074040000000001 52.5857520000000136, -1.1073960000000000 52.5857499999999902, -1.1073720000000000 52.5857929999999953, -1.1073800000000000 52.5857949999999903, -1.1073530000000000 52.5858619999999917, -1.1072590000000000 52.5858479999999986, -1.1072439999999999 52.5859319999999855, -1.1065490000000000 52.5858630000000176, -1.1066750000000001 52.5853789999999890, -1.1064710000000000 52.5853369999999813, -1.1064700000000001 52.5853420000000256, -1.1061179999999999 52.5853189999999984, -1.1061289999999999 52.5852019999999811, -1.1054940000000000 52.5851710000000026, -1.1054910000000000 52.5851859999999931, -1.1051599999999999 52.5851890000000139, -1.1046069999999999 52.5852199999999925, -1.1044670000000001 52.5852769999999907, -1.1043360000000000 52.5852399999999989, -1.1042220000000000 52.5853689999999858, -1.1041770000000000 52.5853769999999940, -1.1041090000000000 52.5854539999999986, -1.1040880000000000 52.5854879999999980, -1.1040449999999999 52.5856160000000159, -1.1040300000000001 52.5856400000000406, -1.1038120000000000 52.5856100000000026, -1.1037060000000001 52.5855809999999906, -1.1037230000000000 52.5855430000000013, -1.1037640000000000 52.5855080000000044, -1.1038129999999999 52.5853999999999928, -1.1038200000000000 52.5853510000000028, -1.1038129999999999 52.5853500000000054, -1.1038280000000000 52.5852869999999655, -1.1038380000000001 52.5852600000000052, -1.1038559999999999 52.5852620000000002, -1.1039289999999999 52.5849839999999915, -1.1043980000000000 52.5850170000000219, -1.1048800000000001 52.5849790000000041, -1.1049030000000000 52.5849640000000420, -1.1049389999999999 52.5848269999999900, -1.1049220000000000 52.5848050000000171, -1.1036670000000000 52.5846960000000081, -1.1035690000000000 52.5847109999999986, -1.1035130000000000 52.5847420000000056, -1.1034839999999999 52.5847770000000310, -1.1034370000000000 52.5847770000000310, -1.1034610000000000 52.5846559999999954, -1.1035120000000000 52.5841770000000110, -1.1034940000000000 52.5839259999999911, -1.1034200000000001 52.5838540000000023, -1.1033800000000000 52.5835209999999904, -1.1033340000000000 52.5832920000000001, -1.1030910000000000 52.5823570000000018, -1.1036680000000001 52.5821579999999926, -1.1036170000000001 52.5821249999999907, -1.1035020000000000 52.5820130000000177, -1.1034660000000001 52.5818830000000048, -1.1035619999999999 52.5818569999999852, -1.1035649999999999 52.5816229999999791, -1.1037450000000000 52.5816409999999905, -1.1041200000000000 52.5816609999999969, -1.1041520000000000 52.5816680000000360, -1.1041760000000000 52.5816839999999672, -1.1042940000000001 52.5817079999999919, -1.1043930000000000 52.5816760000000158, -1.1046039999999999 52.5816409999999905, -1.1048009999999999 52.5816330000000107, -1.1050320000000000 52.5816470000000038, -1.1076200000000000 52.5821159999999850, -1.1076120000000000 52.5821359999999913, -1.1076200000000000 52.5821369999999888, -1.1075459999999999 52.5822909999999979, -1.1073729999999999 52.5825829999999996, -1.1074580000000001 52.5825990000000161, -1.1074820000000001 52.5825949999999978, -1.1079660000000000 52.5826999999999884, -1.1079710000000000 52.5826879999999903, -1.1089810000000000 52.5829869999999744, -1.1090059999999999 52.5830020000000218, -1.1090380000000000 52.5830520000000092, -1.1090329999999999 52.5830839999999853, -1.1090130000000000 52.5831090000000074, -1.1091050000000000 52.5832110000000057, -1.1093020000000000 52.5833490000000268, -1.1093880000000000 52.5834000000000117, -1.1096050000000000 52.5834659999999587, -1.1097120000000000 52.5833330000000103, -1.1103229999999999 52.5835089999999923, -1.1103689999999999 52.5834509999999966, -1.1104480000000001 52.5834959999999967, -1.1106819999999999 52.5835309999999936, -1.1108380000000000 52.5833210000000122, -1.1109670000000000 52.5831860000000120, -1.1109439999999999 52.5831790000000296, -1.1110000000000000 52.5831099999999765, -1.1109910000000001 52.5831069999999841, -1.1110640000000001 52.5830249999999921, -1.1112210000000000 52.5830730000000131, -1.1112949999999999 52.5830720000000156, -1.1113990000000000 52.5831129999999973)))'

In [4]:
geometry_wkt = shapely.wkt.loads(wkt)

In [5]:
# display wherethe invalidity is and it's reason
print(f"Valid: {geometry_wkt.is_valid} Reason: {explain_validity(geometry_wkt)}")

Valid: False Reason: Too few points in geometry component[-1.111304 52.583072]


In [6]:
# load invalid point in to look at where error is
point_wkt = shapely.wkt.loads('POINT(-1.111304 52.583072)')
# use custom plotting function to plot the geometries
plot_map(geometry_wkt,point_wkt)

From exploring the above plot the invalidity is caused by an additional line segment, this is defined as a polygon and hence will be invalid. However is we run our parser on the point again it will be removed as shown below.

OldWktDataType is a copy of the WktDataType taken at the time of writing this

In [7]:
issue_log = IssueLog(dataset='conservation-area', resource='f8aaf696417923bac77e7c2d7fedeff24a9c4be81cf33e4e7a800fc8d70c2c0c')
reprocessed_geometry_wkt = shapely.wkt.loads(OldWktDataType().normalise(value=wkt, issues=issue_log))
reprocessed_geometry_wkt.is_valid

True

In [8]:
plot_map(reprocessed_geometry_wkt)

This shows the pipeline can be used to fix the invalid polygon, however it does not show the root cause of the invalid geometry and why it isn't currently being fixed in the pipeline.

### Finding the cause of the invalidity
One solution could be to run points through the normalisation stage multiple times however it's worth identifying what's causing the issue to confirm this is an acceptable solution. 


the next step is to run the parsing stage of the pipline on the raw wkt provided to investigate why the issues are not being removed. the extract converted resource note book can be used to extract which ever resource the fact belongs to as a csv.

datasette contains the relevant fact/resource information required to use the notebook. This could be improved in future to directly extract the value.

In [9]:
raw_wkt = "MULTIPOLYGON (((460800.6324 299022.9088,460807.3001 299021.65,460807.2798 299021.5298,460808.3501 299021.2899,460815.4001 299019.7001,460815.3601 299019.5099,460818.95 299018.4,460820.9601 299017.5501,460822.62 299016.5899,460822.5401 299015.6899,460822.4799 299014.7201,460821.5102 299012.4101,460820.25 299010.7099,460819.3399 299009.6099,460819.19 299009.33,460819.0999 299009.1499,460819.02 299008.9699,460818.8799 299008.5901,460818.82 299008.3999,460818.7801 299008.21,460817.1801 299001.8,460816.67 299000.0,460815.6366 298996.3699,460815.2191 298990.989,460815.7301 298990.8799,460814.7699 298983.8499,460814.5701 298983.88,460814.1301 298980.78,460812.9201 298980.9698,460811.97 298974.4699,460809.64 298959.94,460810.0156 298959.8867,460808.3272 298950.0235,460790.1299 298951.9199,460789.5301 298951.9801,460789.5275 298951.957,460782.9299 298952.28,460778.2499 298952.7701,460778.24 298953.16,460776.5599 298953.33,460748.35 298949.29,460743.9001 298948.6499,460743.7001 298948.5501,460743.1 298948.0999,460742.6501 298947.5001,460742.55 298947.25,460742.4 298946.9501,460742.3499 298946.6499,460742.3002 298946.4001,460742.2501 298946.0999,460742.2501 298945.45,460742.07 298944.2701,460740.82 298936.1499,460740.1499 298931.7501,460740.2789 298931.7345,460741.3101 298929.2726,460745.8101 298928.7001,460796.2998 298922.2499,460824.6502 298918.4999,460825.0499 298918.4001,460825.8 298918.2999,460826.3801 298918.26,460826.4999 298918.2501,460827.25 298918.2501,460828.0 298918.2999,460828.7501 298918.4001,460830.15 298918.7,460830.8499 298918.9,460831.5499 298919.1999,460832.2001 298919.5001,460832.4499 298919.6,460833.1001 298919.9499,460833.75 298920.4001,460834.9498 298921.3001,460835.4999 298921.8501,460836.5 298922.9501,460836.85 298923.4,460837.6501 298924.7,460838.0001 298925.3499,460838.3 298926.0499,460838.5501 298926.7502,460838.7622 298927.3867,460841.99 298927.3604,460840.5001 298913.9499,460837.75 298860.6001,460839.2999 298832.75,460844.4 298824.7001,460846.3499 298799.6001,460847.5929 298787.7912,460848.2237 298782.9764,460851.0001 298762.3,460851.5999 298758.7998,460856.0999 298733.6998,460868.7702 298658.5451,460829.9376 298635.8588,460833.4047 298632.2097,460839.24 298622.8899,460839.4901 298623.0499,460839.8201 298622.5598,460840.8501 298621.0601,460841.37 298619.8599,460842.6301 298613.7701,460843.81 298606.4401,460843.9701 298605.4799,460837.53 298602.4899,460837.6099 298591.9698,460837.31 298591.9698,460837.3499 298578.3799,460837.6872 298576.4826,460825.3999 298578.3501,460801.8501 298580.3,460801.5499 298580.2499,460801.1501 298580.1999,460800.8001 298580.1501,460800.4001 298580.1999,460800.0 298580.1999,460799.65 298580.3,460799.2499 298580.3999,460798.9 298580.5001,460798.2 298580.8,460797.85 298581.0,460797.5501 298581.2,460797.2499 298581.4501,460797.0001 298581.7,460796.2501 298582.5999,460796.1848 298582.7527,460788.1305 298585.3202,460787.8499 298585.0999,460786.3501 298584.1,460785.6001 298583.7,460784.3 298582.9499,460781.6 298581.75,460781.4501 298581.7,460779.6 298581.0501,460777.6998 298580.45,460775.7499 298579.95,460773.8501 298579.4,460767.2001 298577.6501,460764.8999 298577.3002,460762.6501 298577.05,460761.4001 298576.9499,460757.4999 298576.65,460753.8999 298576.5501,460750.9 298576.65,460747.9499 298576.9001,460746.2999 298577.1001,460738.25 298577.8499,460725.7192 298581.2171,460716.3484 298583.7402,460689.5 298591.2499,460660.95 298599.5499,460632.8 298607.6001,460621.2199 298610.9601,460620.08 298611.2901,460616.59 298612.3001,460599.0 298617.3999,460568.3 298626.15,460562.2273 298627.8628,460562.7514 298630.0835,460562.2001 298630.2499,460565.3699 298641.6401,460566.98 298647.43,460568.7502 298652.5,460571.11 298659.2599,460576.8098 298675.58,460576.8631 298675.7324,460578.2982 298680.04,460576.0817 298680.7327,460574.3502 298680.9501,460574.38 298681.1901,460572.5201 298681.7401,460572.3 298681.8,460572.1999 298681.6999,460571.9501 298681.4,460571.8001 298681.3499,460571.6001 298681.25,460571.0501 298681.25,460570.9001 298681.3001,460570.7001 298681.4,460559.0499 298685.2,460537.95 298692.6,460537.95 298692.5902,460537.6501 298691.2501,460533.7093 298693.1002,460529.928 298694.8764,460499.9999 298708.9299,460483.25 298716.7001,460468.8 298723.65,460468.5001 298723.85,460467.9 298724.3499,460467.6001 298724.6501,460467.35 298724.95,460467.0999 298725.3,460466.4668 298726.4698,460465.1357 298729.3932,460465.0501 298729.65,460464.8501 298730.8499,460464.8 298731.45,460464.8 298732.6499,460464.8501 298732.7501,460464.9499 298733.5499,460465.1499 298734.35,460465.4502 298735.1499,460465.75 298735.8999,460466.1501 298736.7,460466.4472 298737.2196,460462.3 298744.8,460460.0759 298748.4681,460456.6601 298752.11,460456.4699 298752.3201,460455.6501 298753.19,460449.1501 298760.0999,460449.45 298760.4001,460446.5699 298763.6,460440.7001 298769.2501,460434.9698 298771.88,460430.4699 298774.0899,460425.8501 298776.4,460418.8001 298761.5001,460413.6201 298763.84,460409.8299 298765.5601,460405.5699 298767.4799,460400.9799 298769.5602,460397.3501 298771.2001,460397.19 298771.2701,460395.5 298772.05,460394.9545 298772.301,460392.5268 298773.4188,460389.46 298774.8301,460380.3 298779.0499,460380.1599 298779.1199,460379.3601 298779.5,460377.1901 298780.5402,460374.0999 298774.05,460369.0401 298778.4599,460368.75 298779.0499,460359.4686 298781.2268,460355.9152 298782.059,460354.1501 298782.4701,460352.7901 298782.79,460351.7899 298780.3401,460350.6801 298777.72,460349.4798 298774.8799,460349.4 298774.6999,460343.3701 298761.2101,460342.51 298759.27,460339.3 298753.2499,460333.9999 298744.1,460335.5501 298743.35,460332.9499 298737.8999,460331.8499 298735.6,460332.4288 298735.3223,460327.6314 298726.1886,460326.7324 298726.6213,460316.9266 298731.3933,460311.2699 298731.127,460311.9181 298731.223,460304.7879 298735.6593,460304.7854 298735.6545,460294.8531 298740.3254,460293.9499 298740.9001,460290.4999 298742.1501,460289.85 298742.3999,460286.8999 298743.4999,460283.9501 298744.45,460281.9 298745.0501,460278.8 298745.7998,460272.85 298747.0001,460269.4 298747.7,460264.9999 298748.3499,460256.8999 298749.0999,460247.8413 298749.6477,460225.6683 298747.5139,460225.6499 298747.5,460225.2999 298747.2001,460224.65 298746.5499,460218.7939 298751.0771,460218.7701 298751.2001,460217.219 298750.8365,460213.5248 298758.641,460212.0881 298762.6566,460209.7721 298769.1293,460203.7 298786.0999,460202.7801 298790.76,460201.0701 298799.3801,460199.4099 298807.9099,460201.5 298816.9999,460201.6001 298817.4501,460203.0701 298824.6199,460204.8999 298833.4899,460206.4 298840.7999,460206.4999 298841.29,460208.3899 298850.3901,460210.09 298858.5801,460211.8 298866.8499,460212.6499 298870.9498,460213.2801 298874.0699,460215.6 298885.5,460219.0748 298899.3426,460221.29 298907.35,460222.7901 298912.75,460223.7 298916.0501,460232.4501 298946.7998,460238.46 298946.2701,460238.6901 298946.2501,460241.27 298946.0301,460244.4201 298945.7499,460274.3 298942.4999,460281.2498 298976.0001,460281.2901 298976.19,460281.96 298976.0901,460297.0899 298973.7699,460298.0001 298973.6301,460306.3498 298972.35,460299.4501 298933.6499,460296.9799 298919.6799,460305.679 298918.0209,460313.9835 298916.4318,460321.0899 298915.0699,460327.75 298913.7999,460330.7499 298913.2201,460340.1001 298911.43,460349.6599 298909.59,460359.0 298907.7999,460358.9699 298907.61,460363.3101 298906.6799,460363.9001 298906.5499,460368.8499 298905.4901,460371.8799 298904.8399,460373.3999 298904.5099,460378.2001 298903.4799,460392.4701 298900.4202,460399.7299 298898.9499,460407.84 298894.61,460408.0829 298895.7465,460415.9482 298888.5243,460418.6 298887.7499,460418.5924 298887.7213,460427.4595 298883.6389,460427.8399 298884.53,460428.0999 298884.4,460443.6899 298877.0301,460446.7199 298875.6001,460447.6899 298875.1401,460451.25 298873.4501,460453.8 298872.2499,460455.62 298871.3899,460458.02 298870.2499,460459.9399 298869.3401,460463.97 298867.4301,460467.9501 298865.5499,460468.1501 298865.45,460469.2499 298864.9999,460472.6 298863.8,460473.5301 298863.47,460474.3002 298863.1999,460478.68 298861.3799,460483.5499 298859.3501,460484.0001 298859.2001,460484.7501 298859.1,460486.7999 298862.8001,460499.15 298884.5499,460499.9999 298884.1499,460506.2302 298880.71,460507.7201 298879.8801,460515.07 298875.8201,460517.88 298874.2699,460521.9701 298872.0099,460522.6301 298871.6501,460523.7799 298871.0101,460550.5299 298856.2301,460551.4001 298855.7501,460553.0701 298857.6401,460556.3601 298861.3901,460557.85 298863.08,460558.44 298863.7499,460560.81 298866.44,460562.9999 298868.92,460566.76 298873.1898,460568.2401 298874.87,460569.2701 298876.0401,460569.6799 298876.5001,460574.4101 298881.8801,460575.11 298882.6799,460577.39 298885.2699,460579.3501 298887.4899,460580.64 298888.9599,460584.3899 298893.2199,460586.0 298895.05,460587.2988 298897.2513,460587.2072 298897.5769,460600.1092 298914.1623,460600.3999 298914.55,460600.7001 298915.0001,460600.9499 298915.45,460602.49 298918.0102,460603.28 298919.9801,460604.1001 298922.0701,460604.2906 298922.694,460580.6593 298927.8213,460579.7701 298928.6899,460578.07 298929.7801,460576.1901 298930.5201,460574.1 298931.0501,460574.0699 298930.9401,460562.2299 298934.1799,460565.6599 298944.2799,460566.5 298946.7501,460536.6401 298956.9299,460531.7499 298958.6,460537.8501 298976.5,460553.3501 298971.2199,460559.1999 298991.65,460560.6546 298991.2391,460564.506 299005.7566,460571.86 299032.48,460572.4107 299032.3123,460573.9904 299037.1607,460573.4001 299037.2999,460575.15 299044.8501,460581.5501 299043.3199,460582.1001 299046.05,460582.11 299046.2,460582.42 299052.0,460582.4501 299052.6499,460587.7001 299051.8501,460598.7299 299050.1801,460605.13 299049.23,460606.6368 299049.0033,460622.1672 299046.697,460628.25 299045.7999,460629.4099 299045.63,460629.6001 299045.5999,460628.9699 299041.4499,460628.5699 299038.82,460627.8001 299033.6999,460626.3999 299024.4502,460626.21 299021.9499,460625.3801 299017.63,460624.9001 299013.0,460624.2201 299008.32,460623.1001 299000.0,460621.7611 298991.6366,460635.6512 298987.2051,460635.7001 298987.6601,460643.5201 298986.82,460643.7899 298986.82,460644.6499 298986.8,460648.54 298986.4399,460653.8401 298985.9599,460659.5999 298985.4299,460659.0277 298972.4305,460680.009 298970.6778,460702.0499 298969.5001,460702.2499 298971.1999,460707.2501 298970.9501,460713.1421 298971.5216,460724.6959 298971.7746,460724.7501 298972.2001,460738.14 298973.5201,460742.6799 298973.9699,460762.1 298975.6501,460771.5001 298982.1501,460777.45 298979.5202,460780.4001 298978.15,460785.9 298988.95,460787.951 298992.6378,460791.0352 298993.5755,460794.0598 298999.1901,460794.5499 299000.0,460795.52 299002.15,460796.1499 299003.7601,460796.88 299005.9199,460797.1501 299007.2998,460799.625 299020.2517,460800.6324 299022.9088)))"

In [10]:
geometry_raw_wkt = shapely.wkt.loads(raw_wkt)
geometry_raw_wkt.is_valid

True

The above shows that the origin WKT is valid, so we're introducting invalidity in the parsing process as the final output is invalid (shown by the original wkt examined). now to look through step by step to see which line of code 

In [11]:
# parse and check it's valid
geometry_raw_wkt,issue = parse_wkt(raw_wkt)

In [12]:
geometry_raw_wkt.is_valid

True

In [13]:
# dump out the geometry and re-load it
geometry_raw_wkt = shapely.wkt.loads(dump_wkt(geometry_raw_wkt))
geometry_raw_wkt.is_valid

False

This shows that what's introducing the invalidity is reducing the given precision of the point. this is likely to be made worse by the fact that the point has to be transformed to our standard co-ordinates. However at this point the next step still normalise the wkt, so this should be fixed

In [14]:
geometry_raw_wkt,issue = normalise_geometry(geometry_raw_wkt)
geometry_raw_wkt.is_valid

True

In [15]:
geometry_raw_wkt = shapely.wkt.loads(dump_wkt(geometry_raw_wkt))
geometry_raw_wkt.is_valid

False

This shows that despite validating the shape when it is agained dumped out at the required precision further invalidations are being introduced. It's also worth noting that the make_valid doesn;t return it to it's original shape and instead just attempts to make what ir provided valid.

In [16]:
geometry_raw_wkt,issue = normalise_geometry(geometry_raw_wkt)
geometry_raw_wkt = shapely.wkt.loads(dump_wkt(geometry_raw_wkt))
geometry_raw_wkt.is_valid

True

the above proves that repeating the process should correct these wkts, doing a bit of research there appears to be very little support for reducing precision in shapely, mainly due to the lack of support for it in GEOS which shapely is based on. At a later date it could be worth spiking the possibility of moving these transformations to either a spatialite or postgis database or investigating other tools in python.

### Editing the normalisation process

In order to fix the above error we'll address the issue in the pipeline by iterating on the dumping/normalising process. This work can be foun in the new_wkt.py file.

In [17]:
raw_wkt = "MULTIPOLYGON (((460800.6324 299022.9088,460807.3001 299021.65,460807.2798 299021.5298,460808.3501 299021.2899,460815.4001 299019.7001,460815.3601 299019.5099,460818.95 299018.4,460820.9601 299017.5501,460822.62 299016.5899,460822.5401 299015.6899,460822.4799 299014.7201,460821.5102 299012.4101,460820.25 299010.7099,460819.3399 299009.6099,460819.19 299009.33,460819.0999 299009.1499,460819.02 299008.9699,460818.8799 299008.5901,460818.82 299008.3999,460818.7801 299008.21,460817.1801 299001.8,460816.67 299000.0,460815.6366 298996.3699,460815.2191 298990.989,460815.7301 298990.8799,460814.7699 298983.8499,460814.5701 298983.88,460814.1301 298980.78,460812.9201 298980.9698,460811.97 298974.4699,460809.64 298959.94,460810.0156 298959.8867,460808.3272 298950.0235,460790.1299 298951.9199,460789.5301 298951.9801,460789.5275 298951.957,460782.9299 298952.28,460778.2499 298952.7701,460778.24 298953.16,460776.5599 298953.33,460748.35 298949.29,460743.9001 298948.6499,460743.7001 298948.5501,460743.1 298948.0999,460742.6501 298947.5001,460742.55 298947.25,460742.4 298946.9501,460742.3499 298946.6499,460742.3002 298946.4001,460742.2501 298946.0999,460742.2501 298945.45,460742.07 298944.2701,460740.82 298936.1499,460740.1499 298931.7501,460740.2789 298931.7345,460741.3101 298929.2726,460745.8101 298928.7001,460796.2998 298922.2499,460824.6502 298918.4999,460825.0499 298918.4001,460825.8 298918.2999,460826.3801 298918.26,460826.4999 298918.2501,460827.25 298918.2501,460828.0 298918.2999,460828.7501 298918.4001,460830.15 298918.7,460830.8499 298918.9,460831.5499 298919.1999,460832.2001 298919.5001,460832.4499 298919.6,460833.1001 298919.9499,460833.75 298920.4001,460834.9498 298921.3001,460835.4999 298921.8501,460836.5 298922.9501,460836.85 298923.4,460837.6501 298924.7,460838.0001 298925.3499,460838.3 298926.0499,460838.5501 298926.7502,460838.7622 298927.3867,460841.99 298927.3604,460840.5001 298913.9499,460837.75 298860.6001,460839.2999 298832.75,460844.4 298824.7001,460846.3499 298799.6001,460847.5929 298787.7912,460848.2237 298782.9764,460851.0001 298762.3,460851.5999 298758.7998,460856.0999 298733.6998,460868.7702 298658.5451,460829.9376 298635.8588,460833.4047 298632.2097,460839.24 298622.8899,460839.4901 298623.0499,460839.8201 298622.5598,460840.8501 298621.0601,460841.37 298619.8599,460842.6301 298613.7701,460843.81 298606.4401,460843.9701 298605.4799,460837.53 298602.4899,460837.6099 298591.9698,460837.31 298591.9698,460837.3499 298578.3799,460837.6872 298576.4826,460825.3999 298578.3501,460801.8501 298580.3,460801.5499 298580.2499,460801.1501 298580.1999,460800.8001 298580.1501,460800.4001 298580.1999,460800.0 298580.1999,460799.65 298580.3,460799.2499 298580.3999,460798.9 298580.5001,460798.2 298580.8,460797.85 298581.0,460797.5501 298581.2,460797.2499 298581.4501,460797.0001 298581.7,460796.2501 298582.5999,460796.1848 298582.7527,460788.1305 298585.3202,460787.8499 298585.0999,460786.3501 298584.1,460785.6001 298583.7,460784.3 298582.9499,460781.6 298581.75,460781.4501 298581.7,460779.6 298581.0501,460777.6998 298580.45,460775.7499 298579.95,460773.8501 298579.4,460767.2001 298577.6501,460764.8999 298577.3002,460762.6501 298577.05,460761.4001 298576.9499,460757.4999 298576.65,460753.8999 298576.5501,460750.9 298576.65,460747.9499 298576.9001,460746.2999 298577.1001,460738.25 298577.8499,460725.7192 298581.2171,460716.3484 298583.7402,460689.5 298591.2499,460660.95 298599.5499,460632.8 298607.6001,460621.2199 298610.9601,460620.08 298611.2901,460616.59 298612.3001,460599.0 298617.3999,460568.3 298626.15,460562.2273 298627.8628,460562.7514 298630.0835,460562.2001 298630.2499,460565.3699 298641.6401,460566.98 298647.43,460568.7502 298652.5,460571.11 298659.2599,460576.8098 298675.58,460576.8631 298675.7324,460578.2982 298680.04,460576.0817 298680.7327,460574.3502 298680.9501,460574.38 298681.1901,460572.5201 298681.7401,460572.3 298681.8,460572.1999 298681.6999,460571.9501 298681.4,460571.8001 298681.3499,460571.6001 298681.25,460571.0501 298681.25,460570.9001 298681.3001,460570.7001 298681.4,460559.0499 298685.2,460537.95 298692.6,460537.95 298692.5902,460537.6501 298691.2501,460533.7093 298693.1002,460529.928 298694.8764,460499.9999 298708.9299,460483.25 298716.7001,460468.8 298723.65,460468.5001 298723.85,460467.9 298724.3499,460467.6001 298724.6501,460467.35 298724.95,460467.0999 298725.3,460466.4668 298726.4698,460465.1357 298729.3932,460465.0501 298729.65,460464.8501 298730.8499,460464.8 298731.45,460464.8 298732.6499,460464.8501 298732.7501,460464.9499 298733.5499,460465.1499 298734.35,460465.4502 298735.1499,460465.75 298735.8999,460466.1501 298736.7,460466.4472 298737.2196,460462.3 298744.8,460460.0759 298748.4681,460456.6601 298752.11,460456.4699 298752.3201,460455.6501 298753.19,460449.1501 298760.0999,460449.45 298760.4001,460446.5699 298763.6,460440.7001 298769.2501,460434.9698 298771.88,460430.4699 298774.0899,460425.8501 298776.4,460418.8001 298761.5001,460413.6201 298763.84,460409.8299 298765.5601,460405.5699 298767.4799,460400.9799 298769.5602,460397.3501 298771.2001,460397.19 298771.2701,460395.5 298772.05,460394.9545 298772.301,460392.5268 298773.4188,460389.46 298774.8301,460380.3 298779.0499,460380.1599 298779.1199,460379.3601 298779.5,460377.1901 298780.5402,460374.0999 298774.05,460369.0401 298778.4599,460368.75 298779.0499,460359.4686 298781.2268,460355.9152 298782.059,460354.1501 298782.4701,460352.7901 298782.79,460351.7899 298780.3401,460350.6801 298777.72,460349.4798 298774.8799,460349.4 298774.6999,460343.3701 298761.2101,460342.51 298759.27,460339.3 298753.2499,460333.9999 298744.1,460335.5501 298743.35,460332.9499 298737.8999,460331.8499 298735.6,460332.4288 298735.3223,460327.6314 298726.1886,460326.7324 298726.6213,460316.9266 298731.3933,460311.2699 298731.127,460311.9181 298731.223,460304.7879 298735.6593,460304.7854 298735.6545,460294.8531 298740.3254,460293.9499 298740.9001,460290.4999 298742.1501,460289.85 298742.3999,460286.8999 298743.4999,460283.9501 298744.45,460281.9 298745.0501,460278.8 298745.7998,460272.85 298747.0001,460269.4 298747.7,460264.9999 298748.3499,460256.8999 298749.0999,460247.8413 298749.6477,460225.6683 298747.5139,460225.6499 298747.5,460225.2999 298747.2001,460224.65 298746.5499,460218.7939 298751.0771,460218.7701 298751.2001,460217.219 298750.8365,460213.5248 298758.641,460212.0881 298762.6566,460209.7721 298769.1293,460203.7 298786.0999,460202.7801 298790.76,460201.0701 298799.3801,460199.4099 298807.9099,460201.5 298816.9999,460201.6001 298817.4501,460203.0701 298824.6199,460204.8999 298833.4899,460206.4 298840.7999,460206.4999 298841.29,460208.3899 298850.3901,460210.09 298858.5801,460211.8 298866.8499,460212.6499 298870.9498,460213.2801 298874.0699,460215.6 298885.5,460219.0748 298899.3426,460221.29 298907.35,460222.7901 298912.75,460223.7 298916.0501,460232.4501 298946.7998,460238.46 298946.2701,460238.6901 298946.2501,460241.27 298946.0301,460244.4201 298945.7499,460274.3 298942.4999,460281.2498 298976.0001,460281.2901 298976.19,460281.96 298976.0901,460297.0899 298973.7699,460298.0001 298973.6301,460306.3498 298972.35,460299.4501 298933.6499,460296.9799 298919.6799,460305.679 298918.0209,460313.9835 298916.4318,460321.0899 298915.0699,460327.75 298913.7999,460330.7499 298913.2201,460340.1001 298911.43,460349.6599 298909.59,460359.0 298907.7999,460358.9699 298907.61,460363.3101 298906.6799,460363.9001 298906.5499,460368.8499 298905.4901,460371.8799 298904.8399,460373.3999 298904.5099,460378.2001 298903.4799,460392.4701 298900.4202,460399.7299 298898.9499,460407.84 298894.61,460408.0829 298895.7465,460415.9482 298888.5243,460418.6 298887.7499,460418.5924 298887.7213,460427.4595 298883.6389,460427.8399 298884.53,460428.0999 298884.4,460443.6899 298877.0301,460446.7199 298875.6001,460447.6899 298875.1401,460451.25 298873.4501,460453.8 298872.2499,460455.62 298871.3899,460458.02 298870.2499,460459.9399 298869.3401,460463.97 298867.4301,460467.9501 298865.5499,460468.1501 298865.45,460469.2499 298864.9999,460472.6 298863.8,460473.5301 298863.47,460474.3002 298863.1999,460478.68 298861.3799,460483.5499 298859.3501,460484.0001 298859.2001,460484.7501 298859.1,460486.7999 298862.8001,460499.15 298884.5499,460499.9999 298884.1499,460506.2302 298880.71,460507.7201 298879.8801,460515.07 298875.8201,460517.88 298874.2699,460521.9701 298872.0099,460522.6301 298871.6501,460523.7799 298871.0101,460550.5299 298856.2301,460551.4001 298855.7501,460553.0701 298857.6401,460556.3601 298861.3901,460557.85 298863.08,460558.44 298863.7499,460560.81 298866.44,460562.9999 298868.92,460566.76 298873.1898,460568.2401 298874.87,460569.2701 298876.0401,460569.6799 298876.5001,460574.4101 298881.8801,460575.11 298882.6799,460577.39 298885.2699,460579.3501 298887.4899,460580.64 298888.9599,460584.3899 298893.2199,460586.0 298895.05,460587.2988 298897.2513,460587.2072 298897.5769,460600.1092 298914.1623,460600.3999 298914.55,460600.7001 298915.0001,460600.9499 298915.45,460602.49 298918.0102,460603.28 298919.9801,460604.1001 298922.0701,460604.2906 298922.694,460580.6593 298927.8213,460579.7701 298928.6899,460578.07 298929.7801,460576.1901 298930.5201,460574.1 298931.0501,460574.0699 298930.9401,460562.2299 298934.1799,460565.6599 298944.2799,460566.5 298946.7501,460536.6401 298956.9299,460531.7499 298958.6,460537.8501 298976.5,460553.3501 298971.2199,460559.1999 298991.65,460560.6546 298991.2391,460564.506 299005.7566,460571.86 299032.48,460572.4107 299032.3123,460573.9904 299037.1607,460573.4001 299037.2999,460575.15 299044.8501,460581.5501 299043.3199,460582.1001 299046.05,460582.11 299046.2,460582.42 299052.0,460582.4501 299052.6499,460587.7001 299051.8501,460598.7299 299050.1801,460605.13 299049.23,460606.6368 299049.0033,460622.1672 299046.697,460628.25 299045.7999,460629.4099 299045.63,460629.6001 299045.5999,460628.9699 299041.4499,460628.5699 299038.82,460627.8001 299033.6999,460626.3999 299024.4502,460626.21 299021.9499,460625.3801 299017.63,460624.9001 299013.0,460624.2201 299008.32,460623.1001 299000.0,460621.7611 298991.6366,460635.6512 298987.2051,460635.7001 298987.6601,460643.5201 298986.82,460643.7899 298986.82,460644.6499 298986.8,460648.54 298986.4399,460653.8401 298985.9599,460659.5999 298985.4299,460659.0277 298972.4305,460680.009 298970.6778,460702.0499 298969.5001,460702.2499 298971.1999,460707.2501 298970.9501,460713.1421 298971.5216,460724.6959 298971.7746,460724.7501 298972.2001,460738.14 298973.5201,460742.6799 298973.9699,460762.1 298975.6501,460771.5001 298982.1501,460777.45 298979.5202,460780.4001 298978.15,460785.9 298988.95,460787.951 298992.6378,460791.0352 298993.5755,460794.0598 298999.1901,460794.5499 299000.0,460795.52 299002.15,460796.1499 299003.7601,460796.88 299005.9199,460797.1501 299007.2998,460799.625 299020.2517,460800.6324 299022.9088)))"

In [18]:
issue_log = IssueLog(dataset='conservation-area', resource='f8aaf696417923bac77e7c2d7fedeff24a9c4be81cf33e4e7a800fc8d70c2c0c')
fixed_geometry = shapely.wkt.loads(NewWktDataType().normalise(value=raw_wkt, issues=issue_log))
fixed_geometry.is_valid

True

We can see iterating fixes the geometry

### Creating test data

We want to use this as a test case for unit/integration testing the new pipline. in order to do that we can used a simplified version of the shape.

In [19]:
raw_wkt = "MULTIPOLYGON (((460800.6324 299022.9088,460807.3001 299021.65,460807.2798 299021.5298,460808.3501 299021.2899,460815.4001 299019.7001,460815.3601 299019.5099,460818.95 299018.4,460820.9601 299017.5501,460822.62 299016.5899,460822.5401 299015.6899,460822.4799 299014.7201,460821.5102 299012.4101,460820.25 299010.7099,460819.3399 299009.6099,460819.19 299009.33,460819.0999 299009.1499,460819.02 299008.9699,460818.8799 299008.5901,460818.82 299008.3999,460818.7801 299008.21,460817.1801 299001.8,460816.67 299000.0,460815.6366 298996.3699,460815.2191 298990.989,460815.7301 298990.8799,460814.7699 298983.8499,460814.5701 298983.88,460814.1301 298980.78,460812.9201 298980.9698,460811.97 298974.4699,460809.64 298959.94,460810.0156 298959.8867,460808.3272 298950.0235,460790.1299 298951.9199,460789.5301 298951.9801,460789.5275 298951.957,460782.9299 298952.28,460778.2499 298952.7701,460778.24 298953.16,460776.5599 298953.33,460748.35 298949.29,460743.9001 298948.6499,460743.7001 298948.5501,460743.1 298948.0999,460742.6501 298947.5001,460742.55 298947.25,460742.4 298946.9501,460742.3499 298946.6499,460742.3002 298946.4001,460742.2501 298946.0999,460742.2501 298945.45,460742.07 298944.2701,460740.82 298936.1499,460740.1499 298931.7501,460740.2789 298931.7345,460741.3101 298929.2726,460745.8101 298928.7001,460796.2998 298922.2499,460824.6502 298918.4999,460825.0499 298918.4001,460825.8 298918.2999,460826.3801 298918.26,460826.4999 298918.2501,460827.25 298918.2501,460828.0 298918.2999,460828.7501 298918.4001,460830.15 298918.7,460830.8499 298918.9,460831.5499 298919.1999,460832.2001 298919.5001,460832.4499 298919.6,460833.1001 298919.9499,460833.75 298920.4001,460834.9498 298921.3001,460835.4999 298921.8501,460836.5 298922.9501,460836.85 298923.4,460837.6501 298924.7,460838.0001 298925.3499,460838.3 298926.0499,460838.5501 298926.7502,460838.7622 298927.3867,460841.99 298927.3604,460840.5001 298913.9499,460837.75 298860.6001,460839.2999 298832.75,460844.4 298824.7001,460846.3499 298799.6001,460847.5929 298787.7912,460848.2237 298782.9764,460851.0001 298762.3,460851.5999 298758.7998,460856.0999 298733.6998,460868.7702 298658.5451,460829.9376 298635.8588,460833.4047 298632.2097,460839.24 298622.8899,460839.4901 298623.0499,460839.8201 298622.5598,460840.8501 298621.0601,460841.37 298619.8599,460842.6301 298613.7701,460843.81 298606.4401,460843.9701 298605.4799,460837.53 298602.4899,460837.6099 298591.9698,460837.31 298591.9698,460837.3499 298578.3799,460837.6872 298576.4826,460825.3999 298578.3501,460801.8501 298580.3,460801.5499 298580.2499,460801.1501 298580.1999,460800.8001 298580.1501,460800.4001 298580.1999,460800.0 298580.1999,460799.65 298580.3,460799.2499 298580.3999,460798.9 298580.5001,460798.2 298580.8,460797.85 298581.0,460797.5501 298581.2,460797.2499 298581.4501,460797.0001 298581.7,460796.2501 298582.5999,460796.1848 298582.7527,460788.1305 298585.3202,460787.8499 298585.0999,460786.3501 298584.1,460785.6001 298583.7,460784.3 298582.9499,460781.6 298581.75,460781.4501 298581.7,460779.6 298581.0501,460777.6998 298580.45,460775.7499 298579.95,460773.8501 298579.4,460767.2001 298577.6501,460764.8999 298577.3002,460762.6501 298577.05,460761.4001 298576.9499,460757.4999 298576.65,460753.8999 298576.5501,460750.9 298576.65,460747.9499 298576.9001,460746.2999 298577.1001,460738.25 298577.8499,460725.7192 298581.2171,460716.3484 298583.7402,460689.5 298591.2499,460660.95 298599.5499,460632.8 298607.6001,460621.2199 298610.9601,460620.08 298611.2901,460616.59 298612.3001,460599.0 298617.3999,460568.3 298626.15,460562.2273 298627.8628,460562.7514 298630.0835,460562.2001 298630.2499,460565.3699 298641.6401,460566.98 298647.43,460568.7502 298652.5,460571.11 298659.2599,460576.8098 298675.58,460576.8631 298675.7324,460578.2982 298680.04,460576.0817 298680.7327,460574.3502 298680.9501,460574.38 298681.1901,460572.5201 298681.7401,460572.3 298681.8,460572.1999 298681.6999,460571.9501 298681.4,460571.8001 298681.3499,460571.6001 298681.25,460571.0501 298681.25,460570.9001 298681.3001,460570.7001 298681.4,460559.0499 298685.2,460537.95 298692.6,460537.95 298692.5902,460537.6501 298691.2501,460533.7093 298693.1002,460529.928 298694.8764,460499.9999 298708.9299,460483.25 298716.7001,460468.8 298723.65,460468.5001 298723.85,460467.9 298724.3499,460467.6001 298724.6501,460467.35 298724.95,460467.0999 298725.3,460466.4668 298726.4698,460465.1357 298729.3932,460465.0501 298729.65,460464.8501 298730.8499,460464.8 298731.45,460464.8 298732.6499,460464.8501 298732.7501,460464.9499 298733.5499,460465.1499 298734.35,460465.4502 298735.1499,460465.75 298735.8999,460466.1501 298736.7,460466.4472 298737.2196,460462.3 298744.8,460460.0759 298748.4681,460456.6601 298752.11,460456.4699 298752.3201,460455.6501 298753.19,460449.1501 298760.0999,460449.45 298760.4001,460446.5699 298763.6,460440.7001 298769.2501,460434.9698 298771.88,460430.4699 298774.0899,460425.8501 298776.4,460418.8001 298761.5001,460413.6201 298763.84,460409.8299 298765.5601,460405.5699 298767.4799,460400.9799 298769.5602,460397.3501 298771.2001,460397.19 298771.2701,460395.5 298772.05,460394.9545 298772.301,460392.5268 298773.4188,460389.46 298774.8301,460380.3 298779.0499,460380.1599 298779.1199,460379.3601 298779.5,460377.1901 298780.5402,460374.0999 298774.05,460369.0401 298778.4599,460368.75 298779.0499,460359.4686 298781.2268,460355.9152 298782.059,460354.1501 298782.4701,460352.7901 298782.79,460351.7899 298780.3401,460350.6801 298777.72,460349.4798 298774.8799,460349.4 298774.6999,460343.3701 298761.2101,460342.51 298759.27,460339.3 298753.2499,460333.9999 298744.1,460335.5501 298743.35,460332.9499 298737.8999,460331.8499 298735.6,460332.4288 298735.3223,460327.6314 298726.1886,460326.7324 298726.6213,460316.9266 298731.3933,460311.2699 298731.127,460311.9181 298731.223,460304.7879 298735.6593,460304.7854 298735.6545,460294.8531 298740.3254,460293.9499 298740.9001,460290.4999 298742.1501,460289.85 298742.3999,460286.8999 298743.4999,460283.9501 298744.45,460281.9 298745.0501,460278.8 298745.7998,460272.85 298747.0001,460269.4 298747.7,460264.9999 298748.3499,460256.8999 298749.0999,460247.8413 298749.6477,460225.6683 298747.5139,460225.6499 298747.5,460225.2999 298747.2001,460224.65 298746.5499,460218.7939 298751.0771,460218.7701 298751.2001,460217.219 298750.8365,460213.5248 298758.641,460212.0881 298762.6566,460209.7721 298769.1293,460203.7 298786.0999,460202.7801 298790.76,460201.0701 298799.3801,460199.4099 298807.9099,460201.5 298816.9999,460201.6001 298817.4501,460203.0701 298824.6199,460204.8999 298833.4899,460206.4 298840.7999,460206.4999 298841.29,460208.3899 298850.3901,460210.09 298858.5801,460211.8 298866.8499,460212.6499 298870.9498,460213.2801 298874.0699,460215.6 298885.5,460219.0748 298899.3426,460221.29 298907.35,460222.7901 298912.75,460223.7 298916.0501,460232.4501 298946.7998,460238.46 298946.2701,460238.6901 298946.2501,460241.27 298946.0301,460244.4201 298945.7499,460274.3 298942.4999,460281.2498 298976.0001,460281.2901 298976.19,460281.96 298976.0901,460297.0899 298973.7699,460298.0001 298973.6301,460306.3498 298972.35,460299.4501 298933.6499,460296.9799 298919.6799,460305.679 298918.0209,460313.9835 298916.4318,460321.0899 298915.0699,460327.75 298913.7999,460330.7499 298913.2201,460340.1001 298911.43,460349.6599 298909.59,460359.0 298907.7999,460358.9699 298907.61,460363.3101 298906.6799,460363.9001 298906.5499,460368.8499 298905.4901,460371.8799 298904.8399,460373.3999 298904.5099,460378.2001 298903.4799,460392.4701 298900.4202,460399.7299 298898.9499,460407.84 298894.61,460408.0829 298895.7465,460415.9482 298888.5243,460418.6 298887.7499,460418.5924 298887.7213,460427.4595 298883.6389,460427.8399 298884.53,460428.0999 298884.4,460443.6899 298877.0301,460446.7199 298875.6001,460447.6899 298875.1401,460451.25 298873.4501,460453.8 298872.2499,460455.62 298871.3899,460458.02 298870.2499,460459.9399 298869.3401,460463.97 298867.4301,460467.9501 298865.5499,460468.1501 298865.45,460469.2499 298864.9999,460472.6 298863.8,460473.5301 298863.47,460474.3002 298863.1999,460478.68 298861.3799,460483.5499 298859.3501,460484.0001 298859.2001,460484.7501 298859.1,460486.7999 298862.8001,460499.15 298884.5499,460499.9999 298884.1499,460506.2302 298880.71,460507.7201 298879.8801,460515.07 298875.8201,460517.88 298874.2699,460521.9701 298872.0099,460522.6301 298871.6501,460523.7799 298871.0101,460550.5299 298856.2301,460551.4001 298855.7501,460553.0701 298857.6401,460556.3601 298861.3901,460557.85 298863.08,460558.44 298863.7499,460560.81 298866.44,460562.9999 298868.92,460566.76 298873.1898,460568.2401 298874.87,460569.2701 298876.0401,460569.6799 298876.5001,460574.4101 298881.8801,460575.11 298882.6799,460577.39 298885.2699,460579.3501 298887.4899,460580.64 298888.9599,460584.3899 298893.2199,460586.0 298895.05,460587.2988 298897.2513,460587.2072 298897.5769,460600.1092 298914.1623,460600.3999 298914.55,460600.7001 298915.0001,460600.9499 298915.45,460602.49 298918.0102,460603.28 298919.9801,460604.1001 298922.0701,460604.2906 298922.694,460580.6593 298927.8213,460579.7701 298928.6899,460578.07 298929.7801,460576.1901 298930.5201,460574.1 298931.0501,460574.0699 298930.9401,460562.2299 298934.1799,460565.6599 298944.2799,460566.5 298946.7501,460536.6401 298956.9299,460531.7499 298958.6,460537.8501 298976.5,460553.3501 298971.2199,460559.1999 298991.65,460560.6546 298991.2391,460564.506 299005.7566,460571.86 299032.48,460572.4107 299032.3123,460573.9904 299037.1607,460573.4001 299037.2999,460575.15 299044.8501,460581.5501 299043.3199,460582.1001 299046.05,460582.11 299046.2,460582.42 299052.0,460582.4501 299052.6499,460587.7001 299051.8501,460598.7299 299050.1801,460605.13 299049.23,460606.6368 299049.0033,460622.1672 299046.697,460628.25 299045.7999,460629.4099 299045.63,460629.6001 299045.5999,460628.9699 299041.4499,460628.5699 299038.82,460627.8001 299033.6999,460626.3999 299024.4502,460626.21 299021.9499,460625.3801 299017.63,460624.9001 299013.0,460624.2201 299008.32,460623.1001 299000.0,460621.7611 298991.6366,460635.6512 298987.2051,460635.7001 298987.6601,460643.5201 298986.82,460643.7899 298986.82,460644.6499 298986.8,460648.54 298986.4399,460653.8401 298985.9599,460659.5999 298985.4299,460659.0277 298972.4305,460680.009 298970.6778,460702.0499 298969.5001,460702.2499 298971.1999,460707.2501 298970.9501,460713.1421 298971.5216,460724.6959 298971.7746,460724.7501 298972.2001,460738.14 298973.5201,460742.6799 298973.9699,460762.1 298975.6501,460771.5001 298982.1501,460777.45 298979.5202,460780.4001 298978.15,460785.9 298988.95,460787.951 298992.6378,460791.0352 298993.5755,460794.0598 298999.1901,460794.5499 299000.0,460795.52 299002.15,460796.1499 299003.7601,460796.88 299005.9199,460797.1501 299007.2998,460799.625 299020.2517,460800.6324 299022.9088)))"

In [20]:
geom_wkt = shapely.wkt.loads(raw_wkt)
geom_points = MultiPoint(list(geom_wkt.geoms[0].exterior.coords))
# points2.geometry = points2.geometry.apply(lambda x: MultiPoint(list(x.exterior.coords)))

In [21]:
plot_map(geom_wkt,geom_points)

In [22]:
alt_wkt = "MULTIPOLYGON (((460316.9266 298735.6545,460316.9266 298731.3933,460311.2699 298731.127,460311.9181 298731.223,460304.7879 298735.6593,460304.7854 298735.6545,460316.9266 298735.6545)))"

In [23]:
geom_alt_wkt = shapely.wkt.loads(alt_wkt)
geom_alt_points = MultiPoint(list(geom_alt_wkt.geoms[0].exterior.coords))
geom_alt_points = [point for point in geom_alt_points.geoms]
plot_map(geom_alt_wkt,geom_alt_points)

I've manually cut out the segment causing the issue, we'll just run the old pipeline to check it's still causing the issue

In [24]:
issue_log = IssueLog(dataset='conservation-area', resource='f8aaf696417923bac77e7c2d7fedeff24a9c4be81cf33e4e7a800fc8d70c2c0c')
processed_alt_wkt = shapely.wkt.loads(OldWktDataType().normalise(value=alt_wkt, issues=issue_log))
processed_alt_wkt.is_valid

False