# Import Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
import mapbox_vector_tile
from pyproj import Proj, transform
from time import time
import operator
from collections import Counter

from tensorflow.keras import Sequential
from tensorflow.keras import layers

from shapely import geometry 
from simplification.cutil import (
    simplify_coords,
    simplify_coords_idx,
    simplify_coords_vw,
    simplify_coords_vw_idx,
    simplify_coords_vwp,
)

# Define Functions

In [3]:
def create_connection(db_file):
    """ create a database connection to a SQLite database """
    try:
        conn = sqlite3.connect(db_file)
        print(conn)
    except Error as e:
        print(e)
    
    return conn

def PolyArea(x,y):
    return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

def ScoreFormula(old_number_of_datapoints, new_number_of_datapoints, processing_time):
    return (1 - (new_number_of_datapoints / old_number_of_datapoints)) * (1 - processing_time)


def ScaleFactor(all_geometries):
    b_list = []
    
    for geometries in all_geometries:
        
        polygon = geometry.Polygon(geometries)
        centroid = np.array(polygon.centroid)
        coordinates = np.vstack(geometries)
        
        b = coordinates - centroid
        b_min = np.min(b)
        b_max = np.max(b)
        b_list.append(b_min)
        b_list.append(b_max)
        
    return np.std(b_list)
    
def Normalize_Geometry(coordinates1, scale_factor):
    polygon = geometry.Polygon(coordinates1)
    centroid = np.array(polygon.centroid)
    coordinates2 = np.vstack(coordinates1)
    
    return (coordinates2 - centroid) / scale_factor

def Add_One_Hot(normalized_geometry):
    normalized_geometry = np.insert(normalized_geometry, 2, 1, axis=1)
    normalized_geometry = np.insert(normalized_geometry, 3, 0, axis=1)
    normalized_geometry = np.insert(normalized_geometry, 4, 0, axis=1)
    normalized_geometry[len(normalized_geometry)-1,2] = 0
    normalized_geometry[len(normalized_geometry)-1,4] = 1
    
    return normalized_geometry

def Add_Zero_Padding(one_hotted_geometry, max_length):
    boundary = max_length - len(one_hotted_geometry)
    zero_matrix = np.zeros([boundary,5])
    return np.append(one_hotted_geometry, zero_matrix, axis=0)

# Load Data

In [10]:
data = pd.read_csv('/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/Sample data/ONTWIKKELBUURTEN.csv', sep =';')

conn_pand_centrum = create_connection("/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/SQLite/Pand_26116_centrum.db")

cur = conn_pand_centrum.cursor()
cur.execute("SELECT data FROM tiles;")

rows = cur.fetchall()
pand_centrum_data = []
for row in rows:
    pand_centrum_data.append(mapbox_vector_tile.decode(row[0]))
    #print(row[0])
print(pand_centrum_data[0:3])
print(len(pand_centrum_data))

<sqlite3.Connection object at 0x7febe203e810>
[{'bag_pand.se_fld12_geometrie': {'extent': 4096, 'version': 2, 'features': [{'geometry': {'type': 'Polygon', 'coordinates': [[[243, 2760], [242, 2760], [242, 2761], [243, 2760]]]}, 'properties': {}, 'id': 0, 'type': 3}, {'geometry': {'type': 'MultiPolygon', 'coordinates': []}, 'properties': {}, 'id': 1, 'type': 3}, {'geometry': {'type': 'Polygon', 'coordinates': [[[243, 2759], [243, 2760], [244, 2760], [244, 2759], [243, 2759]]]}, 'properties': {}, 'id': 2, 'type': 3}, {'geometry': {'type': 'Polygon', 'coordinates': [[[244, 2759], [243, 2759], [243, 2760], [244, 2760], [244, 2759]]]}, 'properties': {}, 'id': 3, 'type': 3}, {'geometry': {'type': 'Polygon', 'coordinates': [[[243, 2759], [242, 2759], [242, 2760], [243, 2760], [243, 2759]]]}, 'properties': {}, 'id': 4, 'type': 3}, {'geometry': {'type': 'Polygon', 'coordinates': [[[241, 2759], [241, 2760], [242, 2759], [241, 2759]]]}, 'properties': {}, 'id': 5, 'type': 3}]}}, {'bag_pand.se_fld1

# Pre Processing

In [5]:
simplify_possibilities = [['D-P', 0], ['D-P', 0.005], ['D-P', 0.001], ['D-P', 0.0005], ['D-P', 0.0001],
                         ['D-P', 0.00005], ['D-P', 0.00001], ['V-W', 0.000005],
                         ['V-W', 0.000001], ['V-W', 0.0000005], ['V-W', 0.0000001], ['V-W', 0.00000005], 
                         ['V-W', 0.00000001]]

MAX_LENGTH_DEFICIT = -0.1

In [1]:
Lines = []
Polygons = []
MultiPolygons = []
a=0
for row in pand_centrum_data[0:10]:
    print(str(a) + " / " + str(len(pand_centrum_data)), end="\r")
    a = a + 1
    keys = row.keys()
    
    for key in keys:
        for element in row[key]['features']:
            
            if element['geometry']['type'] == 'LineString': 
                Lines.append(element['geometry']['coordinates'])
            
            if element['geometry']['type'] == 'Polygon':
                Polygons.append(element['geometry']['coordinates'][0])
                
            #if element['geometry']['type'] == 'MultiPolygon':
                #MultiPolygons.append(element['geometry']['coordinates'])
    
    

#test = lvl10_data[0]['spoor.se_fld12_lijngeometrie2d']['features'][0]['geometry']['coordinates']
#print(Polygons)

NameError: name 'Proj' is not defined

In [8]:
test = geometry.Polygon(pand_centrum_data[0]['bag_pand.se_fld12_geometrie']['features'][0]['geometry']['coordinates'][0])
#pand_centrum_data[0]['bag_pand.se_fld12_geometrie']['features'][0]['geometry']['coordinates'][0]
test.length


AttributeError: 'list' object has no attribute '_geom'

In [43]:
results_list = []
length_list = []
scale_factor = ScaleFactor(Polygons)
print("Scale Factor done")


# Decide order from longest polygon to smallest polygon
for row in Polygons:

    length_list.append([row, len(row)])

length_list.sort(key=operator.itemgetter(1), reverse=True)
print("Sorted the Polygons")
a=0
for element in length_list[:20000]:
    print(str(a) + " / " + str(len(length_list)), end="\r")
    a = a + 1
    results_dict = {}
    poly1 = geometry.Polygon(element[0])
    results = []
    for possibility in simplify_possibilities:

        if possibility[0] == 'D-P':
            # Simplification function Douglas-Peucker
            time_start = time()
            simplified_coordinates = simplify_coords(element[0], possibility[1])
            time_end = time()
            process_time = time_end - time_start

        if possibility[0] == 'V-W':
            # Simplification function Visvalingam-Whyatt
            time_start = time()
            simplified_coordinates = simplify_coords_vw(element[0], possibility[1])
            time_end = time()
            process_time = time_end - time_start
        
        if len(simplified_coordinates) >= 3:
            poly2 = geometry.Polygon(simplified_coordinates)
            length_deficit = (poly2.length - poly1.length) / poly1.length
        
            # If the length deficit of the polygon is smaller(greater) than the provided MAX_LENGTH_DEFICIT, 
            # the score gets saved
            if length_deficit > MAX_LENGTH_DEFICIT:
                score = ScoreFormula(len(element[0]), len(simplified_coordinates), process_time)
                results.append(score)
    
    results_dict['polygon'] = Add_Zero_Padding(Add_One_Hot(Normalize_Geometry(element[0], scale_factor)), len(length_list[0][0]))
    results_dict['algorithm'] = results.index(max(results))
    results_list.append(results_dict)

X = []
y = []
a = 0
for element in results_list:
    print(str(a) + " / " + str(len(length_list)), end="\r")
    a = a + 1
    
    X.append(element['polygon'])
    y.append(element['algorithm'])
X = np.array(X)
y = np.array(y)
# Calculate the deficit in the number of 
#point_deficit = len(coordinates) - len(simplified_coordinates)
#print('Point Deficit: ' + str(point_deficit) + ' out of ' + str(len(coordinates)))

#old_area = PolyArea(old_xs,old_ys)
#new_area = PolyArea(new_xs,new_ys)
#area_deficit_percentage = (new_area - old_area) / old_area
#print(area_deficit_percentage)

Scale Factor done
Sorted the Polygons
19999 / 429768

# Data Stats

In [44]:
pd.DataFrame({'keys':Counter(y).keys(),
              'freq':Counter(y).values()})

Unnamed: 0,keys,freq
0,7,53
1,8,211
2,11,1169
3,12,1853
4,9,396
5,0,15181
6,10,750
7,2,51
8,4,63
9,6,89


In [None]:
# Select index of simplification possibility
INDEX = 6


possibility = simplify_possibilities[INDEX]

if possibility[0] == 'D-P':
    # Simplification function Douglas-Peucker
    simplified_coordinates = simplify_coords(coordinates, possibility[1])

if possibility[0] == 'V-W':
    # Simplification function Visvalingam-Whyatt
    simplified_coordinates = simplify_coords_vw(coordinates, possibility[1])

old_xs, old_ys = zip(*coordinates)
new_xs, new_ys = zip(*simplified_coordinates)

print(len(simplified_coordinates))
print(len(coordinates))

# Plotting

In [None]:
plt.figure()
plt.plot(old_xs, old_ys)
plt.plot(new_xs, new_ys)
plt.show()

# Keras

In [45]:
input_shape = X[0].shape
print(input_shape)
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', input_shape=input_shape))
model.add(layers.MaxPooling1D(3,3))

model.add(layers.Conv1D(64, 5, activation='relu'))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(len(simplify_possibilities), activation='softmax'))

print(model.summary())

(200, 5)
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 196, 32)           832       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 65, 32)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 61, 64)            10304     
_________________________________________________________________
global_average_pooling1d (Gl (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 13)                845       
Total params: 11,981
Trainable params: 11,981
Non-trainable params: 0
_________________________________________________________

In [46]:
model.compile(loss='sparse_categorical_crossentropy',
                optimizer='adam', metrics=['accuracy'])

BATCH_SIZE = 400
EPOCHS = 50

history = model.fit(X,
                    y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2,
                    verbose=1)

Train on 16000 samples, validate on 4000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50

KeyboardInterrupt: 

In [47]:
y

array([7, 8, 8, ..., 0, 0, 0])

# PyTorch