# Import Libraries

In [1]:
import sys
sys.path.append('..')
for p in sys.path:
    print(p)

/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Scripts/map-generalization-thesis-2021/notebooks
/opt/anaconda3/envs/infoviz/lib/python37.zip
/opt/anaconda3/envs/infoviz/lib/python3.7
/opt/anaconda3/envs/infoviz/lib/python3.7/lib-dynload

/opt/anaconda3/envs/infoviz/lib/python3.7/site-packages
/opt/anaconda3/envs/infoviz/lib/python3.7/site-packages/IPython/extensions
/Users/davemeijdam/.ipython
..


In [54]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sqlite3
#import mapbox_vector_tile
from time import time
import operator
from collections import Counter
import json
import os
import math
import pickle
import random
import torch
from sklearn.utils import resample

from tensorflow.keras import Sequential
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam

import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms

from shapely import geometry 
from PIL import Image, ImageDraw
from simplification.cutil import (
    simplify_coords,
    simplify_coords_idx,
    simplify_coords_vw,
    simplify_coords_vw_idx,
    simplify_coords_vwp,
)

# Define Functions

In [6]:
def create_connection(db_file):
    """ create a database connection to a SQLite database """
    try:
        conn = sqlite3.connect(db_file)
        print(conn)
    except Error as e:
        print(e)
    
    return conn

def PolyArea(x,y):
    return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

def ScoreFormula(old_number_of_datapoints, new_number_of_datapoints, processing_time):
    return (1 - (new_number_of_datapoints / old_number_of_datapoints)) * (1 - processing_time)


def ScaleFactor(all_geometries):
    b_list = []
    
    for geometries in all_geometries:
        
        polygon = geometry.Polygon(geometries)
        centroid = np.array(polygon.centroid)
        coordinates = np.vstack(geometries)
        
        b = coordinates - centroid
        b_min = np.min(b)
        b_max = np.max(b)
        b_list.append(b_min)
        b_list.append(b_max)
        
    return np.std(b_list)
    
def Normalize_Geometry(coordinates1, scale_factor):
    polygon = geometry.Polygon(coordinates1)
    centroid = np.array(polygon.centroid)
    coordinates2 = np.vstack(coordinates1)
    
    return (coordinates2 - centroid) / scale_factor

def Add_One_Hot(normalized_geometry):
    normalized_geometry = np.insert(normalized_geometry, 2, 1, axis=1)
    normalized_geometry = np.insert(normalized_geometry, 3, 0, axis=1)
    normalized_geometry = np.insert(normalized_geometry, 4, 0, axis=1)
    normalized_geometry[len(normalized_geometry)-1,2] = 0
    normalized_geometry[len(normalized_geometry)-1,4] = 1
    
    return normalized_geometry

def Add_Zero_Padding(one_hotted_geometry, max_length):
    boundary = max_length - len(one_hotted_geometry)
    zero_matrix = np.zeros([boundary,5])
    return np.append(one_hotted_geometry, zero_matrix, axis=0)

def CreateGrid(poly, dx, dy):
    
    x_ls = []
    y_ls = []

    for a in poly:
        x_ls.append(a[0])
    for a in poly:
        y_ls.append(a[1])
        
    minx = min(x_ls)
    maxx = max(x_ls)
    miny = min(y_ls)
    maxy = max(y_ls)

    nx = int(math.ceil(abs(maxx - minx)/dx))
    ny = int(math.ceil(abs(maxy - miny)/dy))

    grid = []       
    for i in range(ny):   
        grid.append(geometry.LineString([[minx,max(maxy-dy*i,miny)], [maxx, max(maxy-dy*i,miny)]]))

    for j in range(nx):
        grid.append(geometry.LineString([[min(minx+dx*j,maxx), maxy], [min(minx+dx*j,maxx), miny]]))
    
    return grid
    
def CheckSameIntersections(poly, simplified_coords, grid, ROUNDING):
    
    original = geometry.Polygon(poly)
    simplified = geometry.Polygon(simplified_coords)

    o_ls = []
    s_ls = []
    for line in grid:
        x = original.intersection(line)
        y = simplified.intersection(line)
        if x:
            if x.geom_type == 'Point':
                o_ls.append(hash(tuple([round(x.coords[0][0],ROUNDING), round(x.coords[0][1],ROUNDING)])))
            if x.geom_type == 'LineString':
                for xy in x.coords:
                    o_ls.append(hash(tuple([round(xy[0],ROUNDING), round(xy[1],ROUNDING)])))
    
        if y:
            if y.geom_type == 'Point':
                s_ls.append(hash(tuple([round(y.coords[0][0],ROUNDING), round(y.coords[0][1],ROUNDING)])))
            if y.geom_type == 'LineString':
                for xy in y.coords:
                    s_ls.append(hash(tuple([round(xy[0],ROUNDING), round(xy[1],ROUNDING)])))
        
    return len(list(set(o_ls).intersection(s_ls))) / len(set(o_ls))

    
def alter_by_zoom(poly, zoom):

    mpp = {
    '0' : 156543,
    '1' : 78271.5,
    '2' : 39135.8,
    '3' : 19567.88,
    '4' : 9783.94,
    '5' : 4891.97,
    '6' : 2445.98,
    '7' : 1222.99,
    '8' : 611.5,
    '9' : 305.75,
    '10' : 152.87,
    '11' : 76.44,
    '12' : 38.219,
    '13' : 19.109,
    '14' : 9.555,
    '15' : 4.777,
    '16' : 2.3887,
    '17' : 1.1943,
    '18' : 0.5972,
    '19' : 0.2986,
    '20' : 0.14929,
    '21' : 0.074646,
    '22' : 0.037323
    }
    return (np.array(poly) / mpp[str(zoom)]).tolist()


def check_pixel_similarity(original_coords, simplified_coords, zoom):
    
    poly1 = alter_by_zoom(original_coords, zoom)
    poly2 = alter_by_zoom(simplified_coords, zoom)

    x = []
    y = []
    for a in poly1:
        x.append(a[0])
        y.append(a[1])
    
    for a in poly1:
        a[0] = a[0] - min(x)
        a[1] = a[1] - min(y)
    
    for a in poly2:
        a[0] = a[0] - min(x)
        a[1] = a[1] - min(y)
    
    width = int(max(x) - min(x))
    height = int(max(y) - min(y))

    poly1 = [tuple(x) for x in poly1]
    poly2 = [tuple(x) for x in poly2]

    img1 = Image.new('L', (width, height), 0)
    ImageDraw.Draw(img1).polygon(poly1, outline=1, fill=0)
    mask1 = np.array(img1)
    
    img2 = Image.new('L', (width, height), 0)
    ImageDraw.Draw(img2).polygon(poly2, outline=1, fill=0)
    mask2 = np.array(img2)
    
    return np.sum(mask1 == mask2) / (width*height)
    #return mask1

#check_pixel_similarity(Polygons[2], Polygons[2], 17)

In [None]:
geometry.Polygon(Polygons[2])

# Load Data

## Pand Centrum

In [None]:
conn_pand_centrum = create_connection("/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/SQLite/Pand_26116_centrum.db")

cur = conn_pand_centrum.cursor()
cur.execute("SELECT data FROM tiles;")

rows = cur.fetchall()
pand_centrum_data = []
for row in rows:
    pand_centrum_data.append(mapbox_vector_tile.decode(row[0]))
    #print(row[0])
print(len(pand_centrum_data))

## Wegdeel Buiten

In [None]:
conn_wegdeel_buiten = create_connection("/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/SQLite/Wegdeel_23770_buitengebied.db")

cur = conn_wegdeel_buiten.cursor()
cur.execute("SELECT data FROM tiles;")

rows = cur.fetchall()
wegdeel_buiten_data = []
for row in rows:
    wegdeel_buiten_data.append(mapbox_vector_tile.decode(row[0]))

In [7]:
path = '/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/Sample_data_03_05/'
Polygons = []
Types = []

for filename in os.listdir(path):
    if "geometrie." in filename:
        print(filename)
        
        f = open(str(path + filename))
        jsondata = json.load(f)
        
        

        for a in jsondata['features']:
            if len(a['geometry']['coordinates']) == 1:
                Polygons.append(a['geometry']['coordinates'][0])
                Types.append(a['geometry']['type'])
            if a['geometry']['type'] == 'LineString':
                Polygons.append(a['geometry']['coordinates'])
                Types.append(a['geometry']['type'])
            else:
                for b in a['geometry']['coordinates']:
                    Polygons.append(b)
                    Types.append(a['geometry']['type'])
            
geometry_df = pd.DataFrame({'geometry':Polygons,
                            'type':Types})
    
    
    

#f = open('/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Data/Sample_data_03_05/spoor_export_buitengebied_geometrie.json')
#wegdeeljson = json.load(f)
#wegdeeljson



waterdeel_export_stedelijk_geometrie.json
wegdeel_export_buitengebied_geometrie.json
bag_pand_buitengebeid_export_geometrie.json
wegdeel_export_stedelijk_geometrie.json
spoor_export_stedelijk_geometrie.json
waterdeel_export_buitengebied_geometrie.json
bag_pand_stedelijk_export_geometrie.json
spoor_export_buitengebied_geometrie.json


In [None]:
wegdeeljson['features'][:5]


In [None]:
wegdeeljson['features'][0]['geometry']['type']

In [None]:
import shapely.geometry as sg
import shapely.ops as so
import matplotlib.pyplot as plt

ls = []
#for a in wegdeeljson['features'][:5]:
#    ls.append(geometry.Polygon(a['geometry']['coordinates'][0]))

new_shape = so.cascaded_union(ls)
fig, axs = plt.subplots()
axs.set_aspect('equal', 'datalim')

for geom in new_shape.geoms:    
    xs, ys = geom.exterior.xy    
    axs.fill(xs, ys, alpha=1, fc='r', ec='none')

plt.show()

In [None]:
import shapely.geometry as sg
import shapely.ops as so
import matplotlib.pyplot as plt


ls = []
for element in wegdeel_buiten_data[3]['wegdeel.se_fld15_vlakgeometrie2d']['features']:
    
    #print(element['geometry']['coordinates'][0])
    #geometry.Polygon(element['geometry']['coordinates'][0])
    element2 = element['geometry']
    
    if element2['type'] == 'MultiPolygon':
        if element2['coordinates']:
            for poly in element2['coordinates'][0]:
                print(poly)
                ls.append(geometry.Polygon(poly))
    
    else:
        ls.append(geometry.Polygon(element['geometry']['coordinates'][0]))

#r1 = sg.Polygon([[243, 2760], [242, 2760], [242, 2761], [243, 2760]])
#r2 = sg.Polygon([[243, 2759], [243, 2760], [244, 2760], [244, 2759], [243, 2759]])
#r3 = sg.Polygon([[244, 2759], [243, 2759], [243, 2760], [244, 2760], [244, 2759]])
#r4 = sg.Polygon([[243, 2759], [242, 2759], [242, 2760], [243, 2760], [243, 2759]])
#r5 = sg.Polygon([[241, 2759], [241, 2760], [242, 2759], [241, 2759]])

new_shape = so.cascaded_union(ls)
fig, axs = plt.subplots()
axs.set_aspect('equal', 'datalim')

for geom in new_shape.geoms:    
    xs, ys = geom.exterior.xy    
    axs.fill(xs, ys, alpha=1, fc='r', ec='none')

plt.show()

# Pre Processing

## Parameters

In [8]:
# Simplification Possibilities
simplify_possibilities = [['D-P', 0], ['D-P', 0.5], ['D-P', 0.1], ['D-P', 0.05], ['D-P', 0.01], ['D-P', 0.005], 
                          ['D-P', 0.001], ['V-W', 0.5], ['V-W', 0.1], ['V-W', 0.05], ['V-W', 0.01], 
                          ['V-W', 0.005], ['V-W', 0.001], ['V-W', 0.0005], ['V-W', 0.0001], ['V-W', 0.00005]]

# Polygon length evaluation
MAX_LENGTH_DEFICIT = -0.1

# Grid
dx = 1
dy = 1
ROUNDING = 1

MIN_INTERSECTIONS_PERC = 0.75

In [None]:
Lines = []
Polygons = []
MultiPolygons = []
a=0
for row in pand_centrum_data[:10000]:
    print(str(a) + " / " + str(len(pand_centrum_data)), end="\r")
    a = a + 1
    keys = row.keys()
    
    for key in keys:
        for element in row[key]['features']:
            
            if element['geometry']['type'] == 'LineString': 
                Lines.append(element['geometry']['coordinates'])
            
            if element['geometry']['type'] == 'Polygon':
                Polygons.append(element['geometry']['coordinates'][0])
                
            #if element['geometry']['type'] == 'MultiPolygon':
                #MultiPolygons.append(element['geometry']['coordinates'])
    
    

#test = lvl10_data[0]['spoor.se_fld12_lijngeometrie2d']['features'][0]['geometry']['coordinates']
#print(Polygons)

In [None]:
Lines = []
Polygons = []
MultiPolygons = []
a=0
for row in wegdeel_buiten_data:
    print(str(a) + " / " + str(len(wegdeel_buiten_data)), end="\r")
    a = a + 1
    keys = row.keys()
    
    for key in keys:
        for element in row[key]['features']:
            
            if element['geometry']['type'] == 'LineString': 
                Lines.append(element['geometry']['coordinates'])
            
            if element['geometry']['type'] == 'Polygon':
                Polygons.append(element['geometry']['coordinates'][0])
                
            if element['geometry']['type'] == 'MultiPolygon':
                if element['geometry']['coordinates']:
                    for poly in element['geometry']['coordinates'][0]:
                        MultiPolygons.append(poly)
    
    

#test = lvl10_data[0]['spoor.se_fld12_lijngeometrie2d']['features'][0]['geometry']['coordinates']
#print(Polygons)

In [None]:
#print(len(Lines))
print(len(Polygons))
#print(len(MultiPolygons))

ls = []
for a in Polygons:
    ls.append(len(a))
    
pd.DataFrame({'lengths':Counter(ls).keys(),
              'freq':Counter(ls).values()})

In [9]:
Polygons = list(geometry_df['geometry'][geometry_df['type'] == 'Polygon'])
len(Polygons)

303244

In [10]:
results_list = []
length_list = []
Polygons_sample = random.sample(Polygons, 50000)
scale_factor = ScaleFactor(Polygons_sample)
print("Scale Factor done")


# Decide order from longest polygon to smallest polygon
for row in Polygons_sample:

    length_list.append([row, len(row)])

length_list.sort(key=operator.itemgetter(1), reverse=True)
print("Sorted the Polygons")
a=0
for element in length_list:
    print(str(a) + " / " + str(len(length_list)), end="\r")
    a = a + 1
    results_dict = {}
    poly1 = geometry.Polygon(element[0])
    results = []
    #grid = CreateGrid(element[0], dx, dy)
    
    for possibility in simplify_possibilities:
        

        if possibility[0] == 'D-P':
            # Simplification function Douglas-Peucker
            time_start = time()
            simplified_coordinates = simplify_coords(element[0], possibility[1])
            time_end = time()
            process_time = time_end - time_start

        if possibility[0] == 'V-W':
            # Simplification function Visvalingam-Whyatt
            time_start = time()
            simplified_coordinates = simplify_coords_vw(element[0], possibility[1])
            time_end = time()
            process_time = time_end - time_start
        
        
        if len(simplified_coordinates) >= 3:
            poly2 = geometry.Polygon(simplified_coordinates)
            #length_deficit = (poly2.length - poly1.length) / poly1.length
        
            # If the length deficit of the polygon is smaller(greater) than the provided MAX_LENGTH_DEFICIT, 
            # the score gets saved
            #if length_deficit > MAX_LENGTH_DEFICIT:
            
            #if length_deficit == 0:
            #    score = ScoreFormula(len(element[0]), len(simplified_coordinates), process_time)
            #    results.append(score)
            #    continue
                
            #try:
            #    if CheckSameIntersections(element[0], simplified_coordinates, grid, ROUNDING) > MIN_INTERSECTIONS_PERC:
            #        score = ScoreFormula(len(element[0]), len(simplified_coordinates), process_time)
            #        results.append(score)
            #except Exception:
            #    continue
            
            if np.isnan(check_pixel_similarity(element[0], simplified_coordinates, 17)) == True:
                results.append('Remove')
                break
                
                
            if check_pixel_similarity(element[0], simplified_coordinates, 17) == 1:
                score = ScoreFormula(len(element[0]), len(simplified_coordinates), process_time)
                results.append(score)
                
    
    results_dict['polygon'] = Add_Zero_Padding(Add_One_Hot(Normalize_Geometry(element[0], scale_factor)), len(length_list[0][0]))
    if results[0] == 'Remove':
        results_dict['algorithm'] = len(simplify_possibilities)
        
    else:    
        results_dict['algorithm'] = results.index(max(results))
        
    results_list.append(results_dict)

Scale Factor done
Sorted the Polygons
2673 / 50000



49999 / 50000

In [109]:
# Saving Labels and Normalized Data
pickle.dump( results_list, open( "/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Scripts/data/temp/results_list.p", "wb" ) )

# Data Balancing

In [24]:
results_list = pickle.load( open( "/Users/davemeijdam/Documents/Data Science/Master/Master Thesis/Scripts/data/temp/results_list.p", "rb" ) )

results_df = pd.DataFrame(results_list)

df_majority = results_df[results_df.algorithm == 0]
df_minority = results_df[results_df.algorithm != 0]

df_majority_downsampled = resample(df_majority,
                                  replace=False,
                                  n_samples=5000,
                                  random_state=123)

df_downsampled = pd.concat([df_majority_downsampled, df_minority])

results_list = df_downsampled.to_dict('records')


X = []
y = []
a = 0
for element in results_list:
    print(str(a) + " / " + str(len(length_list)), end="\r")
    a = a + 1
    
    X.append(element['polygon'])
    y.append(element['algorithm'])
X = np.array(X)
y = np.array(y)

0 / 500001 / 500002 / 500003 / 500004 / 500005 / 500006 / 500007 / 500008 / 500009 / 5000010 / 5000011 / 5000012 / 5000013 / 5000014 / 5000015 / 5000016 / 5000017 / 5000018 / 5000019 / 5000020 / 5000021 / 5000022 / 5000023 / 5000024 / 5000025 / 5000026 / 5000027 / 5000028 / 5000029 / 5000030 / 5000031 / 5000032 / 5000033 / 5000034 / 5000035 / 5000036 / 5000037 / 5000038 / 5000039 / 5000040 / 5000041 / 5000042 / 5000043 / 5000044 / 5000045 / 5000046 / 5000047 / 5000048 / 5000049 / 5000050 / 5000051 / 5000052 / 5000053 / 5000054 / 5000055 / 5000056 / 5000057 / 5000058 / 5000059 / 5000060 / 5000061 / 5000062 / 5000063 / 5000064 / 5000065 / 5000066 / 5000067 / 5000068 / 5000069 / 5000070 / 5000071 / 5000072 / 5000073 / 5000074 / 5000075 / 5000076 / 5000077 / 5000078 / 5000079 / 5000080 / 5000081 / 5000082 / 5000083 / 5000084 / 5000085 / 5000086 / 5000087 / 5000088 / 5000089 / 5000090 / 5000091 / 5000

1457 / 500001458 / 500001459 / 500001460 / 500001461 / 500001462 / 500001463 / 500001464 / 500001465 / 500001466 / 500001467 / 500001468 / 500001469 / 500001470 / 500001471 / 500001472 / 500001473 / 500001474 / 500001475 / 500001476 / 500001477 / 500001478 / 500001479 / 500001480 / 500001481 / 500001482 / 500001483 / 500001484 / 500001485 / 500001486 / 500001487 / 500001488 / 500001489 / 500001490 / 500001491 / 500001492 / 500001493 / 500001494 / 500001495 / 500001496 / 500001497 / 500001498 / 500001499 / 500001500 / 500001501 / 500001502 / 500001503 / 500001504 / 500001505 / 500001506 / 500001507 / 500001508 / 500001509 / 500001510 / 500001511 / 500001512 / 500001513 / 500001514 / 500001515 / 500001516 / 500001517 / 500001518 / 500001519 / 500001520 / 500001521 / 500001522 / 500001523 / 500001524 / 500001525 / 500001526 / 500001527 / 500001528 / 500001529 / 500001530 / 500001531 / 500001532 / 500001533 / 50000

2354 / 500002355 / 500002356 / 500002357 / 500002358 / 500002359 / 500002360 / 500002361 / 500002362 / 500002363 / 500002364 / 500002365 / 500002366 / 500002367 / 500002368 / 500002369 / 500002370 / 500002371 / 500002372 / 500002373 / 500002374 / 500002375 / 500002376 / 500002377 / 500002378 / 500002379 / 500002380 / 500002381 / 500002382 / 500002383 / 500002384 / 500002385 / 500002386 / 500002387 / 500002388 / 500002389 / 500002390 / 500002391 / 500002392 / 500002393 / 500002394 / 500002395 / 500002396 / 500002397 / 500002398 / 500002399 / 500002400 / 500002401 / 500002402 / 500002403 / 500002404 / 500002405 / 500002406 / 500002407 / 500002408 / 500002409 / 500002410 / 500002411 / 500002412 / 500002413 / 500002414 / 500002415 / 500002416 / 500002417 / 500002418 / 500002419 / 500002420 / 500002421 / 500002422 / 500002423 / 500002424 / 500002425 / 500002426 / 500002427 / 500002428 / 500002429 / 500002430 / 5000

3353 / 500003354 / 500003355 / 500003356 / 500003357 / 500003358 / 500003359 / 500003360 / 500003361 / 500003362 / 500003363 / 500003364 / 500003365 / 500003366 / 500003367 / 500003368 / 500003369 / 500003370 / 500003371 / 500003372 / 500003373 / 500003374 / 500003375 / 500003376 / 500003377 / 500003378 / 500003379 / 500003380 / 500003381 / 500003382 / 500003383 / 500003384 / 500003385 / 500003386 / 500003387 / 500003388 / 500003389 / 500003390 / 500003391 / 500003392 / 500003393 / 500003394 / 500003395 / 500003396 / 500003397 / 500003398 / 500003399 / 500003400 / 500003401 / 500003402 / 500003403 / 500003404 / 500003405 / 500003406 / 500003407 / 500003408 / 500003409 / 500003410 / 500003411 / 500003412 / 500003413 / 500003414 / 500003415 / 500003416 / 500003417 / 500003418 / 500003419 / 500003420 / 500003421 / 500003422 / 500003423 / 500003424 / 500003425 / 500003426 / 500003427 / 500003428 / 500003429 / 50000

4353 / 500004354 / 500004355 / 500004356 / 500004357 / 500004358 / 500004359 / 500004360 / 500004361 / 500004362 / 500004363 / 500004364 / 500004365 / 500004366 / 500004367 / 500004368 / 500004369 / 500004370 / 500004371 / 500004372 / 500004373 / 500004374 / 500004375 / 500004376 / 500004377 / 500004378 / 500004379 / 500004380 / 500004381 / 500004382 / 500004383 / 500004384 / 500004385 / 500004386 / 500004387 / 500004388 / 500004389 / 500004390 / 500004391 / 500004392 / 500004393 / 500004394 / 500004395 / 500004396 / 500004397 / 500004398 / 500004399 / 500004400 / 500004401 / 500004402 / 500004403 / 500004404 / 500004405 / 500004406 / 500004407 / 500004408 / 500004409 / 500004410 / 500004411 / 500004412 / 500004413 / 500004414 / 500004415 / 500004416 / 500004417 / 500004418 / 500004419 / 500004420 / 500004421 / 500004422 / 500004423 / 500004424 / 500004425 / 500004426 / 500004427 / 500004428 / 500004429 / 5000

5824 / 500005825 / 500005826 / 500005827 / 500005828 / 500005829 / 500005830 / 500005831 / 500005832 / 500005833 / 500005834 / 500005835 / 500005836 / 500005837 / 500005838 / 500005839 / 500005840 / 500005841 / 500005842 / 500005843 / 500005844 / 500005845 / 500005846 / 500005847 / 500005848 / 500005849 / 500005850 / 500005851 / 500005852 / 500005853 / 500005854 / 500005855 / 500005856 / 500005857 / 500005858 / 500005859 / 500005860 / 500005861 / 500005862 / 500005863 / 500005864 / 500005865 / 500005866 / 500005867 / 500005868 / 500005869 / 500005870 / 500005871 / 500005872 / 500005873 / 500005874 / 500005875 / 500005876 / 500005877 / 500005878 / 500005879 / 500005880 / 500005881 / 500005882 / 500005883 / 500005884 / 500005885 / 500005886 / 500005887 / 500005888 / 500005889 / 500005890 / 500005891 / 500005892 / 500005893 / 500005894 / 500005895 / 500005896 / 500005897 / 500005898 / 500005899 / 500005900 / 50000

6852 / 500006853 / 500006854 / 500006855 / 500006856 / 500006857 / 500006858 / 500006859 / 500006860 / 500006861 / 500006862 / 500006863 / 500006864 / 500006865 / 500006866 / 500006867 / 500006868 / 500006869 / 500006870 / 500006871 / 500006872 / 500006873 / 500006874 / 500006875 / 500006876 / 500006877 / 500006878 / 500006879 / 500006880 / 500006881 / 500006882 / 500006883 / 500006884 / 500006885 / 500006886 / 500006887 / 500006888 / 500006889 / 500006890 / 500006891 / 500006892 / 500006893 / 500006894 / 500006895 / 500006896 / 500006897 / 500006898 / 500006899 / 500006900 / 500006901 / 500006902 / 500006903 / 500006904 / 500006905 / 500006906 / 500006907 / 500006908 / 500006909 / 500006910 / 500006911 / 500006912 / 500006913 / 500006914 / 500006915 / 500006916 / 500006917 / 500006918 / 500006919 / 500006920 / 500006921 / 500006922 / 500006923 / 500006924 / 500006925 / 500006926 / 500006927 / 500006928 / 5000

8351 / 500008352 / 500008353 / 500008354 / 500008355 / 500008356 / 500008357 / 500008358 / 500008359 / 500008360 / 500008361 / 500008362 / 500008363 / 500008364 / 500008365 / 500008366 / 500008367 / 500008368 / 500008369 / 500008370 / 500008371 / 500008372 / 500008373 / 500008374 / 500008375 / 500008376 / 500008377 / 500008378 / 500008379 / 500008380 / 500008381 / 500008382 / 500008383 / 500008384 / 500008385 / 500008386 / 500008387 / 500008388 / 500008389 / 500008390 / 500008391 / 500008392 / 500008393 / 500008394 / 500008395 / 500008396 / 500008397 / 500008398 / 500008399 / 500008400 / 500008401 / 500008402 / 500008403 / 500008404 / 500008405 / 500008406 / 500008407 / 500008408 / 500008409 / 500008410 / 500008411 / 500008412 / 500008413 / 500008414 / 500008415 / 500008416 / 500008417 / 500008418 / 500008419 / 500008420 / 500008421 / 500008422 / 500008423 / 500008424 / 500008425 / 500008426 / 500008427 / 50000

9851 / 500009852 / 500009853 / 500009854 / 500009855 / 500009856 / 500009857 / 500009858 / 500009859 / 500009860 / 500009861 / 500009862 / 500009863 / 500009864 / 500009865 / 500009866 / 500009867 / 500009868 / 500009869 / 500009870 / 500009871 / 500009872 / 500009873 / 500009874 / 500009875 / 500009876 / 500009877 / 500009878 / 500009879 / 500009880 / 500009881 / 500009882 / 500009883 / 500009884 / 500009885 / 500009886 / 500009887 / 500009888 / 500009889 / 500009890 / 500009891 / 500009892 / 500009893 / 500009894 / 500009895 / 500009896 / 500009897 / 500009898 / 500009899 / 500009900 / 500009901 / 500009902 / 500009903 / 500009904 / 500009905 / 500009906 / 500009907 / 500009908 / 500009909 / 500009910 / 500009911 / 500009912 / 500009913 / 500009914 / 500009915 / 500009916 / 500009917 / 500009918 / 500009919 / 500009920 / 500009921 / 500009922 / 500009923 / 500009924 / 500009925 / 500009926 / 500009927 / 5000

11350 / 5000011351 / 5000011352 / 5000011353 / 5000011354 / 5000011355 / 5000011356 / 5000011357 / 5000011358 / 5000011359 / 5000011360 / 5000011361 / 5000011362 / 5000011363 / 5000011364 / 5000011365 / 5000011366 / 5000011367 / 5000011368 / 5000011369 / 5000011370 / 5000011371 / 5000011372 / 5000011373 / 5000011374 / 5000011375 / 5000011376 / 5000011377 / 5000011378 / 5000011379 / 5000011380 / 5000011381 / 5000011382 / 5000011383 / 5000011384 / 5000011385 / 5000011386 / 5000011387 / 5000011388 / 5000011389 / 5000011390 / 5000011391 / 5000011392 / 5000011393 / 5000011394 / 5000011395 / 5000011396 / 5000011397 / 5000011398 / 5000011399 / 5000011400 / 5000011401 / 5000011402 / 5000011403 / 5000011404 / 5000011405 / 5000011406 / 5000011407 / 5000011408 / 5000011409 / 5000011410 / 5000011411 / 5000011412 / 5000011413 / 5000011414 / 5000011415 / 5000011416 / 5000011417 / 5000011418 / 5000011419 / 5000011420 / 5000011421 

12850 / 5000012851 / 5000012852 / 5000012853 / 5000012854 / 5000012855 / 5000012856 / 5000012857 / 5000012858 / 5000012859 / 5000012860 / 5000012861 / 5000012862 / 5000012863 / 5000012864 / 5000012865 / 5000012866 / 5000012867 / 5000012868 / 5000012869 / 5000012870 / 5000012871 / 5000012872 / 5000012873 / 5000012874 / 5000012875 / 5000012876 / 5000012877 / 5000012878 / 5000012879 / 5000012880 / 5000012881 / 5000012882 / 5000012883 / 5000012884 / 5000012885 / 5000012886 / 5000012887 / 5000012888 / 5000012889 / 5000012890 / 5000012891 / 5000012892 / 5000012893 / 5000012894 / 5000012895 / 5000012896 / 5000012897 / 5000012898 / 5000012899 / 5000012900 / 5000012901 / 5000012902 / 5000012903 / 5000012904 / 5000012905 / 5000012906 / 5000012907 / 5000012908 / 5000012909 / 5000012910 / 5000012911 / 5000012912 / 5000012913 / 5000012914 / 5000012915 / 5000012916 / 5000012917 / 5000012918 / 5000012919 / 5000012920 / 5000012921

14321 / 5000014322 / 5000014323 / 5000014324 / 5000014325 / 5000014326 / 5000014327 / 5000014328 / 5000014329 / 5000014330 / 5000014331 / 5000014332 / 5000014333 / 5000014334 / 5000014335 / 5000014336 / 5000014337 / 5000014338 / 5000014339 / 5000014340 / 5000014341 / 5000014342 / 5000014343 / 5000014344 / 5000014345 / 5000014346 / 5000014347 / 5000014348 / 5000014349 / 5000014350 / 5000014351 / 5000014352 / 5000014353 / 5000014354 / 5000014355 / 5000014356 / 5000014357 / 5000014358 / 5000014359 / 5000014360 / 5000014361 / 5000014362 / 5000014363 / 5000014364 / 5000014365 / 5000014366 / 5000014367 / 5000014368 / 5000014369 / 5000014370 / 5000014371 / 5000014372 / 5000014373 / 5000014374 / 5000014375 / 5000014376 / 5000014377 / 5000014378 / 5000014379 / 5000014380 / 5000014381 / 5000014382 / 5000014383 / 5000014384 / 5000014385 / 5000014386 / 5000014387 / 5000014388 / 5000014389 / 5000014390 / 5000014391 / 5000014392 

15349 / 5000015350 / 5000015351 / 5000015352 / 5000015353 / 5000015354 / 5000015355 / 5000015356 / 5000015357 / 5000015358 / 5000015359 / 5000015360 / 5000015361 / 5000015362 / 5000015363 / 5000015364 / 5000015365 / 5000015366 / 5000015367 / 5000015368 / 5000015369 / 5000015370 / 5000015371 / 5000015372 / 5000015373 / 5000015374 / 5000015375 / 5000015376 / 5000015377 / 5000015378 / 5000015379 / 5000015380 / 5000015381 / 5000015382 / 5000015383 / 5000015384 / 5000015385 / 5000015386 / 5000015387 / 5000015388 / 5000015389 / 5000015390 / 5000015391 / 5000015392 / 5000015393 / 5000015394 / 5000015395 / 5000015396 / 5000015397 / 5000015398 / 5000015399 / 5000015400 / 5000015401 / 5000015402 / 5000015403 / 5000015404 / 5000015405 / 5000015406 / 5000015407 / 5000015408 / 5000015409 / 5000015410 / 5000015411 / 5000015412 / 5000015413 / 5000015414 / 5000015415 / 5000015416 / 5000015417 / 5000015418 / 5000015419 / 5000015420

16753 / 5000016754 / 5000016755 / 5000016756 / 5000016757 / 5000016758 / 5000016759 / 5000016760 / 5000016761 / 5000016762 / 5000016763 / 5000016764 / 5000016765 / 5000016766 / 5000016767 / 5000016768 / 5000016769 / 5000016770 / 5000016771 / 5000016772 / 5000016773 / 5000016774 / 5000016775 / 5000016776 / 5000016777 / 5000016778 / 5000016779 / 5000016780 / 5000016781 / 5000016782 / 5000016783 / 5000016784 / 5000016785 / 5000016786 / 5000016787 / 5000016788 / 5000016789 / 5000016790 / 5000016791 / 5000016792 / 5000016793 / 5000016794 / 5000016795 / 5000016796 / 5000016797 / 5000016798 / 5000016799 / 5000016800 / 5000016801 / 5000016802 / 5000016803 / 5000016804 / 5000016805 / 5000016806 / 5000016807 / 5000016808 / 5000016809 / 5000016810 / 5000016811 / 5000016812 / 5000016813 / 5000016814 / 5000016815 / 5000016816 / 5000016817 / 5000016818 / 5000016819 / 5000016820 / 5000016821 / 5000016822 / 5000016823 / 5000016824 

17848 / 5000017849 / 5000017850 / 5000017851 / 5000017852 / 5000017853 / 5000017854 / 5000017855 / 5000017856 / 5000017857 / 5000017858 / 5000017859 / 5000017860 / 5000017861 / 5000017862 / 5000017863 / 5000017864 / 5000017865 / 5000017866 / 5000017867 / 5000017868 / 5000017869 / 5000017870 / 5000017871 / 5000017872 / 5000017873 / 5000017874 / 5000017875 / 5000017876 / 5000017877 / 5000017878 / 5000017879 / 5000017880 / 5000017881 / 5000017882 / 5000017883 / 5000017884 / 5000017885 / 5000017886 / 5000017887 / 5000017888 / 5000017889 / 5000017890 / 5000017891 / 5000017892 / 5000017893 / 5000017894 / 5000017895 / 5000017896 / 5000017897 / 5000017898 / 5000017899 / 5000017900 / 5000017901 / 5000017902 / 5000017903 / 5000017904 / 5000017905 / 5000017906 / 5000017907 / 5000017908 / 5000017909 / 5000017910 / 5000017911 / 5000017912 / 5000017913 / 5000017914 / 5000017915 / 5000017916 / 5000017917 / 5000017918 / 5000017919

19078 / 5000019079 / 5000019080 / 5000019081 / 5000019082 / 5000019083 / 5000019084 / 5000019085 / 5000019086 / 5000019087 / 5000019088 / 5000019089 / 5000019090 / 5000019091 / 5000019092 / 5000019093 / 5000019094 / 5000019095 / 5000019096 / 5000019097 / 5000019098 / 5000019099 / 5000019100 / 5000019101 / 5000019102 / 5000019103 / 5000019104 / 5000019105 / 5000019106 / 5000019107 / 5000019108 / 5000019109 / 5000019110 / 5000019111 / 5000019112 / 5000019113 / 5000019114 / 5000019115 / 5000019116 / 5000019117 / 5000019118 / 5000019119 / 5000019120 / 5000019121 / 5000019122 / 5000019123 / 5000019124 / 5000019125 / 5000019126 / 5000019127 / 5000019128 / 5000019129 / 5000019130 / 5000019131 / 5000019132 / 5000019133 / 5000019134 / 5000019135 / 5000019136 / 5000019137 / 5000019138 / 5000019139 / 5000019140 / 5000019141 / 5000019142 / 5000019143 / 5000019144 / 5000019145 / 5000019146 / 5000019147 / 5000019148 / 5000019149

20347 / 5000020348 / 5000020349 / 5000020350 / 5000020351 / 5000020352 / 5000020353 / 5000020354 / 5000020355 / 5000020356 / 5000020357 / 5000020358 / 5000020359 / 5000020360 / 5000020361 / 5000020362 / 5000020363 / 5000020364 / 5000020365 / 5000020366 / 5000020367 / 5000020368 / 5000020369 / 5000020370 / 5000020371 / 5000020372 / 5000020373 / 5000020374 / 5000020375 / 5000020376 / 5000020377 / 5000020378 / 5000020379 / 5000020380 / 5000020381 / 5000020382 / 5000020383 / 5000020384 / 5000020385 / 5000020386 / 5000020387 / 5000020388 / 5000020389 / 5000020390 / 5000020391 / 5000020392 / 5000020393 / 5000020394 / 5000020395 / 5000020396 / 5000020397 / 5000020398 / 5000020399 / 5000020400 / 5000020401 / 5000020402 / 5000020403 / 5000020404 / 5000020405 / 5000020406 / 5000020407 / 5000020408 / 5000020409 / 5000020410 / 5000020411 / 5000020412 / 5000020413 / 5000020414 / 5000020415 / 5000020416 / 5000020417 / 5000020418

21420 / 5000021421 / 5000021422 / 5000021423 / 5000021424 / 5000021425 / 5000021426 / 5000021427 / 5000021428 / 5000021429 / 5000021430 / 5000021431 / 5000021432 / 5000021433 / 5000021434 / 5000021435 / 5000021436 / 5000021437 / 5000021438 / 5000021439 / 5000021440 / 5000021441 / 5000021442 / 5000021443 / 5000021444 / 5000021445 / 5000021446 / 5000021447 / 5000021448 / 5000021449 / 5000021450 / 5000021451 / 5000021452 / 5000021453 / 5000021454 / 5000021455 / 5000021456 / 5000021457 / 5000021458 / 5000021459 / 5000021460 / 5000021461 / 5000021462 / 5000021463 / 5000021464 / 5000021465 / 5000021466 / 5000021467 / 5000021468 / 5000021469 / 5000021470 / 5000021471 / 5000021472 / 5000021473 / 5000021474 / 5000021475 / 5000021476 / 5000021477 / 5000021478 / 5000021479 / 5000021480 / 5000021481 / 5000021482 / 5000021483 / 5000021484 / 5000021485 / 5000021486 / 5000021487 / 5000021488 / 5000021489 / 5000021490 / 5000021491 

22668 / 5000022669 / 5000022670 / 5000022671 / 5000022672 / 5000022673 / 5000022674 / 5000022675 / 5000022676 / 5000022677 / 5000022678 / 5000022679 / 5000022680 / 5000022681 / 5000022682 / 5000022683 / 5000022684 / 5000022685 / 5000022686 / 5000022687 / 5000022688 / 5000022689 / 5000022690 / 5000022691 / 5000022692 / 5000022693 / 5000022694 / 5000022695 / 5000022696 / 5000022697 / 5000022698 / 5000022699 / 5000022700 / 5000022701 / 5000022702 / 5000022703 / 5000022704 / 5000022705 / 5000022706 / 5000022707 / 5000022708 / 5000022709 / 5000022710 / 5000022711 / 5000022712 / 5000022713 / 5000022714 / 5000022715 / 5000022716 / 5000022717 / 5000022718 / 5000022719 / 5000022720 / 5000022721 / 5000022722 / 5000022723 / 5000022724 / 5000022725 / 5000022726 / 5000022727 / 5000022728 / 5000022729 / 5000022730 / 5000022731 / 5000022732 / 5000022733 / 5000022734 / 5000022735 / 5000022736 / 5000022737 / 5000022738 / 5000022739

23763 / 5000023764 / 5000023765 / 5000023766 / 5000023767 / 5000023768 / 5000023769 / 5000023770 / 5000023771 / 5000023772 / 5000023773 / 5000023774 / 5000023775 / 5000023776 / 5000023777 / 5000023778 / 5000023779 / 5000023780 / 5000023781 / 5000023782 / 5000023783 / 5000023784 / 5000023785 / 5000023786 / 5000023787 / 5000023788 / 5000023789 / 5000023790 / 5000023791 / 5000023792 / 5000023793 / 5000023794 / 5000023795 / 5000023796 / 5000023797 / 5000023798 / 5000023799 / 5000023800 / 5000023801 / 5000023802 / 5000023803 / 5000023804 / 5000023805 / 5000023806 / 5000023807 / 5000023808 / 5000023809 / 5000023810 / 5000023811 / 5000023812 / 5000023813 / 5000023814 / 5000023815 / 5000023816 / 5000023817 / 5000023818 / 5000023819 / 5000023820 / 5000023821 / 5000023822 / 5000023823 / 5000023824 / 5000023825 / 5000023826 / 5000023827 / 5000023828 / 5000023829 / 5000023830 / 5000023831 / 5000023832 / 5000023833 / 5000023834 

24898 / 5000024899 / 5000024900 / 5000024901 / 5000024902 / 5000024903 / 5000024904 / 5000024905 / 5000024906 / 5000024907 / 5000024908 / 5000024909 / 5000024910 / 5000024911 / 5000024912 / 5000024913 / 5000024914 / 5000024915 / 5000024916 / 5000024917 / 5000024918 / 5000024919 / 5000024920 / 5000024921 / 5000024922 / 5000024923 / 5000024924 / 5000024925 / 5000024926 / 5000024927 / 5000024928 / 5000024929 / 5000024930 / 5000024931 / 5000024932 / 5000024933 / 5000024934 / 5000024935 / 5000024936 / 5000024937 / 5000024938 / 5000024939 / 5000024940 / 5000024941 / 5000024942 / 5000024943 / 5000024944 / 5000024945 / 5000024946 / 5000024947 / 5000024948 / 5000024949 / 5000024950 / 5000024951 / 5000024952 / 5000024953 / 5000024954 / 5000024955 / 5000024956 / 5000024957 / 5000024958 / 5000024959 / 5000024960 / 5000024961 / 5000024962 / 5000024963 / 5000024964 / 5000024965 / 5000024966 / 5000024967 / 5000024968 / 5000024969

# Data Stats

In [25]:
pd.DataFrame({'keys':list(Counter(y).keys()),
              'freq':list(Counter(y).values())})

Unnamed: 0,keys,freq
0,0,5000
1,1,6292
2,2,2093
3,3,1276
4,6,2176
5,4,2944
6,5,1665
7,12,112
8,7,1699
9,8,671


In [None]:
print(X[0])

In [None]:
# Select index of simplification possibility
INDEX = 6


possibility = simplify_possibilities[INDEX]

if possibility[0] == 'D-P':
    # Simplification function Douglas-Peucker
    simplified_coordinates = simplify_coords(coordinates, possibility[1])

if possibility[0] == 'V-W':
    # Simplification function Visvalingam-Whyatt
    simplified_coordinates = simplify_coords_vw(coordinates, possibility[1])

old_xs, old_ys = zip(*coordinates)
new_xs, new_ys = zip(*simplified_coordinates)

print(len(simplified_coordinates))
print(len(coordinates))

# Plotting

In [None]:
plt.figure()
plt.plot(old_xs, old_ys)
plt.plot(new_xs, new_ys)
plt.show()

# Keras

In [26]:
input_shape = X[0].shape
print(input_shape)
model = Sequential()
model.add(layers.Conv1D(32, 5, activation='relu', input_shape=input_shape))
model.add(layers.MaxPooling1D(3,3))

model.add(layers.Conv1D(64, 5, activation='relu'))
model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(len(simplify_possibilities)+1, activation='softmax'))

print(model.summary())

(3936, 5)
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 3932, 32)          832       
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 1310, 32)          0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 1306, 64)          10304     
_________________________________________________________________
global_average_pooling1d (Gl (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 17)                1105      
Total params: 12,241
Trainable params: 12,241
Non-trainable params: 0
_________________________________________________________________
None


In [33]:
model.compile(loss='sparse_categorical_crossentropy',
                optimizer=Adam(lr=1e-4), metrics=['accuracy'])

BATCH_SIZE = 32
EPOCHS = 50

history = model.fit(X,
                    y,
                    batch_size=BATCH_SIZE,
                    epochs=EPOCHS,
                    validation_split=0.2,
                    verbose=1)

Train on 20265 samples, validate on 5067 samples
Epoch 1/50
Epoch 2/50
 1792/20265 [=>............................] - ETA: 2:46 - loss: 1.9666 - accuracy: 0.3364

KeyboardInterrupt: 

In [44]:
results_tensor = []
for a in results_list:
    x_tensor = torch.Tensor(a['polygon'])
    y_tensor = torch.tensor(int(a['algorithm']))
    results_tensor.append([x_tensor, y_tensor])

In [77]:
for x, y in results_tensor:
    print(len(x))


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936
3936


KeyboardInterrupt: 

# PyTorch

In [103]:
batch_size = 64
test_batch_size = 1000
epochs = 14
lr = 0.001
gamma = 0.7
log_interval = 10
dry_run = False
save_model = False


class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(3936, 32, 5)
        self.pool1 = nn.MaxPool1d(3, stride=3)
        self.conv2 = nn.Conv1d(32, 64, 5)
        self.pool2 = nn.AvgPool1d(64)
        self.fc1 = nn.Linear(17,17)

    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = F.softmax(self.fc1(x))
        return x


net = Net()

In [104]:
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    criterion = nn.CrossEntropyLoss()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()), end="\r")
            if dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    criterion = nn.CrossEntropyLoss(reduction='sum')
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += criterion(output, target).item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [105]:
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')

    device = "cpu"

    train_kwargs = {'batch_size': batch_size}
    test_kwargs = {'batch_size': test_batch_size}

    #transform=transforms.Compose([
    #    transforms.ToTensor(),
    #    transforms.Normalize((0.1307,), (0.3081,))
    #    ])
    dataset1 = results_tensor
    dataset2 = results_tensor
    train_loader = torch.utils.data.DataLoader(dataset1,**train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")
        
main()

RuntimeError: Given input size: (32x1x1). Calculated output size: (32x1x0). Output size is too small

In [87]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(len(X), 32, 5)
        self.pool1 = nn.MaxPool1d(3, 3)
        self.conv2 = nn.Conv1d(61, 64, 5)
        self.pool2 = nn.AvgPool1d(64)
        self.fc1 = nn.Linear(13,13)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.softmax(self.fc1(x))
        return x


net = Net()

In [None]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')