In [1]:
import numpy as np
from sklearn.cluster import MeanShift
import utm
from sklearn.cluster import MeanShift
import statistics 
from statistics import mode 
import json
import requests
import time
import os
import geocoder
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import haversine_distances
from tqdm import tqdm
from multiprocessing import Process, Pipe
import urllib.request
%matplotlib inline

In [18]:
def parallel(b2, p, cluster_id, cluster_array,cordinate_array, zone_array, data_array, conn):
    """Estimate one object location from a cluster of sightings and send it over ``conn``.

    Every row of ``cluster_array`` is turned into a straight line through the
    row's (x, y) position with a slope derived from its compass heading.
    ``p`` random pairs of distinct lines are intersected, the intersection
    points are clustered with mean shift, and the centre of the most populated
    intersection cluster is converted back to latitude/longitude.

    Parameters
    ----------
    b2 : float
        Bandwidth given to ``MeanShift`` when clustering the intersections.
    p : int
        Number of random line pairs to intersect.
    cluster_id : int
        Identifier copied into the result under ``'cluster_id'``.
    cluster_array : numpy.ndarray
        2-D array; column 0 / 1 are the x / y coordinates handed to
        ``utm.to_latlon`` as easting / northing, column 2 is the compass
        heading in degrees. Extra columns are ignored.
    cordinate_array : numpy.ndarray
        Per-row value passed to ``utm.to_latlon`` as the zone number; the most
        frequent value is used.
    zone_array : numpy.ndarray
        Per-row value passed to ``utm.to_latlon`` as the zone letter; the most
        frequent value is used.
    data_array : numpy.ndarray
        The records belonging to this cluster; returned as a list under
        ``'cluster_objects'``.
    conn : object with ``send``
        The result dictionary is delivered through ``conn.send``.

    Raises
    ------
    ValueError
        If ``cluster_array`` has fewer than two rows (no two distinct lines
        exist to intersect).
    """
    len_of_cluster = len(cluster_array)
    if len_of_cluster < 2:
        raise ValueError("parallel() needs at least two observations to intersect sight lines")

    # Line through (x0, y0) with slope a:  a*x - y = a*x0 - y0.
    # The slope is tan(90 - compass) with the compass heading in degrees.
    compass_array = cluster_array[:, [2]]
    a = np.tan(np.radians(90 - compass_array))
    b = np.ones(np.shape(compass_array))
    c = np.multiply(a, cluster_array[:, [0]]) - cluster_array[:, [1]]
    eq_coeff_cluster = np.hstack((a, -b, c))

    # Sample p ordered pairs of *distinct* rows. Pairing a line with itself
    # gives a rank-deficient system whose pseudo-inverse "solution" is not an
    # intersection and would pollute the clustering below. Adding an offset in
    # [1, n-1] modulo n guarantees the second index differs from the first.
    first_index = np.random.randint(0, len_of_cluster, p)
    offset = np.random.randint(1, len_of_cluster, p)
    second_index = (first_index + offset) % len_of_cluster
    pairwise_indices = np.stack((first_index, second_index), axis=1)
    pairs = eq_coeff_cluster[pairwise_indices]

    # Solve the p stacked 2x2 systems A @ [x, y]^T = Y. pinv keeps this from
    # raising when two sampled lines are parallel (singular A).
    A = pairs[:, :, :-1]
    Y = pairs[:, :, -1:]
    # Drop only the trailing singleton axis so the result stays (p, 2) even
    # when p == 1; a bare squeeze would flatten that case to 1-D.
    intersections_of_lines = np.squeeze(np.matmul(np.linalg.pinv(A), Y), axis=-1)

    ms = MeanShift(bandwidth=b2)
    labels = ms.fit_predict(intersections_of_lines)
    cluster_centers = ms.cluster_centers_

    # Most populated intersection cluster (lowest label wins a tie).
    mode_of_labels = int(np.bincount(labels).argmax())

    cordinate_list = (cordinate_array).tolist()
    cordinate = max(set(cordinate_list), key=cordinate_list.count)

    zone_list = (zone_array).tolist()
    zone = max(set(zone_list), key=zone_list.count)

    best_center = cluster_centers[mode_of_labels]
    lat_long = utm.to_latlon(best_center[0], best_center[1], cordinate, zone)

    current_cluster_result = {
        'cluster_id': cluster_id,
        'cluster_item_count': len_of_cluster,
        'cluster_latitude': lat_long[0],
        'cluster_longitude': lat_long[1],
        'cluster_objects': (data_array).tolist(),
    }
    conn.send(current_cluster_result)

In [19]:
def read_remote_url(url, conn):
    """Fetch a JSON document from ``url`` and send its ``'data'`` member over ``conn``.

    Parameters
    ----------
    url : str
        Address opened with ``urllib.request.urlopen``; the body must be JSON
        with a top-level ``'data'`` key.
    conn : object with ``send``
        Receives the decoded ``'data'`` value through ``conn.send``.

    Raises
    ------
    KeyError
        If the decoded document has no ``'data'`` key.
    """
    # The context manager closes the response (and its socket) even when
    # reading fails; the original left it open.
    with urllib.request.urlopen(url) as response:
        json_data = json.loads(response.read())
    conn.send(json_data['data'])

In [45]:

def lambda_handler(b1 = 50, b2 = 150, p = 250):
    """Download sightings and poles, locate objects, and match each to its nearest pole.

    Steps:

    1. Fetch the sightings and the poles concurrently in two child processes.
    2. Convert every sighting to UTM and cluster the positions with mean shift
       (bandwidth ``b1``).
    3. For every position cluster with at least two sightings, run
       ``parallel`` in its own process to estimate one object location
       (bandwidth ``b2``, ``p`` random line pairs).
    4. Attach to every located object the 1-based index of the closest pole
       (haversine distance) under ``'nearest_pole'``.

    Parameters
    ----------
    b1 : float
        ``MeanShift`` bandwidth for clustering sighting positions.
    b2 : float
        ``MeanShift`` bandwidth forwarded to ``parallel``.
    p : int
        Number of random line pairs forwarded to ``parallel``.

    Returns
    -------
    dict
        ``{"statusCode": 200, "body": {"objects": [...]}}`` where each object
        is the dictionary produced by ``parallel`` plus ``'nearest_pole'``
        (``None`` when the poles list is empty). The list is empty when there
        are no sightings or no cluster has at least two sightings.

    Raises
    ------
    EOFError
        If a child process exits without sending its result.
    """
    empty_response = {
        "statusCode": 200,
        "body": {'objects': []}
    }

    parent1, child1 = Pipe()
    parent2, child2 = Pipe()

    p1 = Process(target=read_remote_url, args=('http://backend.digitaltwincities.info', child1))
    p2 = Process(target=read_remote_url, args=('http://backend.digitaltwincities.info/poles', child2))

    p1.start()
    p2.start()

    # The children own the sending ends now. Closing the parent's copies lets
    # recv() raise EOFError instead of blocking forever if a child dies.
    child1.close()
    child2.close()

    # Receive BEFORE joining: a child blocks in send() until the parent has
    # drained the pipe, so joining first deadlocks on large payloads.
    data = parent1.recv()
    poles_data = parent2.recv()

    p1.join()
    p2.join()

    if not data:
        return empty_response

    array = []
    # The raw records, kept so each cluster can return its members
    data_array = []
    # Third element returned by utm.from_latlon (zone number) per record
    cordinate_array = []
    # Fourth element returned by utm.from_latlon (zone letter) per record
    zone_array = []

    for d in data:
        ut_cordinates = utm.from_latlon(d['latitude'], d['longitude'])
        # NOTE(review): 2.28 looks like a fixed compass correction in degrees
        # (e.g. magnetic declination) -- confirm and name it.
        t_list = [ut_cordinates[0], ut_cordinates[1], d['compass'] + 2.28]
        data_array.append(d)
        cordinate_array.append(ut_cordinates[2])
        zone_array.append(ut_cordinates[3])
        array.append(t_list)
    array = np.array(array)
    data_array = np.array(data_array)
    cordinate_array = np.array(cordinate_array)
    zone_array = np.array(zone_array)

    # Columns of array: UTM easting, UTM northing, corrected compass.
    # Mean shift is run on the two position columns only.
    X = array[:, [0, 1]]
    ms = MeanShift(bandwidth=b1)
    labels = ms.fit_predict(X)
    cluster_centers = ms.cluster_centers_

    # Append the label as a fourth column (easting, northing, compass, label).
    array = np.hstack((array, labels.reshape(-1, 1)))

    # One worker per cluster that has at least two sightings.
    workers = []
    for cluster in range(len(cluster_centers)):
        indices_of_cluster = np.where(labels == cluster)
        cluster_array = array[indices_of_cluster]
        if len(cluster_array) < 2:
            continue
        parent_conn, child_conn = Pipe()
        process = Process(target=parallel, args=(b2, p, cluster, cluster_array, cordinate_array[indices_of_cluster],
                    zone_array[indices_of_cluster], data_array[indices_of_cluster], child_conn,))
        workers.append((process, parent_conn, child_conn))

    for process, _, _ in workers:
        process.start()

    # Same reasoning as above: drop the parent's copies of the sending ends.
    for _, _, child_conn in workers:
        child_conn.close()

    return_value = []
    only_cluster_centers = []
    for process, parent_conn, _ in workers:
        # recv() first, join() second -- see the deadlock note above.
        cluster = parent_conn.recv()
        process.join()
        return_value.append(cluster)
        only_cluster_centers.append([cluster['cluster_latitude'], cluster['cluster_longitude']])

    if not return_value:
        return empty_response

    # Find the closest pole for each located object and store it on the
    # object as 'nearest_pole'.
    poles = np.array([[d['latitude'], d['longitude']] for d in poles_data])
    only_cluster_centers = np.array(only_cluster_centers)

    if len(poles) == 0:
        for cluster in return_value:
            cluster['nearest_pole'] = None
    else:
        # haversine_distances works in radians and returns distances on the
        # unit sphere; 6371000 scales them to metres.
        result = haversine_distances(np.radians(only_cluster_centers), np.radians(poles))
        # NOTE(review): "+ 1" assumes pole ids are the 1-based position of the
        # pole in the /poles response -- confirm against the backend.
        nearest_pole_manual_id = (result.argmin(axis=1) + 1).tolist()
        print("Pole errors", result.min(axis=1)*6371000)
        print("Mean pole errors", result.min(axis=1).mean()*6371000)
        print("Max pole error", result.min(axis=1).max()*6371000)
        for index, cluster in enumerate(return_value):
            cluster['nearest_pole'] = nearest_pole_manual_id[index]

    return {
        "statusCode": 200,
        "body": {'objects': return_value}
    }

In [46]:
def goodness(b1=50, b2=100, p=100, data_path='data_ver3.json'):
    """Run the pipeline and gather the arrays needed to evaluate it.

    Parameters
    ----------
    b1, b2, p
        Forwarded unchanged to ``lambda_handler``.
    data_path : str
        JSON file whose top-level ``'data'`` list holds records with
        ``'latitude'`` and ``'longitude'``. Defaults to the file name the
        notebook originally hard-coded.

    Returns
    -------
    tuple of numpy.ndarray
        ``(collected_data, cluster_centers, true_data)``: the
        (latitude, longitude) pairs read from ``data_path``, the
        (latitude, longitude) of every object returned by ``lambda_handler``,
        and the hard-coded reference locations.
    """
    results = lambda_handler(b1=b1, b2=b2, p=p)['body']['objects']
    cluster_centers = np.array(
        [[cluster['cluster_latitude'], cluster['cluster_longitude']] for cluster in results]
    )

    # The context manager closes the file even if parsing or the 'data'
    # lookup raises.
    with open(data_path) as json_file:
        data = json.load(json_file)['data']
    collected_data = np.array([[d['latitude'], d['longitude']] for d in data])

    # Reference (latitude, longitude) locations used as ground truth.
    true_data = np.array([
        [29.70154390339952, -95.39209601022654],
        [29.70428338419638, -95.39100648039813],
        [29.703335743447496, -95.39130824050142],
        [29.702188761458533, -95.3936918414558],
        [29.702691697389692, -95.39530032710667],
        [29.703085520490866, -95.39629883360872],
        [29.722233773075338, -95.36705220231357],
        [29.72526628586194, -95.37240452291601],
        [29.72734309820575, -95.375255487577],
        [29.728189612898646, -95.37701142524726],
        [29.729725295675568, -95.38003134004325],
        [29.73117267948174, -95.38260255498517],
        [29.731857687219605, -95.38404510951365],
        [29.756970123575087, -95.37525775033865],
        [29.7552538566023, -95.37571750526642],
        [29.755200480497276, -95.3794874504529],
        [29.75464650875399, -95.38024579642178],
        [29.754603020003177, -95.38300456681712],
        [29.752629019819906, -95.38306070220811],
        [29.752095895517666, -95.38491481879633],
    ])

    return collected_data, cluster_centers, true_data

In [47]:
def find_Error(b1=50, b2=150, p=100, print_data = True):
    """Measure how far each reference location is from its nearest located object.

    Parameters
    ----------
    b1, b2, p
        Forwarded unchanged to ``goodness``.
    print_data : bool
        When true, print the per-location distances, their mean and their
        maximum. When false, this function prints nothing itself.

    Returns
    -------
    tuple of float
        ``(mean, maxim)``: mean and maximum, in metres, of the distance from
        each reference location to the closest located object.
    """
    _, cluster_centers, true_data = goodness(b1, b2, p)

    # haversine_distances returns distances on the unit sphere; multiplying
    # by the Earth's radius converts them to metres.
    earth_radius_m = 6371000
    result = haversine_distances(np.radians(true_data), np.radians(cluster_centers)) * earth_radius_m

    # Row i holds the distances from reference location i to every located
    # object; keep the smallest one per row.
    min_distances = result.min(axis=1)
    mean = min_distances.mean()
    maxim = min_distances.max()

    if print_data:
        print("Min distances ", min_distances)
        print("Mean error is {}m".format(mean))
        print("Maximum error is {}m".format(maxim))
    return mean, maxim

In [48]:
# Evaluate the pipeline once; the (mean, max) error in metres is the cell output.
find_Error(b1=50, b2=150, p=100)

Pole errors [15.42430175 19.4592518  44.83437874  9.16294583 71.55319174  5.23516925
  6.50281922  3.61947334  7.7617927  12.18258267  6.37746913 13.01331268
  4.36511677 26.8142986  12.34307113 14.2379882  26.14501983  5.23800575
 10.21548424 37.57675737]
Mean pole errors 17.603121537274866
Max pole error 71.55319173810241
Min distances  [ 4.70139147  3.69329973  3.8878708   4.63681103  3.19171568  5.16559677
  1.85820794  9.28243535  5.55476928 11.54327371  4.31383105  4.54097628
  1.39203835  4.40669808  2.61441637  5.53343948  3.03353469  3.09634655
  2.00183442  2.10752166]
Mean error is 4.327800435561568m
Maximum error is 11.543273705780168m


(4.327800435561568, 11.543273705780168)