In [36]:
import numpy as np
from sklearn.cluster import MeanShift
import utm
from sklearn.cluster import MeanShift
import statistics 
from statistics import mode 
import json
import requests
import time
import os
import geocoder
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import haversine_distances
from tqdm import tqdm
from multiprocessing import Process, Pipe
%matplotlib inline

In [48]:
def _most_common(values):
    """Return the most frequent entry of ``values`` as a plain Python scalar."""
    uniques, counts = np.unique(values, return_counts=True)
    # np.unique sorts its output and argmax returns the first maximum, so a tie
    # always resolves to the smallest value instead of depending on set order.
    return uniques[np.argmax(counts)].item()


def parallel(cluster_id, cluster_array,cordinate_array, zone_array, data_array, conn,
             bandwidth=150, p=200):
    """Estimate the point a cluster of observations is aimed at and send it over ``conn``.

    Each row of ``cluster_array`` defines a line through (column 0, column 1)
    with slope ``tan(90 - column 2)``, i.e. column 2 is treated as a compass
    bearing in degrees measured clockwise from north. Random pairs of lines
    are intersected, the intersections are grouped with ``MeanShift`` and the
    centre of the most populated group is converted back to latitude/longitude
    with ``utm.to_latlon``.

    Args:
        cluster_id: Identifier copied into the result unchanged.
        cluster_array: 2-D array; columns 0, 1 and 2 hold the x coordinate,
            the y coordinate and the compass bearing. Extra columns are ignored.
        cordinate_array: Per-observation UTM zone numbers; the most common one
            is used for the conversion back to latitude/longitude.
        zone_array: Per-observation UTM zone letters; the most common one is used.
        data_array: Array of the original observation records, returned as a list.
        conn: Connection-like object; the result dict is passed to ``conn.send``
            and the connection is closed afterwards, whether or not the
            computation succeeded.
        bandwidth: ``MeanShift`` bandwidth used to group the intersections.
        p: Number of random line pairs to intersect.

    The dict sent over ``conn`` has the keys ``cluster_id``,
    ``cluster_item_count``, ``cluster_latitude``, ``cluster_longitude`` and
    ``cluster_objects``.
    """
    try:
        len_of_cluster = len(cluster_array)

        # Each observation becomes the line a*x - y = c with a = tan(90 - compass)
        # and c = a*x0 - y0; rows of eq_coeff_cluster are [a, -1, c].
        compass_array = cluster_array[:, [2]]
        a = np.tan(np.radians(90 - compass_array))
        b = np.ones(np.shape(compass_array))
        c = np.multiply(a, cluster_array[:, [0]]) - cluster_array[:, [1]]
        eq_coeff_cluster = np.hstack((a, -b, c))

        # Draw p random pairs of lines. The second index is the first one shifted
        # by 1..n-1 (mod n), so a line is never paired with itself: such a pair
        # has no intersection and pinv would return an unrelated point.
        first = np.random.randint(0, len_of_cluster, p)
        if len_of_cluster > 1:
            shift = np.random.randint(1, len_of_cluster, p)
            second = (first + shift) % len_of_cluster
        else:
            # A single observation has nothing else to pair with.
            second = first
        pairs = eq_coeff_cluster[np.stack((first, second), axis=1)]

        # Solve the stacked 2x2 systems A @ [x, y]^T = Y. pinv keeps pairs of
        # parallel lines (singular A) from raising.
        A = pairs[:, :, :-1]
        Y = pairs[:, :, -1:]
        # Index the trailing axis instead of squeezing so that p == 1 still
        # yields a (1, 2) array.
        intersections_of_lines = np.matmul(np.linalg.pinv(A), Y)[:, :, 0]

        ms = MeanShift(bandwidth=bandwidth)
        labels = ms.fit_predict(intersections_of_lines)
        cluster_centers = ms.cluster_centers_
        best_center = cluster_centers[_most_common(labels)]

        # Convert the winning centre back using the zone shared by most observations.
        cordinate = _most_common(cordinate_array)
        zone = _most_common(zone_array)
        lat_long = utm.to_latlon(best_center[0], best_center[1], cordinate, zone)

        current_cluster_result = {
            'cluster_id': cluster_id,
            'cluster_item_count': len_of_cluster,
            'cluster_latitude': lat_long[0],
            'cluster_longitude': lat_long[1],
            'cluster_objects': (data_array).tolist(),
        }
        conn.send(current_cluster_result)
    finally:
        # Closing our end lets the receiver see end-of-file instead of waiting
        # forever if nothing could be sent.
        conn.close()

In [49]:
#Make sure dt
def lambda_handler(b1 = 50, b2 = 150, p = 250):
    """Cluster the observations in 'data_ver3.json' and locate one target per cluster.

    The file must contain a JSON object whose 'data' entry is a list of
    records with 'latitude', 'longitude' and 'compass' keys. Positions are
    converted to UTM, grouped with ``MeanShift(bandwidth=b1)``, and every
    group with at least two observations is handed to ``parallel`` in its own
    process. The per-cluster results are collected through pipes.

    Args:
        b1: ``MeanShift`` bandwidth used to group observations by position.
        b2: Accepted for compatibility; not used by this function.
        p: Accepted for compatibility; not used by this function.

    Returns:
        dict with "statusCode" 200 and "body", a JSON string of the form
        ``{"objects": [<result of each worker>, ...]}``. The list is empty
        when the input has no observations.

    Raises:
        RuntimeError: if a worker process exits without sending a result.
    """
    # Read everything up front so the file handle is released before the
    # clustering and the worker processes start.
    with open('data_ver3.json') as json_data:
        data = json.load(json_data)['data']

    if not data:
        # Nothing to cluster; slicing an empty array below would raise.
        return {
            "statusCode": 200,
            "body": json.dumps({'objects': []})
        }

    # Correction added to every compass reading. The original note gives the
    # magnetic declination as 2.85 deg in College Station and 2.28 deg in Houston.
    compass_correction = 2.28

    array = []
    # UTM zone number and zone letter of every observation.
    cordinate_array = []
    zone_array = []
    for d in data:
        ut_cordinates = utm.from_latlon(d['latitude'], d['longitude'])
        array.append([ut_cordinates[0], ut_cordinates[1], d['compass'] + compass_correction])
        cordinate_array.append(ut_cordinates[2])
        zone_array.append(ut_cordinates[3])
    array = np.array(array)
    # One entry per original record, kept so each result can list its members.
    data_array = np.array(list(data))
    cordinate_array = np.array(cordinate_array)
    zone_array = np.array(zone_array)

    # Group the observations by position (first two columns, UTM coordinates).
    ms = MeanShift(bandwidth=b1)
    labels = np.asarray(ms.fit_predict(array[:, [0, 1]]))
    cluster_centers = ms.cluster_centers_

    # array columns are now: UTM x, UTM y, corrected compass, cluster label.
    array = np.hstack((array, labels.reshape(-1, 1)))

    processes = []
    parent_connections = []
    for cluster in range(len(cluster_centers)):
        indices_of_cluster = np.flatnonzero(labels == cluster)
        cluster_array = array[indices_of_cluster]
        # A single observation yields a single line and nothing to intersect.
        if len(cluster_array) < 2:
            continue
        parent_conn, child_conn = Pipe()
        process = Process(target=parallel, args=(cluster, cluster_array, cordinate_array[indices_of_cluster],
                 zone_array[indices_of_cluster], data_array[indices_of_cluster], child_conn,))
        process.start()
        # Drop the parent's copy of the child end: once the worker exits,
        # recv() then raises EOFError instead of blocking forever.
        child_conn.close()
        processes.append(process)
        parent_connections.append((cluster, parent_conn))

    # Receive BEFORE joining. A worker blocks in send() until its payload has
    # been read, so joining first can deadlock on large results.
    return_value = []
    failed_clusters = []
    for cluster, parent_connection in parent_connections:
        try:
            return_value.append(parent_connection.recv())
        except EOFError:
            failed_clusters.append(cluster)
        finally:
            parent_connection.close()

    for process in processes:
        process.join()

    if failed_clusters:
        raise RuntimeError(
            "worker process exited without a result for cluster(s): %s" % failed_clusters
        )

    return {
        "statusCode": 200,
        "body": json.dumps({'objects': return_value})
    }


In [56]:
# Wall-clock timing of one end-to-end run of the pipeline. The response is
# discarded; only the elapsed time in milliseconds is printed.
start = time.time()
lambda_handler()
end = time.time()
elapsed_ms = 1000 * (end - start)
print(elapsed_ms)

621.0441589355469
