In [1]:
import pandas as pd
import json
from scipy.spatial.distance import cdist
import numpy as np
import csv

In [2]:
with open('data/original/eco.json') as json_file:
    eco_json = json.load(json_file)

In [3]:
mobile_station_data = pd.read_csv('data/tranformed/data-49158-2020-10-16.csv', 
                                  index_col='Unnamed: 0')

In [4]:
mobile_station_data = mobile_station_data[mobile_station_data['Rank'] == 0]

In [5]:
def closest_point(point, points):
    """ Find closest point from a list of points. """
    return points[cdist([point], points).argmin()]

In [6]:
# vectorized haversine function
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
    """
    slightly modified version: of http://stackoverflow.com/a/29546836/2901002

    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees or in radians)

    All (lat, lon) coordinates must have numeric dtypes and be of equal length.

    """
    if to_radians:
        lat1, lon1, lat2, lon2 = np.radians([lat1, lon1, lat2, lon2])

    a = np.sin((lat2-lat1)/2.0)**2 + \
        np.cos(lat1) * np.cos(lat2) * np.sin((lon2-lon1)/2.0)**2

    return earth_radius * 2 * np.arcsin(np.sqrt(a))

In [7]:
data_list = []
check_points = [[e[0][0], e[0][1]]for e in eco_json]
for current_element in mobile_station_data.values:
    current_closest_point = closest_point([current_element[5], 
                                           current_element[4]], 
                                  check_points)
    distance = haversine(current_element[5], 
                         current_element[4],  
                         current_closest_point[0],
                         current_closest_point[1],
                         to_radians=True
                        ) * 1000
    if distance > 3000:
        data_list.append({'Latitude':current_element[5], 
             'Longitude':current_element[4], 
             'Distance':distance})

In [8]:
data = pd.DataFrame(data_list,columns=['Latitude', 'Longitude', 'Distance'])


In [9]:
data

Unnamed: 0,Latitude,Longitude,Distance
0,56.007634,37.797262,5141.766473
1,55.992985,37.168481,5592.220499
2,55.616954,37.315334,3203.782659
3,55.789978,37.799032,3698.631756
4,55.816885,37.823017,3018.826508
...,...,...,...
663,55.966030,37.187755,4416.818112
664,55.793207,37.814429,3896.539756
665,55.970878,37.150795,6536.635919
666,55.692130,37.744720,3486.095855


In [10]:
data.to_csv('data/final/gut_place.csv', sep=';', quoting=csv.QUOTE_NONNUMERIC, index=False)