In [11]:
"""
Checks if distance between city centers (spb, msk) and
places in dataset more than treshold value.
If yes, than skip the place.
Input:
Dataset with defined latitude and longitude in format:
{"lat": value, "lng": value}
Output:
Similar dataset without places out of treshold radius
from city center.
"""


import json
from json import JSONDecodeError
from geopy import distance


# calculate distance between geopoints
def dist_between(obj_1, obj_2):
    """Return the distance in kilometres between two geopoints.

    Each argument is a mapping that provides "lat" and "lng" keys; the
    distance itself is computed by geopy's ``distance.distance``.
    """
    points = [(obj["lat"], obj["lng"]) for obj in (obj_1, obj_2)]
    return distance.distance(*points).km

# Coordinates of the supported city centers, keyed by city code.
spb_center = dict(lat=59.9342802, lng=30.3350986)
msk_center = dict(lat=55.7522200, lng=37.6155600)
city_centers = dict(spb=spb_center, msk=msk_center)

# City to process and the threshold radius around its center.
city, treshold_radius = "msk", 50

# Working directory and file-name prefixes; the city code and ".json"
# are appended to a prefix when the file is opened.
direct = "D:/Work/Data_files/working_dir/"
input_file, in_city_file, out_of_city_file = (
    "merged_ds_", "dist_clean_", "out_of_dist_")

# Accumulators for the places inside / outside the threshold radius.
places_in_city, places_out_city = [], []

# Parse the input BEFORE opening the output files: opening a file in 'w'
# mode truncates it, so a malformed input would otherwise wipe the
# results of a previous run and leave two empty (invalid JSON) files.
input_path = direct + input_file + city + ".json"
try:
    with open(input_path, 'r', encoding="utf-8") as inf:
        all_places = json.load(inf)
except JSONDecodeError as err:
    # Report the actual parse error (message and position) instead of
    # printing the exception class.
    print("Cannot parse " + input_path + ":", err)
else:
    # Split the places by their distance (km) from the chosen city
    # center: strictly inside the radius -> in-city, otherwise -> out.
    for place in all_places:
        dist = dist_between(city_centers[city], place)
        if dist < treshold_radius:
            places_in_city.append(place)
        else:
            places_out_city.append(place)

    # Only now, with both lists complete, create/overwrite the outputs.
    with open(direct + in_city_file + city + ".json",
              'w', encoding="utf-8") as in_city_f, \
         open(direct + out_of_city_file + city + ".json",
              'w', encoding="utf-8") as out_city_f:
        json.dump(places_in_city, in_city_f)
        json.dump(places_out_city, out_city_f)
       
# check the results:
# Re-read both result files and print a short summary of each.
try:
    with open(direct + in_city_file + city + ".json",
              'r', encoding="utf-8") as in_city_f, \
         open(direct + out_of_city_file + city + ".json",
              'r', encoding="utf-8") as out_city_f:
        places_within = json.load(in_city_f)
        places_out_of = json.load(out_city_f)
except (FileNotFoundError, JSONDecodeError) as err:
    # Print the concrete error (missing file or parse position), not the
    # exception class.
    print("Cannot read the result files:", err)
else:
    print("Number of places within city radius: ",
          len(places_within))
    # Either list may be empty (e.g. every place lies inside the
    # radius); indexing [-1] on an empty list would raise IndexError.
    if places_within:
        print("Place example:\n", places_within[-1])
    print("Number of places out of city: ",
          len(places_out_of))
    if places_out_of:
        print("Place example:\n", places_out_of[-1])


Number of places within city radius:  19622
Place example:
 {'categories': ['Others'], 'duration': None, 'image': {'image': 'https://kudago.com/media/images/place/5e/65/5e6522ce82ee5bbb3f7048081dee1bee.jpg', 'thumbnails': {'640x384': 'https://kudago.com/media/thumbs/640x384/images/place/5e/65/5e6522ce82ee5bbb3f7048081dee1bee.jpg', '144x96': 'https://kudago.com/media/thumbs/144x96/images/place/5e/65/5e6522ce82ee5bbb3f7048081dee1bee.jpg'}, 'source': {'name': 'yandex.kz', 'link': 'https://yandex.kz/collections/card/5b61742f24e06c0095d5d95e/'}}, 'open_hours': {'0': [530, 100], '1': [530, 100], '2': [530, 100], '3': [530, 100], '4': [530, 100], '5': [530, 100], '6': [530, 100]}, 'id_KudaGo': 33187, 'title': 'Станция метро «Ленинский проспект»', 'favorites_count_KudaGo': 0, 'comments_count_KudaGo': 0, 'address': 'просп. Ленинский', 'events_count_KudaGo': 0, 'additional_categories': ['metro'], 'lat': 55.70733378641174, 'lng': 37.58599100197056, 'city_brand': None, 'facebook_checkins': None, '