In [1]:
"""
Convert dataset of KudaGo places to FIFA(ITMO) format
"""

import json
from json import JSONDecodeError
from digitalise_timetable import digitalise
from redefine_category import find_category


# Location code selecting which dataset is processed; it is appended
# to the file-name prefixes below when the paths are built.
location = "spb"    # choose location here (spb, msk)

# Working directory and file-name prefixes. Input and output paths are
# built as: direct + <prefix> + location + ".json"
direct = "D:/Work/Data_files/working_dir/"
origin_file = "places_events_counted_"    # prefix of the input file
converted_file = "new_format_"            # prefix of the output file

def find_duration(categories):
    """
    Estimate the duration of a visit from the place categories.

    Every known category found in `categories` contributes its
    duration; the largest one is returned. Categories without a
    defined duration (e.g. "Others", "Concerts & Shows") contribute
    nothing, and None is returned when no known category is present.
    """

    # Category -> presumable visit duration
    known_durations = {
        "Sights & Landmarks": 15,
        "Museums & Libraries": 180,
        "Nature & Parks": 60,
        "Restaurant": 180,
    }

    matched = [
        value
        for name, value in known_durations.items()
        if name in categories
    ]
    return max(matched) if matched else None

def convert(place):
    """
    Translate one KudaGo place (with events already counted)
    into a record of the FIFA dataset format.

    Input: dict describing a place from the KudaGo dataset.
    Output: new dict with the FIFA field names; fields for which
            KudaGo provides no information are set to None.
    """

    # Metrics that are derived rather than copied
    fifa_categories = find_category(place["categories"])
    visit_duration = find_duration(fifa_categories)

    # KudaGo 'timetable' -> FIFA 'open_hours'
    # (done by the digitalise_timetable module)
    hours = digitalise(place["timetable"])

    # Keep only the first image of the original list, if there is one
    if place["images"]:
        first_image = place["images"][0]
    else:
        first_image = None

    record = {
        # New or changed metrics:
        "categories": fifa_categories,
        "duration": visit_duration,
        "image": first_image,
        "open_hours": hours,

        # Copied from KudaGo as is:
        "id_KudaGo": place["id"],
        "title": place["title"],
        "favorites_count_KudaGo": place["favorites_count"],
        "comments_count_KudaGo": place["comments_count"],
        "address": place["address"],
        # Number of KudaGo events that refer to the place
        # (already calculated and stored in the dataset)
        "events_count_KudaGo": place["events_count"],
        # Original list of categories is kept as well
        "additional_categories": place["categories"],
        # Coordinates:
        "lat": place["coords"]["lat"],
        "lng": place["coords"]["lon"],
    }

    # Fields that are empty in the KudaGo set; "x" and "y" are the
    # coordinates in FIFA format, to be filled later.
    empty_fields = (
        "city_brand",
        "facebook_checkins",
        "facebook_rating",
        "foursquare_checkinsCount",
        "foursquare_rating",
        "foursquare_ratingVotes",
        "foursquare_userCount",
        "instagram_title",
        "instagram_visitorsNumber",
        "tripAdvisor_rating",
        "tripAdvisor_reviewsNumber",
        "wikipedia_page",
        "wikipedia_title",
        "x",
        "y",
    )
    record.update(dict.fromkeys(empty_fields))
    return record

# Convert every place of the chosen location and store the result.
#
# The input is parsed and converted completely BEFORE the output file
# is opened for writing. Opening the output in 'w' mode truncates it,
# so doing that first would destroy an existing result file whenever
# the input turns out to be unreadable or a place fails to convert.
converted = []
try:
    with open(direct + origin_file + location + ".json",
              'r', encoding="utf-8") as pls_f:
        origin = json.load(pls_f)
except JSONDecodeError:
    # Input is not valid JSON: report it and leave the output untouched
    print("Input file cannot be read")
else:
    for place in origin:
        converted.append(convert(place))
    with open(direct + converted_file + location + ".json",
              'w', encoding="utf-8") as ouf:
        json.dump(converted, ouf)
    

# Sanity check: reload the original and the converted datasets,
# compare their sizes and show one sample place from each of them.
with open(direct + origin_file + location + ".json",
          'r', encoding="utf-8") as inf_1, \
     open(direct + converted_file + location + ".json",
          'r', encoding="utf-8") as inf_2:
    initial_set = json.load(inf_1)
    formatted_set = json.load(inf_2)

    # Number of places in each set
    print("Places number in:")
    for label, dataset in (("Initial set: ", initial_set),
                           ("Formatted set: ", formatted_set)):
        print(label, len(dataset))

    # The same sample place (index 10) in both representations
    print("\n\nPlace in:")
    for label, dataset in (("Initial set:\n", initial_set),
                           ("Formatted set:\n", formatted_set)):
        print(label, dataset[10])

Places number in:
Initial set:  3736
Formatted set:  3736


Place in:
Initial set:
 {'id': 118, 'title': 'Музей артиллерии, инженерных войск и войск связи', 'slug': 'art-i-inzh-voysk', 'address': 'Александровский парк, д. 7', 'timetable': 'ср–вс 11:00–18:00 (касса: ср–вс 11:00–17:00)', 'phone': '+7 812 232-02-96', 'is_stub': False, 'body_text': '<p>Петра I очень увлекала военная тематика. Царь понимал, что укрепление оборонных рубежей страны будет способствовать её процветанию и силе. Для популяризации батальной темы в 1703 году он приказал основать Цейхгауз (Кронверк) — место для сохранения «достопамятных» орудий. Экземпляры для него свозились со всей страны. К орудиям добавлялись воинские знамёна, форма и различные виды вооружений. Коллекция музея постоянно пополнялась, формировались его фонды, и в 1965 году ему было присвоено название Военно-исторического музея артиллерии, инженерных войск и войск связи (ВИМАИВиВС).</p>\n<p>На сегодняшний день музей располагает богатейшим собранием 