In [42]:
import json
import pandas as pd
import geopandas as gpd

In [49]:
def load_google_as_geopandas(cache_directory):
    """
    This function loads a local Google Maps POI JSON file and converts it into a Geodataframe.

    The records under the top-level 'features' key are flattened into columns,
    the 'geometry.location.coordinates' pairs are split into the
    'geometry.lat' and 'geometry.lng' columns, and point geometries are built
    from those two columns.

    :param cache_directory: str
        Contains the path of the cached POI data file.
    :return:
    data: geopandas
        Contains the cached POI data file formatted as a Geodataframe.
    """
    # JSON text is UTF-8 by specification; the platform default encoding
    # (e.g. cp1252 on Windows) would mis-decode non-ASCII place names.
    with open(cache_directory, encoding='utf-8') as file:
        data_json = json.load(file)
    data = pd.json_normalize(data_json['features'])
    # The first element of each coordinate pair is stored as latitude and the
    # second as longitude.
    # NOTE(review): standard GeoJSON order is [lng, lat] - confirm that the
    # cache file really stores [lat, lng].
    data[['geometry.lat', 'geometry.lng']] = pd.DataFrame(
        data['geometry.location.coordinates'].tolist(), index=data.index)
    data = gpd.GeoDataFrame(data,
                            geometry=gpd.points_from_xy(data['geometry.lng'],
                                                        data['geometry.lat']))
    return data


def load_json_as_geopandas(cache_directory):
    """
    This function loads a local JSON file and converts it into a Geodataframe.

    The records under the top-level 'features' key are flattened into columns.
    The flattened data must already contain the 'geometry.lat' and
    'geometry.lng' columns, which are used to build the point geometries.

    :param cache_directory: str
        Contains the path of the cached POI data file.
    :return:
    data: geopandas
        Contains the cached POI data file formatted as a Geodataframe.
    """
    # JSON text is UTF-8 by specification; the platform default encoding
    # (e.g. cp1252 on Windows) would mis-decode non-ASCII place names.
    with open(cache_directory, encoding='utf-8') as file:
        data_json = json.load(file)
    data = pd.json_normalize(data_json['features'])
    # points_from_xy expects x (longitude) first, then y (latitude).
    data = gpd.GeoDataFrame(data,
                            geometry=gpd.points_from_xy(data['geometry.lng'],
                                                        data['geometry.lat']))
    return data


def convert_to_json(data, *, source='GoogleMap', extraction_date="20201028"):
    """
    This function converts a POI dataframe into a list of feature dictionaries.

    Rows whose place type contains 'route' or 'neighborhood' are skipped. Every
    remaining row becomes one dictionary with the keys 'type', 'geometry',
    'properties', 'id' and 'extraction_date'.

    :param data: pandas or geopandas dataframe
        Must contain the columns 'properties.place_type' (an iterable of
        strings per row), 'geometry.lat', 'geometry.lng',
        'properties.address', 'properties.name' and 'id'.
    :param source: str
        Value stored under properties['source'] of every feature.
    :param extraction_date: str
        Value stored under 'extraction_date' of every feature.
    :return:
    features: list
        Contains one feature dictionary per retained row, in row order.
    """
    columns = ('properties.place_type', 'geometry.lat', 'geometry.lng',
               'properties.address', 'properties.name', 'id')

    features = []
    # Walk the column values in lockstep instead of using data.loc[i, ...] with
    # a positional counter: label lookup breaks on any non-default index (e.g.
    # after filtering rows). Iterating a Series also yields native Python
    # scalars, which json can serialise (NumPy integers cannot be).
    rows = zip(*(data[column] for column in columns))
    for place_type, lat, lng, address, name, poi_id in rows:
        if 'route' in place_type or 'neighborhood' in place_type:
            continue

        features.append({
            'type': 'Feature',
            'geometry': {'lat': lat, 'lng': lng},
            'properties': {'address': address,
                           'name': name,
                           'place_type': '; '.join(place_type),
                           'source': source,
                           'requires_verification': {'summary': 'No'}},
            'id': poi_id,
            'extraction_date': extraction_date,
        })

    return features

In [50]:
# Load the raw Google Maps POI cache into a Geodataframe.
google_directory = '../data/googlemap/googlemap_poi_tampines_25vbb.json'
google_data = load_google_as_geopandas(google_directory)

# Raw location fields and individual address components that are removed;
# only the formatted address is kept (renamed to 'properties.address' below).
unused_columns = [
    'geometry.location.type',
    'geometry.location.coordinates',
    'properties.address.country',
    'properties.address.house_number',
    'properties.address.road',
    'properties.address.city',
    'properties.address.house',
    'properties.address.level',
    'properties.address.unit',
    'properties.address.postcode',
    'properties.address.entrance',
    'properties.address.suburb',
    'properties.address.po_box',
    'properties.address.category',
    'properties.address.near',
    'properties.address.city_district',
    'properties.address.state',
]
google_data = google_data.drop(columns=unused_columns)
google_data = google_data.rename(
    columns={"properties.address.formatted_address": "properties.address"})

# Convert the cleaned rows into feature dictionaries and wrap them in a
# FeatureCollection before writing the result to disk.
formatted_features = convert_to_json(google_data)
feature_collection = {'type': 'FeatureCollection',
                      'features': formatted_features}

with open('../data/googlemap/googlemap_area_poi.json', 'w') as json_file:
    json.dump(feature_collection, json_file)