# Preprocess and Save the Transportation / Business Saturation DataFrame

In [62]:
%%writefile preprocess_map_data.py
import os
import pickle
from pyrosm import OSM, get_data

import argparse
# Command-line script: extract the points of interest selected by a pickled
# filter from the Bangkok OSM extract and save them, together with a merged
# ``poi_type`` column, as a CSV file.

DATA_DIR = './data'
PBF_FILE = './data/Bangkok.osm.pbf'


def _str_to_bool(value):
    """Convert a command-line token such as 'true' / 'False' / '1' to a bool.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is True
    because every non-empty string is truthy.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got {!r}'.format(value))


parser = argparse.ArgumentParser()
parser.add_argument("--score_type", help="Input type of score would like to preprocess/update ['business' or 'transporation']",type=str)
# Accepts both a bare ``--update_pbf`` flag and an explicit value
# (``--update_pbf True`` / ``--update_pbf False``).
parser.add_argument("--update_pbf", help="Update map information file (Protobuf)",
                    type=_str_to_bool, nargs='?', const=True, default=False)
args = parser.parse_args()
score_type = args.score_type
if score_type not in ['business', 'transporation']:
    raise Exception("Undefine score type")

LOAD_FILE = './data/{}_filter_dict.pickle'.format(score_type)
SAVE_FILE = './data/{}-latest.csv'.format(score_type)

# Download the extract when an update is requested or no local copy exists.
# The download directory is the same one that is checked and read from;
# the previous "/data" target did not match the "./data" paths used elsewhere.
if args.update_pbf or not os.path.isfile(PBF_FILE):
    fp = get_data("bangkok", directory=DATA_DIR, update=True)
else:
    fp = PBF_FILE
osm = OSM(fp)

# NOTE: pickle.load can execute arbitrary code; LOAD_FILE must be a trusted,
# locally generated file.
with open(LOAD_FILE, 'rb') as f:
    my_filter, combine_dict = pickle.load(f)
    print('Finish load infos from: {}'.format(LOAD_FILE))

filter_key = list(my_filter.keys())
pois = osm.get_pois(custom_filter=my_filter)

# Concatenate the text of every filtered tag column (missing values count as
# empty strings) into a single ``poi_type`` column.
merged_filter_col = pois[filter_key[0]].fillna('')
for tag_column in filter_key[1:]:
    merged_filter_col = merged_filter_col + pois[tag_column].fillna('')
pois['poi_type'] = merged_filter_col

pois.to_csv(SAVE_FILE)
print('Finish save preprocess data to: {}'.format(SAVE_FILE))

Overwriting preprocess_map_data.py


# Main

In [41]:
import json
import geopandas
import numpy as np
import pandas as pd
from scipy.stats import rankdata

from shapely import wkt
from shapely.geometry import Polygon
from AreaMap import AreaMap, boundingBox


In [28]:
def replace_or_combine_tag(df, keyword, replace_word):
    """Overwrite ``poi_type`` with ``replace_word`` on every matching row.

    A row matches when its ``poi_type`` text contains ``keyword``, compared
    case-insensitively; rows whose ``poi_type`` is missing never match.
    ``keyword`` is handed to pandas ``str.contains`` unchanged.

    ``df`` is modified in place and nothing is returned.
    """
    matches = df['poi_type'].str.contains(keyword, case=False, na=False)
    # Use a plain positional boolean array as the row selector.
    df.loc[matches.to_numpy(dtype=bool), 'poi_type'] = replace_word

def compute_score(categoried_df):
    """Rank the points against each other per category and score them.

    Args:
        categoried_df: list with one single-column DataFrame per point. The
            i-th frame must have a column named ``'Point i'`` and is indexed
            by category; its values are the per-category counts.

    Returns:
        A tuple ``(score_df, description_df)``:

        * ``score_df`` - the per-point frames joined side by side (missing
          counts filled with 0) plus a final ``'Overall score'`` row. The
          overall score is the mean rank of a point over all categories,
          scaled so that the maximum is 100.
        * ``description_df`` - one sentence per category and point stating
          the point's position for that category (1 = highest count).
    """
    n_points = len(categoried_df)
    point_cols = ['Point {}'.format(i) for i in range(n_points)]

    # Join each point's frame side by side; a category absent for a point
    # counts as 0.
    df = pd.concat(categoried_df, axis=1).fillna(0)
    categories = df.index.tolist()

    # Running sum of the rank each point received. With n points a single
    # rank lies between 1 (lowest count) and n (highest count).
    sum_rank = np.zeros(n_points)
    description = {col: [] for col in point_cols}

    for category, row in df.iterrows():
        rank = rankdata([row[col] for col in point_cols], method='max')
        for col, point_rank in zip(point_cols, rank):
            # Invert so that the point with the highest count is reported
            # as position 1, whatever the number of points is.
            position = int(n_points - point_rank + 1)
            description[col].append(
                'This location have {} score rank: {}'.format(category, position))
        sum_rank += rank

    # Compute score (max score = 100). Without any category there is
    # nothing to rank, so every point scores 0.
    n_categories = len(categories)
    if n_categories:
        score = (sum_rank / n_categories) * (100 / n_points)
    else:
        score = np.zeros(n_points)

    # The last row holds the overall score.
    df.loc['Overall score'] = score
    return df, pd.DataFrame(description, index=categories)


def return_format(clean_result, score_df, rank_df):
    """Assemble the per-point response dictionary.

    Args:
        clean_result: sequence of per-point objects whose ``to_json()``
            returns a JSON document with a ``'features'`` list; each feature
            carries a ``'properties'`` entry.
        score_df: DataFrame with one ``'Point i'`` column per point.
        rank_df: DataFrame with one ``'Point i'`` column per point.

    Returns:
        A dict keyed by ``'Point i'``; each value holds the point's
        ``'score'`` and ``'rank'`` columns (as plain dicts) and the list of
        feature ``'properties'``.
    """
    scores_by_point = json.loads(score_df.to_json())
    ranks_by_point = json.loads(rank_df.to_json())

    def _feature_properties(frame):
        # Keep only the real data of each feature; the other generated
        # entries of the feature are ignored.
        document = json.loads(frame.to_json())
        return [feature['properties'] for feature in document['features']]

    formatted = {}
    for position, frame in enumerate(clean_result):
        label = 'Point {}'.format(position)
        formatted[label] = {
            'score': scores_by_point[label],
            'rank': ranks_by_point[label],
            'properties': _feature_properties(frame),
        }
    return formatted


In [33]:
# Score selector: True = Transportation score, False = Business Saturation score.
score = True

# Coordinates of the points to evaluate, written as (lat, long) pairs.
_POINT_COORDINATES = (
    (13.757929379398433, 100.5655348237836),
    (13.7649760109863, 100.53827980930785),
    (13.72193753690904, 100.53024243361882),
    (13.711183848783898, 100.48792930114243),
)

# One {'lat': ..., 'long': ...} dict per point, in the order listed above.
interested_points = [
    {'lat': lat, 'long': long} for lat, long in _POINT_COORDINATES
]
# if score:
#     my_filter={
#       "amenity":["bicycle_parking", "bus_station", "ferry_terminal", "fuel", "charging_station", "motorcycle_parking", "parking", "parking_space", "taxi"], 
#       "highway":["platform", "primary", "secondary"],
#       "railway":["construction", 'station']
#     }

#     combine_dict = {
#       'bus': 'Bus stop',
#       'parking': 'Parking space',
#       'primary': 'Primary road',
#       'secondary': 'Secondary road',
#       'station': 'BTS/MRT/SRT',
#       'construction': 'BTS/MRT/SRT (Under construction)',
#       'fuel': 'Gas station'
#     }
# else:
#     business_filter = {
#         "education": [
#             "college", "driving_school", "kindergarten", 
#             "Language_school", "library", "toy_library", 
#             "training", "music_school", "school", 
#             "traffic_park", "university"
#         ],
#         "finance": [
#             "atm", "bank", "bureau_de_change"
#         ],
#         "healthcare": [
#             "baby_hatch", "clinic", "dentist",
#             "hosipital", "nursing_home", "pahrmacy",
#             "social_facility", "veterinary"
#         ],
#         "entertainment-art-cultural": [
#             "arts_centre", "casino", "cinema", "monastery"
#             "community_centre", "conference_centre", "events_venue", 
#             "exhibition_centre", "fountain", "gambling", "music_venue"
#             "nightclub", "planetarium", "public_bookcase",
#             "social_centre", "studio", "theatre", "internet_cafe"
#         ],
#         "facilities": [
#             "marketplace", "mailroom", "townhall", 
#             "post_office", "post_depot", "post_box", "police"]
#     }

#     my_filter = {
#         "amenity": [val for k, v in business_filter.items() for val in v]
#     }

#     combine_dict = {val:k for k, v in business_filter.items() for val in v }

100%|██████████| 4/4 [00:00<00:00, 2084.39it/s]


In [42]:
# pickle is needed below, but the Main section's import cell does not import
# it (it is only imported inside the script written by %%writefile).
import pickle

score_type = 'business'

# Load the (filter, tag -> label mapping) pair saved for this score type.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
# NOTE(review): preprocess_map_data.py reads and writes under ./data/, while
# the paths here are relative to the working directory - confirm the location.
with open('{}_filter_dict.pickle'.format(score_type), 'rb') as f:
    my_filter, combine_dict = pickle.load(f)

bangkok_df = pd.read_csv('{}-latest.csv'.format(score_type))

# The CSV stores geometries as WKT text; parse them back into shapely objects
# before building the GeoDataFrame.
bangkok_df['geometry'] = bangkok_df['geometry'].apply(wkt.loads)
bangkok_gdf = geopandas.GeoDataFrame(bangkok_df, crs='epsg:4326')

In [54]:
# For every interested point, collect the rows of bangkok_gdf whose geometry
# is covered by the rectangle that boundingBox returns for that point.
result = []
for point in interested_points:
    bottom, left, top, right = boundingBox(point['lat'], point['long'], 1)
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    AreaPolygon = Polygon(corners)

    inside_area = bangkok_gdf['geometry'].covered_by(AreaPolygon)
    tmp = bangkok_gdf[inside_area].reset_index(drop=True)

    # Drop the columns whose name starts with "Unnamed".
    keep_columns = ~tmp.columns.str.contains('^Unnamed')
    tmp = tmp.loc[:, keep_columns]
    result.append(tmp)

In [56]:
# Collapse the raw poi_type values into the labels of combine_dict
# (each frame in ``result`` is modified in place).
for frame in result:
    for keyword, label in combine_dict.items():
        replace_or_combine_tag(frame, keyword, label)

# Drop rows that have none of name / tags / operator, then keep one row per
# (name, poi_type) pair.
clean_result = [
    frame
    .dropna(how='all', subset=['name', 'tags', 'operator'])
    .drop_duplicates(subset=['name', 'poi_type'])
    for frame in result
]

# Number of rows per poi_type for every point, stored in a column named
# "Point i". GroupBy.size() always yields a Series of counts, so the result
# does not depend on how apply() infers the return type of a lambda.
categoried_result = [
    frame.groupby('poi_type').size().to_frame(name="Point {}".format(i))
    for i, frame in enumerate(clean_result)
]

score_df, rank_df = compute_score(categoried_result)
ret = return_format(clean_result, score_df, rank_df)