In [7]:
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
import pathlib
import swifter
import matplotlib.pyplot as plt

from shapely import wkt
from shapely.geometry import Point, Polygon, LineString, MultiPoint

import geojson
import h3

# Project root: three directory levels above the current working directory.
# Built with pathlib (imported above) because `os` is not imported in this cell,
# so the former os.path.join(...) call fails with a NameError on a fresh kernel.
# Kept as a plain str because later cells build paths with DIR_ROOT + '/data/...'.
DIR_ROOT = str(pathlib.Path().absolute() / '../../..')

In [8]:
# Names of all metrics used in the analysis, grouped by family.
# Each name is also the stem of the CSV file the metric is loaded from
# (data/05_metrics/<name>.csv).
metrics = [
    # activity ('act_day' and 'act_stay' are currently switched off)
    # 'act_day',
    # 'act_stay',
    'act_sum',
    # centrality
    'centr_closes', 'centr_eigen_w', 'centr_eigen', 'centr_infos',
    # days
    'day_sum',
    # influence
    'infl_dist', 'infl_reg', 'infl_sum',
    # stays
    'stay_avg', 'stay_sum',
    # travel
    'travel',
]

In [9]:
# Merge all metrics with the list of destinations.
#
# For every metric the CSV is loaded, its columns are renamed so they stay
# unique after merging (p1..p5 -> '<metric>_p1'..'<metric>_p5', 'pall' -> '<metric>'),
# the 'geometry' column is dropped and every remaining column is min-max
# scaled to [0, 1]. All frames are then joined to the destinations in a single
# concat instead of re-concatenating the growing table on every iteration.

df = pd.read_csv(DIR_ROOT + '/data/02_processed/destinations.csv', index_col='name')

metric_frames = []
for metric in metrics:
    df_metric = pd.read_csv(DIR_ROOT + '/data/05_metrics/' + metric + '.csv', index_col=0)

    column_names = {part: metric + '_' + part for part in ('p1', 'p2', 'p3', 'p4', 'p5')}
    column_names['pall'] = metric  # the overall column carries the bare metric name
    df_metric = df_metric.rename(columns=column_names).drop(columns=['geometry'])

    # min-max normalisation per column; min()/max() skip NaN, and a column
    # whose values are all equal ends up as NaN (0 / 0)
    column_min = df_metric.min()
    column_span = df_metric.max() - column_min
    df_metric = (df_metric - column_min) / column_span

    metric_frames.append(df_metric)

# join="inner" keeps only the destinations present in every table
df = pd.concat([df] + metric_frames, axis=1, join="inner")

In [10]:
# Median of the normalised overall values of all metrics for each destination
# (one column per entry of `metrics`; NaN values are skipped).
# Computed column-wise in one vectorised call instead of a Python-level
# apply over every row.
df['median'] = df[metrics].median(axis=1)

# show the destinations with the highest median first
df.sort_values('median', ascending=False)

Unnamed: 0.1,Unnamed: 0,id,no_alle,no_activity,no_reise,no_bi,no_gg,x,y,prazision,...,stay_sum_p4,stay_sum_p5,stay_sum,travel_p1,travel_p2,travel_p3,travel_p4,travel_p5,travel,median
Praha,417,421.0,397.0,170.0,174.0,44.0,9.0,50.09,14.41,1.0,...,1.000000,1.000000,1.000000,,1.000000,1.000000,1.000000,0.819536,1.000000,1.000000e+00
Wien,628,632.0,266.0,115.0,123.0,20.0,8.0,48.22,16.39,1.0,...,0.470588,,0.568966,0.432624,0.857116,0.534321,0.572020,0.000000,0.663685,6.563460e-01
Brno,43,44.0,104.0,42.0,48.0,10.0,4.0,49.19,16.61,1.0,...,0.529412,0.666667,0.344828,0.000000,0.293199,0.339593,0.960822,1.000000,0.654039,3.775099e-01
Písek,393,396.0,28.0,13.0,13.0,1.0,1.0,49.31,14.15,1.0,...,0.000000,,0.137931,0.000000,0.234425,0.471919,0.105475,0.000000,0.285314,2.816955e-01
Znojmo,663,667.0,59.0,27.0,27.0,3.0,2.0,48.86,16.05,1.0,...,0.117647,,0.137931,0.000000,0.073710,0.269838,0.175651,0.000000,0.155545,2.764230e-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Kravaře u České Lípy,227,228.0,3.0,1.0,1.0,1.0,0.0,50.63,14.39,1.0,...,,,0.000000,,0.000000,0.011810,0.000000,0.000000,0.004992,1.789343e-03
Most,311,313.0,6.0,1.0,1.0,2.0,2.0,50.50,13.64,1.0,...,,,0.000000,,0.000000,0.010906,0.000000,0.000000,0.005027,1.093390e-03
Villach,597,601.0,2.0,1.0,1.0,0.0,0.0,46.61,13.86,1.0,...,0.000000,,0.000000,,,,0.212455,,0.200914,5.434500e-08
Friesach,112,113.0,11.0,4.0,4.0,2.0,1.0,46.98,14.33,1.0,...,0.000000,,0.000000,,,0.000000,0.136599,,0.085920,0.000000e+00


In [11]:
# Get the rank of every destination within each metric (1 = highest value).
#
# The rank is the position in a descending sort of the metric column, so every
# destination receives a distinct rank: NaN values are sorted last and the
# order among tied values is whatever the sort yields.
# Ranks are attached by index label; this avoids the former
# reset_index() / set_index('index') round trip, which only works while
# df's index is unnamed, and avoids copying the whole table per metric.

df_matrices_order = pd.DataFrame(index=df.index)

for metric in metrics:
    ordered_names = df[metric].sort_values(ascending=False).index
    df_matrices_order[metric] = pd.Series(
        np.arange(1, len(ordered_names) + 1),
        index=ordered_names
    )

df_matrices_order.loc['Praha']

act_day          11
act_stay          6
act_sum           1
centr_closes      5
centr_eigen_w     1
centr_eigen       1
centr_infos       1
day_sum           1
infl_dist        22
infl_reg          1
infl_sum          1
stay_avg          3
stay_sum          1
travel            1
Name: Praha, dtype: int64

In [12]:
import altair as alt

# Worst rank that still counts as "important"; it drives the selection of
# places, the y-axis domain and the y-axis ticks.
max_rank = 5

# Important places: destinations ranked within the top `max_rank` in at least
# one metric, ordered by name, with the name also available as a column so the
# chart can colour by it.
in_top_ranks = (df_matrices_order[metrics] <= max_rank).any(axis=1)
imp_places = (
    df_matrices_order[in_top_ranks]
    .sort_index()
    .assign(name=lambda places: places.index)
)

alt.renderers.set_embed_options(
    padding={"left": 5, "right": 10, "bottom": 5, "top": 10}
)

# Base chart: the metric columns are folded into key/value pairs, giving one
# point per (place, metric). The windowed 'index' field is not referenced by
# any encoding below.
rank_chart = alt.Chart(imp_places).transform_window(
    index='count()'
).transform_fold(
    metrics
).properties(
    width=500,
).encode(
    color='name:N',
    x=alt.X(
        'key:N',
        axis=alt.Axis(title='metrics')
    ),
    y=alt.Y(
        'value:Q',
        # reversed domain puts rank 1 at the top; the half-step margins keep
        # the outermost ranks off the chart border
        scale=alt.Scale(zero=False, domain=[max_rank + 0.5, 0.5]),
        axis=alt.Axis(
            # one tick per rank, derived from max_rank so ticks and domain stay in sync
            values=list(range(1, max_rank + 1)),
            title='rank',
            tickMinStep=1,
            format='.0f'
        ),
    ),
)

# clip=True hides the parts of the marks that fall outside the rank domain
lines = rank_chart.mark_line(clip=True)
circles = rank_chart.mark_circle(clip=True, size=100)

lines + circles