In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import shapely
import pathlib
import swifter
import matplotlib.pyplot as plt

from shapely import wkt
from shapely.geometry import Point, Polygon, LineString, MultiPoint

import geojson
import h3

# Project root: three levels above the current working directory.
# Built with pathlib because `os` is never imported in this notebook (the
# previous os.path.join call raised NameError on a fresh kernel). Kept as a
# plain string because paths are appended to it with `+`.
DIR_ROOT = str(pathlib.Path().absolute() / '../../..')

In [4]:
# Names of all metrics used in the analysis, grouped by their name prefix.
# ('act_day' and 'act_stay' are currently left out of the list.)
metrics = (
    # act_* / day_* / stay_* metrics
    ['act_sum', 'day_sum', 'stay_avg', 'stay_sum']
    # travel metric
    + ['travel']
    # centr_* metrics
    + ['centr_closes', 'centr_between', 'centr_eigen', 'centr_infos']
    # infl_* metrics
    + ['infl_dist', 'infl_reg', 'infl_sum']
)

In [5]:
# Attach every min-max scaled metric table to the list of destinations.

df = pd.read_csv(DIR_ROOT + '/data/02_processed/destinations.csv', index_col='name')

for metric in metrics:
    metric_path = DIR_ROOT + '/data/05_metrics/' + metric + '.csv'

    # Prefix the p1..p5 columns with the metric name; the 'pall' column
    # takes the bare metric name.
    new_names = {'p' + str(i): metric + '_p' + str(i) for i in range(1, 6)}
    new_names['pall'] = metric

    df_metric = (
        pd.read_csv(metric_path, index_col=0)
        .rename(columns=new_names)
        .drop(columns=['geometry'])
    )

    # Min-max scale each column: (value - min) / (max - min).
    col_min = df_metric.min()
    df_metric = (df_metric - col_min) / (df_metric.max() - col_min)

    # Inner join: keep only index labels present in both tables.
    df = pd.concat([df, df_metric], axis=1, join="inner")

In [6]:
# Row-wise median over the columns listed in `metrics`.
# DataFrame.median(axis=1) is the vectorised equivalent of applying
# Series.median to every row with a Python lambda; NaNs are skipped by default
# in both forms.
df['median'] = df[metrics].median(axis=1)
df.sort_values('median', ascending=False)

Unnamed: 0.1,Unnamed: 0,id,no_alle,no_activity,no_reise,no_bi,no_gg,x,y,prazision,...,infl_reg_p4,infl_reg_p5,infl_reg,infl_sum_p1,infl_sum_p2,infl_sum_p3,infl_sum_p4,infl_sum_p5,infl_sum,median
Praha,417,421.0,397.0,170.0,174.0,44.0,9.0,50.09,14.41,1.0,...,1.000000,1.000000,1.000000,,1.000,1.000000,1.000000,1.000000,1.000000,1.000000
Wien,628,632.0,266.0,115.0,123.0,20.0,8.0,48.22,16.39,1.0,...,0.655172,0.000000,0.714286,1.0,1.000,0.418919,0.864865,,0.735577,0.688986
Brno,43,44.0,104.0,42.0,48.0,10.0,4.0,49.19,16.61,1.0,...,0.551724,0.454545,0.595238,,0.325,0.135135,0.418919,0.411765,0.307692,0.377510
Písek,393,396.0,28.0,13.0,13.0,1.0,1.0,49.31,14.15,1.0,...,0.206897,0.000000,0.404762,,0.100,0.162162,0.094595,,0.120192,0.281695
Krems an der Donau,228,229.0,72.0,30.0,30.0,5.0,7.0,48.41,15.62,1.0,...,0.068966,0.000000,0.166667,1.0,0.425,,0.027027,,0.173077,0.253944
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Marchegg,279,280.0,7.0,2.0,2.0,2.0,1.0,48.26,16.89,1.0,...,0.000000,0.000000,0.000000,,,,,,,0.001032
Mariazell,281,282.0,5.0,2.0,2.0,0.0,1.0,47.77,15.32,1.0,...,0.000000,0.000000,0.000000,,,,,,,0.000392
Seitenstetten,494,498.0,5.0,1.0,1.0,3.0,0.0,48.04,14.65,1.0,...,0.000000,0.000000,0.023810,,0.000,,,,0.000000,0.000000
Friesach,112,113.0,11.0,4.0,4.0,2.0,1.0,46.98,14.33,1.0,...,0.034483,0.000000,0.023810,,,,0.000000,,0.000000,0.000000


In [7]:
# Rank of every destination within each metric (1 = highest value).
#
# DataFrame.rank replaces the former copy / sort / reset_index loop, which
# copied the whole frame once per metric and only worked when reset_index()
# produced a column literally called 'index' (i.e. when the index was unnamed).
#   * ascending=False    -> the largest value gets rank 1
#   * method='first'     -> ties get distinct consecutive ranks, in row order
#                           (deterministic, unlike the default unstable sort)
#   * na_option='bottom' -> missing values get the worst ranks, matching
#                           sort_values(), which places NaN last
df_matrices_order = (
    df[metrics]
    .rank(method='first', ascending=False, na_option='bottom')
    .astype('int64')
)

df_matrices_order.loc['Praha']

act_sum           1
day_sum           1
stay_avg          4
stay_sum          1
travel            1
centr_closes      5
centr_between     1
centr_eigen       1
centr_infos       1
infl_dist        22
infl_reg          1
infl_sum          1
Name: Praha, dtype: int64

In [38]:
import altair as alt

# Only ranks 1..max_rank are shown on the chart.
max_rank = 10

# Destinations that reach the top `max_rank` in at least one metric,
# de-duplicated and sorted by name.
top_mask = (df_matrices_order[metrics] <= max_rank).any(axis=1)
imp_place_names = np.unique(df_matrices_order.index[top_mask].to_numpy())

# Rank table restricted to those destinations. The index is copied into a
# regular 'name' column so the chart can reference it as a field; `.assign`
# returns a new frame instead of writing into the `.loc` selection.
top_ranks = df_matrices_order.loc[imp_place_names]
imp_places = top_ranks.assign(name=top_ranks.index)

alt.renderers.set_embed_options(
    padding={"left": 5, "right": 10, "bottom": 5, "top": 10}
)

# Base chart: one series per destination across the metrics (wide -> long via
# transform_fold, producing the 'key' / 'value' fields used below).
# NOTE(review): the windowed `index` field is not referenced by any encoding here.
rank_chart = alt.Chart(imp_places).transform_window(
    index='count()'
).transform_fold(
    metrics
).properties(
    width=1000,
    height=600
).encode(
    color=alt.Color('name:N', legend=None),
    x=alt.X(
        'key:N',
        axis=alt.Axis(title='metrics'),
        sort=metrics
    ),
    y=alt.Y(
        'value:Q',
        # Reversed domain so that rank 1 is drawn at the top.
        scale=alt.Scale(zero=False, domain=[max_rank + 0.5, 0.5]),
        axis=alt.Axis(
            # Ticks for every rank 1..max_rank inclusive; the previous
            # range(1, max_rank) dropped the tick for the last visible rank.
            values=list(range(1, max_rank + 1)),
            title='rank',
            format='.0f'
        ),
    ),
)

# clip=True hides marks whose rank falls outside the 1..max_rank domain.
lines = rank_chart.mark_line(
    clip=True
)

circles = rank_chart.mark_circle(
    clip=True,
    size=100
)

labels = rank_chart.mark_text(
    clip=True,
    align='center',
    baseline='middle',
    fill='black',
    dy=-10
).encode(
    text='name:N'
)

lines + circles + labels