In [None]:
import os
import psutil
import geopandas as gpd
from matrixconverters.read_ptv import ReadPTVMatrix
import xarray as xr
from collections import namedtuple, defaultdict
import pandas as pd

In [None]:
# Move the working directory one level up so that the relative 'data' paths and
# the `params` / `scripts` imports used below resolve.
# NOTE(review): presumably the notebook lives in a subfolder of the project root — confirm.
# NOTE(review): this cell is not idempotent; re-running it moves up one more level each time.
os.chdir("..")

In [None]:
from params.project_params import ENCODING_CP1252, CRS_EPSG_ID_WGS84
from scripts.constants import GEOMETRY
from scripts.functions import get_gdf_mobility_stations, get_gdf_mobility_stations_with_npvm_zone, execute_road_routing, RoadRoutingError

# Read data from files

In [None]:
# Relative path to the zipped NPVM zones shapefile; read with geopandas further below.
path_to_npvm_zones_shp = os.path.join('data', 'Verkehrszonen_Schweiz_NPVM_2017_shp.zip')

In [None]:
# Relative path to the mobility stations CSV; passed to get_gdf_mobility_stations further below.
path_mobility_stations = os.path.join('data', 'mobility-stationen-und-fahrzeuge-schweiz.csv')

In [None]:
# Pairs the file name of a source skim (file_name_old) with the file name the
# filtered skim is written to (file_name_new).
FileNameOldNew = namedtuple("FileNameOldNew", ["file_name_old", "file_name_new"])

In [None]:
# Per skim type: the source .mtx matrix file and the .nc file the filtered matrix is written to.
# The key prefix ('PT' or 'Road') decides which axis read_skim_filter_and_write filters on.
skim_path_per_type = {
    'PT_JT': FileNameOldNew('DWV_2017_ÖV_Reisezeit_CH_binär.mtx', 'pt_jt.nc'),
    'PT_NT': FileNameOldNew('144_NTR_(OEV).mtx', 'pt_nt.nc'),
    'PT_Dist': FileNameOldNew('DWV_2017_ÖV_Distanz_CH_binär.mtx', 'pt_dist.nc'),
    'Road_JT': FileNameOldNew('DWV_2017_Strasse_Reisezeit_CH_binär.mtx', 'road_jt.nc'),
    'Road_Dist': FileNameOldNew('DWV_2017_Strasse_Distanz_CH_binär.mtx', 'road_dist.nc')
}

In [None]:
# psutil handle on the current Python process; read by print_memory_usage.
process = psutil.Process()

In [None]:
def print_memory_usage():
    """Print the resident set size of the module-level `process` handle, in MiB."""
    rss_bytes = process.memory_info().rss
    bytes_per_mib = 1024 * 1024
    print(rss_bytes / bytes_per_mib)

In [None]:
# Baseline memory reading before any data is loaded.
print_memory_usage()

In [None]:
# Read the NPVM zones shapefile with the encoding given by ENCODING_CP1252 and
# reproject it to the CRS given by CRS_EPSG_ID_WGS84; then report memory usage.
gdf_npvm_zones = gpd.read_file(path_to_npvm_zones_shp, encoding=ENCODING_CP1252).to_crs(CRS_EPSG_ID_WGS84)
print_memory_usage()

In [None]:
# Load the mobility stations through the project helper, then report memory usage.
gdf_mobility_stations = get_gdf_mobility_stations(path_mobility_stations)
print_memory_usage()

In [None]:
# Display the loaded stations frame for inspection.
gdf_mobility_stations

In [None]:
# Combine stations and NPVM zones through the project helper (its join logic is
# not visible in this notebook), then report memory usage.
gdf_mobility_stations_with_npvm_zone = get_gdf_mobility_stations_with_npvm_zone(gdf_mobility_stations, gdf_npvm_zones)
print_memory_usage()

In [None]:
# Display the stations-with-zone frame for inspection.
gdf_mobility_stations_with_npvm_zone

In [None]:
# Distinct values of the 'ID' column; used below as the selection list when filtering the skims.
npvm_zones_with_mobility_station = list(set(gdf_mobility_stations_with_npvm_zone['ID'].values))

# Filter skims to zones with mobility stations and write files to disk

In [None]:
def read_skim_filter_and_write(ty, path_old, path_new):
    """Read one PTV skim matrix, keep only zones with a mobility station, and write it as NetCDF.

    For skim types starting with 'PT' the ``destinations`` axis is filtered; for
    types starting with 'Road' the ``origins`` axis is filtered. The filtered
    axis is renamed to ``zone_mobility_station`` in both cases.

    Args:
        ty: Skim type key, e.g. 'PT_JT' or 'Road_Dist'.
        path_old: File name of the source matrix inside the 'data' folder.
        path_new: File name of the NetCDF output inside the 'data' folder.

    Raises:
        ValueError: If ``ty`` starts with neither 'PT' nor 'Road'.
    """
    # Validate the type before the expensive matrix read; previously an unknown
    # type only failed afterwards with an UnboundLocalError.
    if ty.startswith('PT'):
        station_dim = "destinations"
    elif ty.startswith('Road'):
        station_dim = "origins"
    else:
        raise ValueError(
            "unknown skim type {!r}: expected a name starting with 'PT' or 'Road'".format(ty))

    print_memory_usage()
    print(path_old, path_new)
    skim = ReadPTVMatrix(os.path.join('data', path_old))
    skim_filtered = (
        skim.sel(**{station_dim: npvm_zones_with_mobility_station})
        .matrix
        .rename({station_dim: "zone_mobility_station"})
    )
    skim_filtered.to_netcdf(os.path.join('data', path_new))
    print_memory_usage()

In [None]:
# NOTE(review): not referenced anywhere else in the visible notebook — possibly a leftover.
skim_per_type = {}

In [None]:
# Filter every configured skim and write the result to its new file name.
for ty, (name_old, name_new) in skim_path_per_type.items():
    read_skim_filter_and_write(ty, name_old, name_new)

# Read filtered files from disk

In [None]:
%%time
print_memory_usage()
# Open each filtered NetCDF file written above, keyed by skim type.
# The namedtuple field is read by name so no throwaway `_` target is needed
# (at notebook top level `_` would rebind IPython's last-output variable).
skim_filtered_per_type = {
    ty: xr.open_dataset(os.path.join('data', names.file_name_new))
    for ty, names in skim_path_per_type.items()
}
print_memory_usage()

# Run query data

## Origin and destination NPVM zones

In [None]:
# Example query: origin and destination zone IDs passed to execute() below.
from_zone_id = 223401003
to_zone_id = 352101001

In [None]:
# Example origin and destination coordinates. calc_distance converts them with
# radians(), treating easting as longitude and northing as latitude in degrees.
orig_easting, orig_northing, dest_easting, dest_northing = 7.4234812, 46.9366421, 7.4388954, 46.7303113

# Filter skims to origin and destination

In [None]:
def calc_costs_df(df_data, vtts, pt_min_per_transfer, pt_chf_per_km, road_chf_per_km):
    """Compute the generalized cost per row of ``df_data``.

    The time component sums ``20 * penalty_not_foot``, ``pt_jt``,
    ``pt_min_per_transfer * pt_nt`` and ``road_jt`` and is weighted by
    ``vtts / 60``. The distance component adds ``pt_chf_per_km * pt_dist`` and
    ``road_chf_per_km * road_dist``.

    Args:
        df_data: Frame (or mapping) providing the columns 'penalty_not_foot',
            'pt_jt', 'pt_nt', 'pt_dist', 'road_jt' and 'road_dist'.
        vtts: Weight applied to the time component after division by 60.
        pt_min_per_transfer: Multiplier applied to 'pt_nt'.
        pt_chf_per_km: Multiplier applied to 'pt_dist'.
        road_chf_per_km: Multiplier applied to 'road_dist'.

    Returns:
        The per-row cost, in the same container type the column arithmetic yields.
    """
    time_terms = (
        20 * df_data['penalty_not_foot']
        + df_data['pt_jt']
        + pt_min_per_transfer * df_data['pt_nt']
        + df_data['road_jt']
    )
    time_costs = vtts / 60 * time_terms
    return (
        time_costs
        + pt_chf_per_km * df_data['pt_dist']
        + road_chf_per_km * df_data['road_dist']
    )

In [None]:
def get_relevant_mob_stations(best_zones_costs_per_vtts):
    """Collect the distinct 'zone_mobility_station' values across all frames.

    Args:
        best_zones_costs_per_vtts: Mapping whose values are frames with a
            'zone_mobility_station' column.

    Returns:
        A set with every value that occurs in that column of any frame.
    """
    return {
        zone
        for frame in best_zones_costs_per_vtts.values()
        for zone in frame['zone_mobility_station'].to_list()
    }

In [None]:
from math import sin, cos, sqrt, atan2, radians

# Approximate radius of earth in km
R = 6373.0

def calc_distance(easting_1, northing_1, easting_2, northing_2):
    """Return the great-circle distance in km between two points (haversine formula).

    Args:
        easting_1: Longitude of the first point in degrees.
        northing_1: Latitude of the first point in degrees.
        easting_2: Longitude of the second point in degrees.
        northing_2: Latitude of the second point in degrees.

    Returns:
        The distance along a sphere of radius ``R`` (km).
    """
    lat1 = radians(northing_1)
    lon1 = radians(easting_1)
    lat2 = radians(northing_2)
    lon2 = radians(easting_2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt(1 - a) raise ValueError.
    a = min(1.0, max(0.0, a))
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

In [None]:
def calc_dist_origin(x):
    """Return the distance from the notebook-level origin to the row's station.

    Args:
        x: Row-like object with 'easting' and 'northing' entries convertible to float.

    Returns:
        The result of ``calc_distance`` between (orig_easting, orig_northing)
        and the row's coordinates.
    """
    station_easting = float(x["easting"])
    station_northing = float(x["northing"])
    return calc_distance(orig_easting, orig_northing, station_easting, station_northing)

In [None]:
# Display the distance between the example origin and destination coordinates.
calc_distance(orig_easting, orig_northing, dest_easting, dest_northing)

In [None]:
def calc_foot_penalty(x):
    """Return the origin-to-station distance divided by 20, capped at 1.0.

    Args:
        x: Row-like object with 'easting' and 'northing' entries convertible to float.

    Returns:
        ``min(distance / 20, 1.0)`` where distance comes from ``calc_distance``.
    """
    station_easting = float(x["easting"])
    station_northing = float(x["northing"])
    dist_from_origin = calc_distance(orig_easting, orig_northing, station_easting, station_northing)
    scaled = dist_from_origin / 20
    return min(scaled, 1.0)

In [None]:
def calc_penalty_not_foot(x):
    """Return the row's 'dist_from_origin' divided by 20, capped at 1.0.

    Args:
        x: Row-like object with a numeric 'dist_from_origin' entry.

    Returns:
        ``min(x["dist_from_origin"] / 20, 1.0)``.
    """
    scaled_distance = x["dist_from_origin"] / 20
    return min(scaled_distance, 1.0)

In [None]:
# Display the stations-with-zone frame again; execute() below merges against it.
gdf_mobility_stations_with_npvm_zone

In [None]:
def execute(from_zone_id, to_zone_id):
    """Find the candidate mobility stations for a trip from one zone to another.

    Steps:
      1. Select the PT skims for ``from_zone_id`` (as origin) and the road skims
         for ``to_zone_id`` (as destination) and merge them on their shared columns.
      2. Join the result with the mobility stations on the zone ID and add
         ``dist_from_origin`` and ``penalty_not_foot`` per row.
      3. For every ``vtts`` in ``range(0, 205, 5)`` compute the costs and keep the
         rows whose costs are within ``factor`` times the minimum. ``factor``
         starts at 1.1 and is lowered by 0.01 until at most 200 rows remain after
         joining the selected zones with the mobility stations again.
      4. Call ``execute_road_routing`` for the stations in ``df_data``.

    Args:
        from_zone_id: Zone ID selected on the ``origins`` axis of the PT skims.
        to_zone_id: Zone ID selected on the ``destinations`` axis of the road skims.

    Returns:
        Tuple ``(df_data_relevant_mob_stations, best_zones_costs_per_vtts)``: the
        joined frame of the selected zones and the per-``vtts`` dict of kept rows.

    Raises:
        RoadRoutingError: If the road routing step raises any exception.
    """
    pt_jt = skim_filtered_per_type['PT_JT'].sel(origins=from_zone_id, drop=True).matrix
    pt_nt = skim_filtered_per_type['PT_NT'].sel(origins=from_zone_id, drop=True).matrix
    pt_dist = skim_filtered_per_type['PT_Dist'].sel(origins=from_zone_id, drop=True).matrix

    road_jt = skim_filtered_per_type['Road_JT'].sel(destinations=to_zone_id, drop=True).matrix
    road_dist = skim_filtered_per_type['Road_Dist'].sel(destinations=to_zone_id, drop=True).matrix

    df_pt_jt = pt_jt.to_dataframe().reset_index().rename(columns={'matrix': 'pt_jt'})
    df_pt_nt = pt_nt.to_dataframe().reset_index().rename(columns={'matrix': 'pt_nt'})
    df_pt_dist = pt_dist.to_dataframe().reset_index().rename(columns={'matrix': 'pt_dist'})

    df_road_jt = road_jt.to_dataframe().reset_index().rename(columns={'matrix': 'road_jt'})
    df_road_dist = road_dist.to_dataframe().reset_index().rename(columns={'matrix': 'road_dist'})

    # Merge without `on`: pandas joins on the columns the frames have in common.
    df_data = df_pt_jt
    for df in [df_pt_nt, df_pt_dist, df_road_jt, df_road_dist]:
        df_data = df_data.merge(df)
    fields = ['zone_mobility_station', 'pt_jt', 'pt_nt', 'pt_dist', 'road_jt',
       'road_dist', 'Stationsnummer', 'ID', 'Name', 'geometry', 'easting',
       'northing']
    df_data = df_data.merge(gdf_mobility_stations_with_npvm_zone, left_on='zone_mobility_station', right_on="ID")[fields]
    print(df_data.columns)
    df_data["dist_from_origin"] = df_data.apply(calc_dist_origin, axis=1)
    df_data["penalty_not_foot"] = df_data.apply(calc_penalty_not_foot, axis=1)

    factor = 1.1

    # Tighten the tolerance around the minimum costs until the candidate set is small enough.
    while True:
        best_zones_costs_per_vtts = {}
        for vtts in range(0, 205, 5):
            df_data['costs'] = calc_costs_df(df_data, vtts, 20.0, 0.16, 0.75)
            min_costs = df_data['costs'].min()
            # Boolean indexing returns a new frame, so each entry keeps the
            # costs computed for its own vtts.
            best_zones_costs_per_vtts[vtts] = df_data[df_data['costs'] <= factor * min_costs]

        relevant_mob_stations = get_relevant_mob_stations(best_zones_costs_per_vtts)

        df_data_relevant_zones = df_data[df_data.zone_mobility_station.isin(relevant_mob_stations)]
        df_data_relevant_mob_stations = df_data_relevant_zones.merge(gdf_mobility_stations_with_npvm_zone, left_on='zone_mobility_station', right_on="ID")
        if len(df_data_relevant_mob_stations) <= 200:
            print(factor)
            print(len(df_data_relevant_mob_stations))
            break
        else:
            print(len(df_data_relevant_mob_stations))
            factor -= 0.01
    list_potential_mobility_stations = list(df_data[["Stationsnummer", "easting", "northing"]].to_dict("records"))
    try:
        # id is MOBILITY_STATIONSNUMMER if osrm_routing
        # NOTE(review): `gdf_dest_with_npvm_zone_id` is not defined anywhere in the
        # visible notebook. Unless it exists elsewhere, the NameError is caught below
        # and reported as a RoadRoutingError.
        # NOTE(review): the two routing results are not used or returned — confirm intent.
        road_dist_from_pot_mob_stat_to_dest_per_id, road_durations_from_pot_mob_stat_to_dest_per_id = execute_road_routing(list_potential_mobility_stations, gdf_dest_with_npvm_zone_id)
    except Exception as err:
        # Chain the original exception so the real cause stays visible in the traceback.
        raise RoadRoutingError(
            "could not get road distances and durations from potential mobility stations to destination ") from err
    return df_data_relevant_mob_stations, best_zones_costs_per_vtts
    
#     data_per_zone = df_data[df_data.zone_mobility_station.isin(relevant_mob_stations)].to_dict('records')
#     data_per_zone = {x['zone_mobility_station']: x for x in data_per_zone}
    
#     mob_stations_per_npvm_zone = defaultdict(list)
#     infos_per_mob_station = {}
#     for e in gdf_mobility_stations_with_npvm_zone[gdf_mobility_stations_with_npvm_zone.ID.isin(relevant_mob_stations)].to_dict('records'):
#         mob_st_nr = e['Stationsnummer']
#         mob_st_name = e['Name']
#         zone_id = e['ID']
#         easting = e['easting']
#         northing = e['northing']
#         mob_stations_per_npvm_zone[zone_id] += [mob_st_nr]
#         if mob_st_nr in infos_per_mob_station:
#             raise ValueError('something wrong')
#         infos_per_mob_station[mob_st_nr] = {
#             'station_nr': mob_st_nr,
#             'station_name': mob_st_name,
#             'zone_id': zone_id,
#             'station_easting': easting,
#             'station_northing': northing
#         }
    # return {
    #     'best_zones_costs_per_vtts': {k: df.to_dict('records') for k, df in best_zones_costs_per_vtts.items()}, 
    #     'data_per_zone': data_per_zone,
    #     'mobility_stations_per_zone': dict(mob_stations_per_npvm_zone),
    #     'infos_per_mobility_station': infos_per_mob_station
    # }

In [None]:
%%time
# Run the query for the example zone pair defined above.
df_data_relevant_mob_stations, best_zones_costs_per_vtts = execute(from_zone_id, to_zone_id)

In [None]:
# Display the rows kept for vtts = 200, the last step of range(0, 205, 5) in execute().
best_zones_costs_per_vtts[200]

In [None]:
# `res` was never defined in this notebook, so the cell failed on a fresh kernel.
# NOTE(review): assumes the frame returned by execute() was meant — confirm.
df_data_relevant_mob_stations.sort_values(by="penalty_not_foot")