In [1]:
import requests
import time
import urllib
from neo4j import GraphDatabase
import gzip
import pandas as pd
from datetime import date, datetime
from collections import defaultdict
from pathlib import Path
import csv
from neo4j import Driver
import datapane as dp
import numpy as np
import folium
import json

In [2]:
c = json.load(open(file='config.json'))

URI = c["uri"]
USER = c["user"]
PASS = c["password"]
DATABASE = c["database"]
DATA_FOLDER = c["data_folder"]

RINF_TOKEN_ENDPOINT = c["rinf_token_endpoint"]
RINF_OP_ENDPOINT = c["rinf_op_endpoint"]

In [3]:
from math import atan2, cos, radians, sin, sqrt
from neo4j import Transaction
from pandas import DataFrame


class OP_TNS:
    CreatedAt: datetime
    LatitudeEpsg4326: float
    LongitudeEpsg4326: float
    Name: str
    OpType: str
    UniqueOPID: str
    OpTafTapCode: str
    ValidToUtc: datetime
    ValidFromUtc: datetime

def create_rinf_http_session(RINF_TOKEN_ENDPOINT):
    session = requests.session()
    response = session.post(RINF_TOKEN_ENDPOINT, data="grant_type=password&username=dev.tnöw@railcargo.com&password=Dev.tnw1492")
    response.raise_for_status()
    token = response.json().get('access_token')
    print('Auth Token received from RINF API')
    session.headers.update(Authorization = f"Bearer {token}")
    return session

def load_ops_from_rinf(session):
    opResponse = session.get('https://rinf.era.europa.eu/api/OperationalPoints?$expand=TafTAPCodes')
    opResponse.raise_for_status()
    opIdsInRinf = []
    for op in opResponse.json()["value"]:
        op['OPTafTapCode'] = op['TafTAPCodes'][0]["Value"] if len(op['TafTAPCodes']) > 0 else None
        opIdsInRinf.append(op)
    # opIdsInRinf = list(set(opIdsInRinf)) # Removed duplicates (RINF API contains multiple states per OPID)
    print(f"Found {len(opIdsInRinf)} OPs in RINF API")
    return opIdsInRinf

def df_to_csv(df: DataFrame, filename):
    path = DATA_FOLDER + "/" + filename

    csv = df.to_csv(encoding="utf-8-sig", index=False, sep=';')

    with open(path, "bw") as f:
        b = bytes(csv, encoding="utf-8-sig")
        f.write(b)

def df_to_csv_gz(df, filename):
    path = DATA_FOLDER + "/" + filename

    csv = df.to_csv(encoding="utf-8", index=False)

    with open(path + ".gz", "bw") as f:
        b = gzip.compress(bytes(csv, encoding="utf-8"), compresslevel=9)
        f.write(b)
        
def load_objects_from_gzip(path):
    df = pd.read_csv(path, compression='gzip')
    return df

def ops_from_neo4j_query(tx: Transaction) -> list:
    query = ''' MATCH (a:Op)-[hd:HAS_DETAILS]-(opd:OpDetails) 
                WHERE hd.VersionValidFrom <= datetime() 
                AND (hd.VersionValidTo >= datetime() OR hd.VersionValidTo IS NULL)
                RETURN properties(opd)'''
    result = tx.run(query)

    ops = []
    for record in result:
        dict = record.values()[0]
        ops.append(dict)

    return ops

def load_ops_from_neo4j(DATABASE, driver: Driver):
    with driver.session(database=DATABASE) as session:
        ops = session.write_transaction(ops_from_neo4j_query)
        print(f"Found {len(ops)} OPs in DB")
    return ops

def distance_in_km_between_two_coordinates(lat1, lon1, lat2, lon2):
    R = 6373.0

    lat1 = radians(lat1)
    lon1 = radians(lon1)
    lat2 = radians(lat2)
    lon2 = radians(lon2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(float(sqrt(a)), float(sqrt(1 - a)))

    distance = R * c
    
    return distance

In [4]:
# Load data from RINF

driver = GraphDatabase.driver(URI, auth=(USER, PASS))

session = create_rinf_http_session(RINF_TOKEN_ENDPOINT)

opIdsInRinf = load_ops_from_rinf(session)

df = pd.DataFrame(opIdsInRinf)
df.columns

print(df)

df_to_csv_gz(df, f'RINF_OPs.csv')



Auth Token received from RINF API
Found 60731 OPs in RINF API
            ID  VersionID                                Name  \
0            2        245                          Pampilhosa   
1            2        256                              PIREAS   
2            2        271      Rakenduspunkt TallinnVäikepiir   
3            2        274                     Valstybės siena   
4            2        379                             Banovci   
...        ...        ...                                 ...   
60726  5661845        616       Zone portuaire de Lauterbourg   
60727  5661917        616                    GPMB - Le Verdon   
60728  5662149        616                 GPMB - Bassens Aval   
60729  5662245        616                GPMB - Bassens Amont   
60730  5662318        616  Grand Port Maritime de La Rochelle   

                        Type    Country          ValidityDateStart  \
0                    station   Portugal  2018-01-01T00:00:00+01:00   
1         passeng

In [5]:
# Load data from neo4j

path = Path.cwd() / DATA_FOLDER
path.mkdir(parents=True, exist_ok=True)

with GraphDatabase.driver(URI, auth=(USER, PASS)) as driver:
    ops = load_ops_from_neo4j(DATABASE, driver)
    op_tns = pd.DataFrame(ops)

Found 51757 OPs in DB


In [6]:
# Load data from csv.gz

df = load_objects_from_gzip(f'{DATA_FOLDER}/RINF_OPs.csv.gz')

In [7]:
op_tns

Unnamed: 0,LatitudeEpsg4326,ValidToUtc,OpTafTapCode,CreatedAt,DetailsId,OpType,CountryCode,UniqueOPID,ValidFromUtc,LongitudeEpsg4326,Name
0,48.196366,2099-12-30T23:00:00.000000000+00:00,AT01001,2022-02-22T08:26:26.742000000+00:00,4b431409-7502-471f-b426-547a0f877385,Station,AT,ATWs,2021-12-14T23:00:00.000000000+00:00,16.337332,Wien Westbf (in Ws)
1,48.193634,2099-12-30T23:00:00.000000000+00:00,AT01002,2022-02-22T08:21:41.810000000+00:00,33a69bb0-56e4-4265-a297-891b650c33d4,Junction,AT,ATOw,2021-12-14T23:00:00.000000000+00:00,16.324385,Wien Westbf-Fbf (in Ws)
2,48.191442,2099-12-30T23:00:00.000000000+00:00,AT01026,2022-02-22T08:28:26.906000000+00:00,2020c28e-aa62-4b02-a74f-a58051c8c12e,PassengerStop,AT,ATNl H1,2021-12-14T23:00:00.000000000+00:00,15.847421,Ollersbach
3,48.190243,2099-12-30T23:00:00.000000000+00:00,AT01027,2022-02-22T08:26:13.162000000+00:00,bf924b87-3ca6-4456-afc9-871f230bd58b,Station,AT,ATKrt,2021-12-14T23:00:00.000000000+00:00,15.821011,Kirchstetten
4,48.210685,2099-12-30T23:00:00.000000000+00:00,AT90280,2022-02-22T08:24:38.211000000+00:00,92c0c7c8-3c27-464f-950d-eabbaa349e8f,Junction,AT,ATHdd,2021-12-14T23:00:00.000000000+00:00,16.220116,Abzw Knoten Hadersdorf
...,...,...,...,...,...,...,...,...,...,...,...
51752,48.161892,2050-12-30T23:00:00.000000000+00:00,,2022-02-22T13:10:05.748000000+00:00,bd62b1fa-2635-4e63-b288-0d96f1044db4,PrivateSiding,SK,SK300053,2020-12-31T23:00:00.000000000+00:00,17.193758,"Letisko M.R.Štefánika - Airport Bratislava, a...."
51753,48.153629,2050-12-30T23:00:00.000000000+00:00,,2022-02-22T13:10:06.577000000+00:00,953f011f-1313-4adc-920c-683eecd16711,PrivateSiding,SK,SK300054,2020-12-31T23:00:00.000000000+00:00,17.924012,"Lesy Slovenskej republiky, štátny podnik Odšte..."
51754,48.577854,2050-12-30T23:00:00.000000000+00:00,,2022-02-22T13:10:07.281000000+00:00,b388aa94-602b-4f9a-ba6b-b3f14dfcc3b6,PrivateSiding,SK,SK300055,2020-12-31T23:00:00.000000000+00:00,18.870526,"Veolia Utilities Žiar nad Hronom, a.s. (Žiar n..."
51755,48.694384,2050-12-30T23:00:00.000000000+00:00,,2022-02-22T13:10:08.044000000+00:00,3b0a957a-af71-4cd9-a2ac-d95192f81b84,PrivateSiding,SK,SK300056,2020-12-31T23:00:00.000000000+00:00,21.276124,"INVITA, s.r.o. (Košice - Jazerná)"


In [8]:
# Transform/Harmonize RINF dataframe

from math import isnan
from numpy import NaN
from pandas import NaT
from pytz import utc
import dateutil
import neo4j


op_rinf_harmonized = pd.DataFrame(df, columns=['UOPID', 'Latitude', 'Longitude', 'Name', 'OPTafTapCode', 'Country', 'ValidityDateStart', 'ValidityDateEnd'])
op_rinf_harmonized = op_rinf_harmonized.rename(columns={"UOPID": "UniqueOPID", "Latitude": "LatitudeEpsg4326", "Longitude": "LongitudeEpsg4326"}).reset_index(drop=True).sort_values(['Country', 'UniqueOPID'])

op_tns_harmonized = pd.DataFrame(op_tns, columns=["UniqueOPID", "LatitudeEpsg4326", "LongitudeEpsg4326", "Name", "OpTafTapCode", "CountryCode", "ValidFromUtc", "ValidToUtc"])
op_tns_harmonized = op_tns_harmonized.rename(columns={"CountryCode": "Country", "OpTafTapCode": "OPTafTapCode", "ValidFromUtc": "ValidityDateStart", "ValidToUtc": "ValidityDateEnd"}).reset_index(drop=True).sort_values(['Country', 'UniqueOPID'])

# display(op_tns_harmonized)

def parse_date(x):
    if(type(x) == neo4j.time.DateTime):
        return x.to_native()
    if(type(x) == str):
        return dateutil.parser.isoparse(x)
    return ''

op_rinf_harmonized['ValidityDateStart'] = op_rinf_harmonized['ValidityDateStart'].apply(lambda x: parse_date(x))
op_rinf_harmonized['ValidityDateEnd'] = op_rinf_harmonized['ValidityDateEnd'].apply(lambda x: parse_date(x))
op_tns_harmonized['ValidityDateStart'] = op_tns_harmonized['ValidityDateStart'].apply(lambda x: parse_date(x))
op_tns_harmonized['ValidityDateEnd'] = op_tns_harmonized['ValidityDateEnd'].apply(lambda x: parse_date(x))

op_rinf_harmonized = op_rinf_harmonized.replace(NaN, '', regex=True)#.replace(NaT, '', regex=True)
op_tns_harmonized = op_tns_harmonized.replace(NaN, '', regex=True)#.replace(NaT, '', regex=True)

# Use empty strings instead of NaN and NaT for easier comparison operations afterwards
# op_rinf_harmonized = op_rinf_harmonized.replace(np.nan, '', regex=True)#.replace(NaT, '', regex=True)
# op_tns_harmonized = op_tns_harmonized.replace(np.nan, '', regex=True)#.replace(NaT, '', regex=True)

# Add IsCurrentlyValid column
op_rinf_harmonized['IsCurrentlyValid'] = op_rinf_harmonized.apply(lambda x: 'No validity' if type(x.ValidityDateStart) == str else 'Valid' if (x.ValidityDateStart.replace(tzinfo=None) <= datetime.now() and (type(x.ValidityDateEnd) is str or (x.ValidityDateEnd is NaT or x.ValidityDateEnd.replace(tzinfo=None) >= datetime.now()))) else 'Invalid', axis=1)
op_tns_harmonized['IsCurrentlyValid'] = op_tns_harmonized.apply(lambda x: 'No validity' if type(x.ValidityDateStart) == str else 'Valid' if (x.ValidityDateStart.replace(tzinfo=None) <= datetime.now() and (type(x.ValidityDateEnd) is str or (x.ValidityDateEnd is NaT or x.ValidityDateEnd.replace(tzinfo=None) >= datetime.now()))) else 'Invalid', axis=1)



In [9]:
op_tns_harmonized

Unnamed: 0,UniqueOPID,LatitudeEpsg4326,LongitudeEpsg4326,Name,OPTafTapCode,Country,ValidityDateStart,ValidityDateEnd,IsCurrentlyValid
463,ATAa,48.164923,16.330233,W.Mat.-Altmannsdorf (in Wbf),AT05102,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid
777,ATAb,48.069163,14.755127,Aschbach,AT01054,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid
1464,ATAbf,46.753358,12.511086,Abfaltersbach West,AT03877,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid
1474,ATAbf A1,46.780965,12.647149,AB (Awanst) - Abf_A1,AT05104,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid
1465,ATAbf H1,46.758125,12.529545,Abfaltersbach,AT05999,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid
...,...,...,...,...,...,...,...,...,...
51753,SK300054,48.153629,17.924012,"Lesy Slovenskej republiky, štátny podnik Odšte...",,SK,2020-12-31 23:00:00+00:00,2050-12-30 23:00:00+00:00,Valid
51754,SK300055,48.577854,18.870526,"Veolia Utilities Žiar nad Hronom, a.s. (Žiar n...",,SK,2020-12-31 23:00:00+00:00,2050-12-30 23:00:00+00:00,Valid
51755,SK300056,48.694384,21.276124,"INVITA, s.r.o. (Košice - Jazerná)",,SK,2020-12-31 23:00:00+00:00,2050-12-30 23:00:00+00:00,Valid
51756,SK300057,48.630430,21.244360,"U.S. Steel Košice, s.r.o. (Haniska pri Košicia...",SK04388,SK,2020-12-31 23:00:00+00:00,2050-12-30 23:00:00+00:00,Valid


In [10]:
# Filter data for current valid OPs only (maybe invalidated ones still are interesting?)

# op_rinf_harmonized = op_rinf_harmonized[op_rinf_harmonized["IsCurrentlyValid"].isin(['Valid', 'No validity'])]


In [11]:
# display(op_rinf_harmonized[op_rinf_harmonized.groupby(['UniqueOPID', 'IsCurrentlyValid'])['UniqueOPID'].transform('size') > 1].sort_values('UniqueOPID'))

# op_rinf_harmonized[op_rinf_harmonized.sort_values(['UniqueOPID', 'IsCurrentlyValid']).duplicated(subset=['UniqueOPID'], keep='last')].sort_values('UniqueOPID')

# display(op_rinf_harmonized[op_rinf_harmonized.UniqueOPID.eq('EU00001')])

# display(op_rinf_harmonized(op_rinf_harmonized[op_rinf_harmonized.groupby(['UniqueOPID', 'IsCurrentlyValid'])['UniqueOPID'].transform('size') > 1]))

import math


unique_ops_rinf_preffered_valids = op_rinf_harmonized.sort_values(['UniqueOPID', 'IsCurrentlyValid']).drop_duplicates(['UniqueOPID'], keep='last')

# display(unique_ops_rinf_preffered_valids)
# display(op_tns_harmonized)

merged_ops = op_tns_harmonized.merge(unique_ops_rinf_preffered_valids, on='UniqueOPID', how='inner',suffixes=['_TNS', '_RINF'])
# display(merged_ops)

def get_differences(x):
    diff = []
    distance_between_coords = distance_in_km_between_two_coordinates(x.LatitudeEpsg4326_TNS, x.LongitudeEpsg4326_TNS, x.LatitudeEpsg4326_RINF, x.LongitudeEpsg4326_RINF)
    if(distance_between_coords > 5):
        diff.append(f'Distance: {round(distance_between_coords)}km')
    if(x.Name_TNS != x.Name_RINF):
        diff.append('Name')
    if(x.OPTafTapCode_TNS != x.OPTafTapCode_RINF):
        diff.append('OPTafTapCode')
    if(x.IsCurrentlyValid_TNS != x.IsCurrentlyValid_RINF):
        diff.append('Validity')
    return ', '.join(diff)

merged_ops.insert(1, 'Differences', value='')
merged_ops['Differences'] = merged_ops.apply(lambda x: get_differences(x), axis=1)
merged_ops = merged_ops[merged_ops['Differences'].apply(lambda x: x != '')]


# display(op_tns_harmonized.set_index('UniqueOPID').index.intersection(unique_ops_rinf_preffered_valids.set_index('UniqueOPID').index))
# display(op_tns_harmonized.reset_index(drop=True))
# display(unique_ops_rinf_preffered_valids.reset_index(drop=True))
# display(op_tns_harmonized.reset_index(drop=True).index.intersection(unique_ops_rinf_preffered_valids.reset_index(drop=True).index))



In [12]:
merged_ops[merged_ops['Differences'].apply(lambda x: 'Distance' in x)]

Unnamed: 0,UniqueOPID,Differences,LatitudeEpsg4326_TNS,LongitudeEpsg4326_TNS,Name_TNS,OPTafTapCode_TNS,Country_TNS,ValidityDateStart_TNS,ValidityDateEnd_TNS,IsCurrentlyValid_TNS,LatitudeEpsg4326_RINF,LongitudeEpsg4326_RINF,Name_RINF,OPTafTapCode_RINF,Country_RINF,ValidityDateStart_RINF,ValidityDateEnd_RINF,IsCurrentlyValid_RINF
7137,CZ38106,Distance: 6km,49.163480,16.798400,Blazovice-CEMO,CZ38106,CZ,2012-06-09 22:00:00+00:00,2050-12-30 23:00:00+00:00,Valid,49.214126,16.777572,Blazovice-CEMO,CZ38106,Czech Republic,2022-04-27 00:00:00+02:00,2050-12-31 00:00:00+01:00,Valid
18873,ES08020,Distance: 207km,40.523550,-3.882370,CAMBIADOR DE BURGOS,,ES,2017-03-15 23:00:00+00:00,,Valid,42.372720,-3.659550,CAMBIADOR DE BURGOS,,Spain,2017-03-16 00:00:00+01:00,,Valid
21767,FR0000000578,Distance: 10km,47.638840,0.163228,Bif 508000/561000,,FR,1799-12-31 23:00:00+00:00,2381-12-31 23:00:00+00:00,Valid,47.684106,0.274754,Bif 508000/561000,,France,1800-01-01 00:00:00+01:00,2382-01-01 00:00:00+01:00,Valid
21953,FR0000000806,Distance: 25km,48.537506,7.500181,Bif 111000/111064,,FR,1799-12-31 23:00:00+00:00,2381-12-31 23:00:00+00:00,Valid,48.744563,7.377747,Bif 111000/111064,,France,1800-01-01 00:00:00+01:00,2382-01-01 00:00:00+01:00,Valid
26223,FR0000005751,Distance: 6km,45.844313,5.784001,Faisceau V11 à V15,,FR,1799-12-31 23:00:00+00:00,2381-12-31 23:00:00+00:00,Valid,45.892218,5.814990,Faisceau V11 à V15,,France,1800-01-01 00:00:00+01:00,2382-01-01 00:00:00+01:00,Valid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32162,FR9900003864,Distance: 92km,45.522216,4.213536,Technical change,,FR,,,No validity,44.736570,3.850659,Technical change,,France,,,No validity
32163,FR9900003865,Distance: 316km,43.419080,3.728881,Technical change,,FR,,,No validity,45.428456,0.915581,Technical change,,France,,,No validity
32164,FR9900003866,Distance: 471km,43.957755,4.530404,Technical change,,FR,,,No validity,46.833846,0.096425,Technical change,,France,,,No validity
32165,FR9900003868,Distance: 72km,49.243087,1.227901,Technical change,,FR,,,No validity,48.906006,2.073020,Technical change,,France,,,No validity


In [13]:
# def hl(d):
#     temp_df = pd.DataFrame(columns=d.columns, index=d.index)
#     temp_df.loc[d['LatitudeEpsg4326_TNS'].ne(d['LatitudeEpsg4326_RINF']), ['LatitudeEpsg4326_TNS', 'LatitudeEpsg4326_RINF']] = 'background: yellow'
#     return temp_df
    
# styled_merged_ops = merged_ops.style.apply(hl, axis=None)

# display(styled_merged_ops)

In [14]:
merged_ops

Unnamed: 0,UniqueOPID,Differences,LatitudeEpsg4326_TNS,LongitudeEpsg4326_TNS,Name_TNS,OPTafTapCode_TNS,Country_TNS,ValidityDateStart_TNS,ValidityDateEnd_TNS,IsCurrentlyValid_TNS,LatitudeEpsg4326_RINF,LongitudeEpsg4326_RINF,Name_RINF,OPTafTapCode_RINF,Country_RINF,ValidityDateStart_RINF,ValidityDateEnd_RINF,IsCurrentlyValid_RINF
1726,EU00060,"Name, OPTafTapCode",48.593068,14.433714,EU Staatsgrenze nächst Summerau,EU00060,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid,48.593068,14.433714,H.Dvoriste st.hr.,CZ70222,Czech Republic,2022-05-05 00:00:00+02:00,2050-12-31 00:00:00+01:00,Valid
1727,EU00061,"Name, OPTafTapCode",48.764445,14.968331,EU Staatsgrenze nächst Gmünd N.Ö.,EU00061,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid,48.764445,14.968331,C.Velenice st.hr.,CZ70632,Czech Republic,2022-05-05 00:00:00+02:00,2050-12-31 00:00:00+01:00,Valid
1728,EU00062,"Name, OPTafTapCode",48.773654,16.014680,EU Staatsgrenze nächst Retz,EU00062,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid,48.773654,16.014680,Znojmo st.hr.,CZ30065,Czech Republic,2021-12-12 00:00:00+01:00,2050-12-31 00:00:00+01:00,Valid
1729,EU00063,"Name, OPTafTapCode",48.712544,16.868384,EU Staatsgrenze nächst Bernhardsthal,EU00063,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid,48.712544,16.868384,Breclav st.hr.,CZ30055,Czech Republic,2021-12-12 00:00:00+01:00,2050-12-31 00:00:00+01:00,Valid
1736,EU00109,"Name, OPTafTapCode",48.102834,17.084221,EU Staatsgrenze nächst Kittsee,EU00109,AT,2021-12-14 23:00:00+00:00,2099-12-30 23:00:00+00:00,Valid,48.102834,17.084221,Bratislava-Petržalka št. hr.,SK10046,Slovak Republic,2021-09-29 00:00:00+02:00,2050-01-01 00:00:00+01:00,Valid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48900,SEHdr,Name,56.653500,12.886300,Halmstad rangerbangård,SE01239,SE,,,No validity,56.653500,12.886300,Halmstads rangerbangård,SE01239,Sweden,,,No validity
49075,SEKs,Name,59.377900,13.499000,Karlstad central,SE01953,SE,,,No validity,59.377900,13.499000,Karlstads central,SE01953,Sweden,,,No validity
49259,SENyc,Name,58.900200,17.949700,Nynäshamns Centrum,SE10705,SE,,,No validity,58.900200,17.949700,Nynäshamns centrum,SE10705,Sweden,,,No validity
49460,SESuc,Name,62.386600,17.314900,Sundsvall c,SE03869,SE,,,No validity,62.386600,17.314900,Sundsvalls central,SE03869,Sweden,,,No validity


In [15]:
len(merged_ops[merged_ops.apply(lambda x: x.IsCurrentlyValid_TNS == 'Valid' and x.IsCurrentlyValid_RINF == 'Invalid', axis=1)])

76

In [24]:
# Create HTML report

from codecs import unicode_escape_encode
from encodings.utf_8_sig import encode
import locale

# In TNS but not in RINF
ops_in_tns_missing_in_rinf = op_tns_harmonized[~op_tns_harmonized["UniqueOPID"].isin(op_rinf_harmonized["UniqueOPID"])].sort_values(['Country', 'UniqueOPID'])

# In RINF but not in TNS
ops_in_rinf_missing_in_tns = op_rinf_harmonized[op_rinf_harmonized.apply(lambda x: x.IsCurrentlyValid != 'Invalid', axis=1)][~op_rinf_harmonized["UniqueOPID"].isin(op_tns_harmonized["UniqueOPID"])].sort_values(['Country', 'UniqueOPID'])

# RINF UniqueOPIDs with more than 1 valid OPs
ops_multiple_valids = op_rinf_harmonized[op_rinf_harmonized.groupby(['UniqueOPID', 'IsCurrentlyValid'])['UniqueOPID'].transform('size') > 1].sort_values('UniqueOPID')

# Only show difference of OPs that have mutliple valid stations (for example border points which are duplicates with EUxxxx and CZxxxx for example)
merged_ops_without_duplicates = merged_ops[~merged_ops["UniqueOPID"]
                                           .isin(ops_multiple_valids["UniqueOPID"])]
merged_ops_without_duplicates = merged_ops_without_duplicates.drop(columns=['Country_TNS', 'Country_RINF'])

# RINF expired OPs
expired_ops_in_tns = len(merged_ops[merged_ops.apply(lambda x: x.IsCurrentlyValid_TNS == 'Valid' and x.IsCurrentlyValid_RINF == 'Invalid', axis=1)])


# Maps
map_tns_missing = folium.Map(location=[48.210033, 16.363449],zoom_start=5)
ops_in_tns_missing_in_rinf.apply(lambda row:folium.CircleMarker(location=[row["LatitudeEpsg4326"], row["LongitudeEpsg4326"]], radius=2, tooltip=(str(("Unique OP ID: " + row["UniqueOPID"] + "</br>" + row["Name"]).encode('raw_unicode_escape')))[2:-1])
                                             .add_to(map_tns_missing), axis=1)


map_rinf_missing = folium.Map(location=[48.210033, 16.363449],zoom_start=5)
ops_in_rinf_missing_in_tns.apply(lambda row:folium.CircleMarker(location=[row["LatitudeEpsg4326"], row["LongitudeEpsg4326"]], radius=2, tooltip=(str(("Unique OP ID: " + row["UniqueOPID"] + "</br>" + row["Name"]).encode('raw_unicode_escape')))[2:-1])
                                             .add_to(map_rinf_missing), axis=1)

map_duplicate_ops = folium.Map(location=[48.210033, 16.363449],zoom_start=5)
ops_multiple_valids.apply(lambda row:folium.CircleMarker(location=[row["LatitudeEpsg4326"], row["LongitudeEpsg4326"]], radius=2, tooltip=(str(("Unique OP ID: " + row["UniqueOPID"] + "</br>" + row["Name"]).encode('raw_unicode_escape')))[2:-1])
                                             .add_to(map_duplicate_ops), axis=1)

map_different_ops = folium.Map(location=[48.210033, 16.363449],zoom_start=5)
merged_ops_without_duplicates.apply(lambda row:folium.CircleMarker(location=[row["LatitudeEpsg4326_RINF"], row["LongitudeEpsg4326_RINF"]], radius=2, tooltip=(str(("Unique OP ID: " + row["UniqueOPID"] + "</br>" + row["Name_RINF"]).encode('raw_unicode_escape')))[2:-1])
                                             .add_to(map_different_ops), axis=1)


# def hl(d):
#     temp_df = pd.DataFrame(columns=d.columns, index=d.index)
#     temp_df.loc[d['LatitudeEpsg4326_TNS'].ne(d['LatitudeEpsg4326_RINF']), ['LatitudeEpsg4326_TNS', 'LatitudeEpsg4326_RINF']] = 'background: yellow'
#     temp_df.loc[d['LongitudeEpsg4326_TNS'].ne(d['LongitudeEpsg4326_RINF']), ['LongitudeEpsg4326_TNS', 'LongitudeEpsg4326_RINF']] = 'background: yellow'
#     temp_df.loc[d['OPTafTapCode_TNS'].ne(d['OPTafTapCode_RINF']), ['OPTafTapCode_TNS', 'OPTafTapCode_RINF']] = 'background: yellow'
#     temp_df.loc[d['Name_TNS'].ne(d['Name_RINF']), ['Name_TNS', 'Name_RINF']] = 'background: yellow'
#     temp_df.loc[d['IsCurrentlyValid_TNS'].ne(d['IsCurrentlyValid_RINF']), ['IsCurrentlyValid_TNS', 'IsCurrentlyValid_RINF']] = 'background: yellow'
#     return temp_df
    
# styled_merged_ops = merged_ops.style.apply(hl, axis=None)
locale.setlocale(locale.LC_ALL, 'de_DE')

stats_group = dp.Group(
    dp.BigNumber(heading="Unknown OPs in TNS", value=locale.format("%d", len(ops_in_tns_missing_in_rinf), grouping=True)),
    dp.BigNumber(heading="New OPs in RINF", value=locale.format("%d", len(ops_in_rinf_missing_in_tns), grouping=True)),
    dp.BigNumber(heading="Duplicate OPs in RINF", value=locale.format("%d", len(ops_multiple_valids), grouping=True)),
    dp.BigNumber(heading="OP updates in RINF (differences detected)", value=locale.format("%d", len(merged_ops_without_duplicates), grouping=True)),
    dp.BigNumber(heading="Expired OPs in TNS", value=locale.format("%d", expired_ops_in_tns, grouping=True), is_positive_intent=True),
    columns=5,
)

table = dp.DataTable(ops_in_tns_missing_in_rinf, )
table2 = dp.DataTable(ops_in_rinf_missing_in_tns)
table3 = dp.DataTable(ops_multiple_valids)
table4 = dp.DataTable(merged_ops_without_duplicates)
report = dp.Report(blocks=[stats_group, dp.Select(blocks=
                                     [dp.Group(blocks=[table, dp.Plot(map_tns_missing)], label='OPs in TNS that are not in RINF'),
                                      dp.Group(blocks=[table2, dp.Plot(map_rinf_missing)], label='OPs in RINF that are not in TNS'),
                                      dp.Group(blocks=[table3, dp.Plot(map_duplicate_ops)], label='OPs that have multiple entries with same validity'), 
                                      dp.Group(blocks=[table4, dp.Plot(map_different_ops)], label='OPs that have different values in TNS <-> RINF')])])
    # dp.HTML("<h1>OPs in TNS that are not in RINF</h1>"), dp.Group(blocks=[table, dp.Plot(map_tns_missing)]), 
    # dp.Divider(),
    # dp.HTML("<h1>OPs in RINF that are not in TNS</h1>"), table2, dp.Plot(map_rinf_missing),
    # dp.Divider(),
    # dp.HTML("<h1>OPs that have duplicate entries with same validity</h1>"), table3, dp.Plot(map_duplicate_ops),
    # dp.Divider(),
    # dp.HTML("<h1>OPs that have different values in TNS <-> RINF</h1> (Note: does not include station duplicates from above)"), table4, dp.Plot(map_different_ops)])
report.save(path="RINF_Report.html", formatting=dp.ReportFormatting(width=dp.ReportWidth.FULL))

# ops_found_in_rinf = op_rinf_harmonized[op_rinf_harmonized["UniqueOPID"].isin(op_tns_harmonized["UniqueOPID"])]
# ops_found_in_rinf.compare(op_tns_harmonized, align_axis='UniqueOPID')


# print(html)
# ProfileReport(
#     op_tns[~op_tns["UniqueOPID"].isin(op_rinf["UniqueOPID"])]
#     )
# conf = {'title': 'Test', 'notebook',}
# ProfileReport(ops_in_tns_missing_in_rinf, config={'title': 'Test'})

  ops_in_rinf_missing_in_tns = op_rinf_harmonized[op_rinf_harmonized.apply(lambda x: x.IsCurrentlyValid != 'Invalid', axis=1)][~op_rinf_harmonized["UniqueOPID"].isin(op_tns_harmonized["UniqueOPID"])].sort_values(['Country', 'UniqueOPID'])
  dp.BigNumber(heading="Unknown OPs in TNS", value=locale.format("%d", len(ops_in_tns_missing_in_rinf), grouping=True)),
  dp.BigNumber(heading="New OPs in RINF", value=locale.format("%d", len(ops_in_rinf_missing_in_tns), grouping=True)),
  dp.BigNumber(heading="Duplicate OPs in RINF", value=locale.format("%d", len(ops_multiple_valids), grouping=True)),
  dp.BigNumber(heading="OP updates in RINF (differences detected)", value=locale.format("%d", len(merged_ops_without_duplicates), grouping=True)),
  dp.BigNumber(heading="Expired OPs in TNS", value=locale.format("%d", expired_ops_in_tns, grouping=True), is_positive_intent=True),
  elif isinstance(df.index, pd.Int64Index):
  elif isinstance(df.index, pd.Int64Index):
  elif isinstance(df.index, pd.Int64I

Report saved to ./RINF_Report.html

In [None]:
# Push new RINF report to git