In [1]:
import sys
import re

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

sys.path.append("..")

from src.data.import_data_pks import import_data, hierarchize_data
from src.visualization.visualize import get_colormap, get_keypicker, get_existence_chart

In [2]:
def get_df_colors(year=2022, data=None):
    """Select the rows of one year and map each key to a colour.

    Parameters
    ----------
    year : int, default 2022
        Value compared against the ``year`` column.
    data : pandas.DataFrame, optional
        Frame providing ``year``, ``key`` and ``color`` columns. When omitted,
        the notebook-level ``data_bund_hr`` frame is used, which is what this
        function always did before the parameter existed.

    Returns
    -------
    tuple
        ``(df, colors)`` where ``df`` holds the rows of ``data`` whose ``year``
        equals ``year`` and ``colors`` is a ``{key: color}`` dict built from the
        first row of every ``key`` group inside ``df``. ``colors`` is empty when
        no row matches.
    """
    if data is None:
        # Fall back to the notebook global so existing zero/one-argument calls
        # keep working unchanged.
        data = data_bund_hr
    df = data.loc[data["year"] == year]
    # Iterating the grouped "color" column yields (key, Series) pairs; the first
    # entry of each Series is the colour of the first row for that key.
    colors = {key: series.iloc[0] for key, series in df.groupby("key")["color"]}
    return df, colors

In [4]:
# Read the processed parquet file and keep only the rows whose state is "Bund".
data_raw = pd.read_parquet("../data/processed/pks.parquet")
data_bund = data_raw[data_raw["state"] == "Bund"]
# hierarchize_data is imported from src.data.import_data_pks; the recorded outputs
# of this frame show `color`, `level` and `parent` columns.
# NOTE(review): later cells use `data_bund_hr`, which no visible cell assigns —
# confirm where it is derived from `data_bund_hr_all`.
data_bund_hr_all = hierarchize_data(data_bund)

In [6]:
# Show every row whose parent key is "621100".
data_bund_hr_all[data_bund_hr_all["parent"] == "621100"]

Unnamed: 0,key,year,state,label,index,count,freq,attempts,clearance,label_change,color,level,parent
5446,621110,2018,Bund,Widerstand gegen Vollstreckungsbeamte und glei...,10216,21556,26.036222,0,21390,True,#00bfb6,5,621100
5447,621110,2019,Bund,Widerstand gegen Vollstreckungsbeamte und glei...,10200,21207,25.544689,0,20954,False,#00bfb6,5,621100
5448,621110,2020,Bund,Widerstand gegen Vollstreckungsbeamte und glei...,10216,20963,25.205999,0,20763,False,#00bfb6,5,621100
5449,621110,2021,Bund,Widerstand gegen Vollstreckungsbeamte und glei...,10199,21146,25.42961,0,20911,False,#00bfb6,5,621100
5450,621120,2018,Bund,Tätlicher Angriff auf Vollstreckungsbeamte und...,10233,11704,14.136572,0,11538,True,#bf0008,5,621100
5451,621120,2019,Bund,Tätlicher Angriff auf Vollstreckungsbeamte und...,10217,14919,17.970539,0,14626,False,#bf0008,5,621100
5452,621120,2020,Bund,Tätlicher Angriff auf Vollstreckungsbeamte und...,10233,15797,18.994379,0,15525,False,#bf0008,5,621100
5453,621120,2021,Bund,Tätlicher Angriff auf Vollstreckungsbeamte und...,10216,16787,20.187594,0,16411,False,#bf0008,5,621100


In [8]:
# Show every row recorded for key "621050".
data_bund_hr_all[data_bund_hr_all["key"] == "621050"]

Unnamed: 0,key,year,state,label,index,count,freq,attempts,clearance,label_change,color,level,parent
5432,621050,2013,Bund,Gefangenenmeuterei,7998,10,0.0,4,8,True,#bf0001,5,553200
5433,621050,2014,Bund,Gefangenenmeuterei,7978,7,0.0,2,7,False,#bf0001,5,553200
5434,621050,2015,Bund,Gefangenenmeuterei,9536,26,0.032021,3,26,False,#bf0001,5,553200
5435,621050,2016,Bund,Gefangenenmeuterei,9825,18,0.021904,7,17,False,#bf0001,5,553200
5436,621050,2017,Bund,Gefangenenmeuterei,10301,9,0.010906,2,9,False,#bf0001,5,553200
5437,621050,2018,Bund,Gefangenenmeuterei,10182,12,0.014494,3,11,False,#bf0001,5,553200
5438,621050,2019,Bund,Gefangenenmeuterei,10166,13,0.015659,5,12,False,#bf0001,5,553200
5439,621050,2020,Bund,Gefangenenmeuterei,10182,7,0.008417,2,7,False,#bf0001,5,553200
5440,621050,2021,Bund,Gefangenenmeuterei,10165,6,0.007215,4,6,False,#bf0001,5,553200


In [4]:
# --- Disabled scratch code (nothing in this cell executes) -------------------
# What it did when active: reset `label_change` on `df_selection`, flag the first
# row of every (key, label) group as a label change, blank the label on all other
# rows, and sort by key and year.
# NOTE(review): the reassignment of `df_plot` right after the loop discards the
# frame the loop built; `df_selection` is not defined in any visible cell.
# df_selection["label_change"] = False
# df_plot = pd.DataFrame()
# for i, grp in df_selection.groupby(["key", "label"]):
#     this = grp.copy()
#     this.loc[this.index[0], "label_change"] = True
#     df_plot = pd.concat([df_plot, this])
# df_plot = data_bund_hr.loc[data_bund_hr.key.isin(selected_keys)]
# df_plot= df_plot.assign(label = df_plot.apply(lambda row: row["label"] if row["label_change"] else "", axis=1))
# df_plot = df_plot.sort_values(["key", "year"])
# df_plot

In [5]:
# Colour lookup: each key maps to the colour found on the first row of its group.
colormap = {
    key: colors.iloc[0]
    for key, colors in data_bund_hr.groupby("key")["color"]
}

In [6]:
# The template is written over several indented lines for readability; every run
# of two or more spaces and every newline is removed, leaving one compact line.
hovertemplate = re.sub(
    r"([ ]{2,})|(\n)",
    "",
    """
                <b>%{customdata[1]}</b><br><br>
                %{customdata[0]}<br>
                <extra>%{customdata[2]} Fälle</extra>""",
)

In [7]:
# Keys to plot, as strings so they compare equal to the `key` column values.
selected_keys = [
    "621000",
    "621010",
    "621020",
    "621021",
    "621029",
    "621030",
    "621040",
    "621050",
    "621100",
    "621110",
    "621120",
]

In [10]:
# Display the frame used for plotting (the recorded output is truncated to the
# first and last rows).
# NOTE(review): `data_bund_hr` is not assigned in any visible cell — confirm its origin.
data_bund_hr

Unnamed: 0,key,year,state,label,index,count,freq,attempts,clearance,label_change,color,level,parent
0,****00,2013,Bund,Diebstahl insgesamt und zwar:,5615,2382743,2959.100000,276151,655176,True,#bf5f00,1,
1,***100,2013,Bund,Diebstahl insgesamt von Kraftwagen einschl. un...,5632,37427,46.500000,7528,9759,True,#bf4c00,4,****00
2,***200,2013,Bund,Diebstahl insgesamt von Mopeds und Krafträdern...,5649,28299,35.100000,3545,6090,True,#bf9800,4,****00
3,***300,2013,Bund,Diebstahl insgesamt von Fahrrädern einschl. un...,5666,316857,393.500000,5108,30543,True,#99bf00,4,****00
4,***400,2013,Bund,Diebstahl insgesamt von Schusswaffen,5683,816,1.000000,66,292,True,#4cbf00,4,****00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,898100,2022,Bund,Umweltstraftaten gemäß 29. Abschnitt des StGB,7003,9611,11.546531,180,5088,False,#02bf00,4,898000
411,898200,2022,Bund,Sonstige Straftaten nach dem StGB mit Umweltre...,7020,4711,5.659734,177,2575,False,#0002bf,4,898000
412,898300,2022,Bund,Straftaten auf dem Umwelt- und Verbraucherschu...,7037,17474,20.993037,164,12908,False,#bf0002,4,898000
413,899000,2022,Bund,Straßenkriminalität,7054,1084688,1303.130079,62057,202857,False,#bf0000,3,740000


In [9]:
# NOTE(review): `l` is not read by any visible cell — confirm whether it is still needed.
l = len(selected_keys)
# get_existence_chart is imported from src.visualization.visualize; it receives the
# plotting frame, the keys to include, and the key -> colour mapping.
fig = get_existence_chart(data_bund_hr, selected_keys, colormap)

fig.show()

In [57]:
# Keep the rows for the keys of interest, drop fully duplicated rows, then project
# onto the label-related columns.
# NOTE(review): de-duplication runs on ALL columns before the projection, so the
# three displayed columns can still contain repeated rows (the recorded output
# does). `df_plot` and `mykeys` are not assigned in any visible cell.
x = (
    df_plot[df_plot["key"].isin(mykeys)]
    .drop_duplicates()
    .loc[:, ["key", "label", "label_change"]]
)
x

Unnamed: 0,key,label,label_change
532,621000,"Widerstand gegen die Staatsgewalt §§ 111, 113,...",True
533,621000,,False
552,621000,,False
569,621000,,False
597,621000,,False
...,...,...,...
598,621110,,False
600,621120,Tätlicher Angriff auf Vollstreckungsbeamte und...,True
600,621120,,False
600,621120,Tätlicher Angriff auf Vollstreckungsbeamte und...,True


In [9]:
# NOTE(review): scratch expression that uses nothing from the cells above —
# candidate for removal before sharing the notebook.
[98, "aei"]

[98, 'aei']