# Dataset

In [None]:
!pip install eurostat geopandas

In [None]:
# Download the GeoJSON file into the working directory; it is read later
# via geopandas.read_file("europe.geo.json").
!wget https://github.com/datanizing/ix-jupyter-visualisierung/raw/main/europe.geo.json

In [None]:
import pandas as pd
import eurostat
# Dictionary for the "indic" dimension; used further down to translate
# indicator codes into readable labels.
realnames_i = eurostat.get_dic('indic')

# Fetch the table "ei_bsco_m" as a DataFrame and rename the combined
# geo/time column to "country" in one chained expression.
df = eurostat.get_data_df("ei_bsco_m").rename(columns={"geo\\time": "country"})

Spalten aufräumen

In [None]:
from datetime import datetime
def _parse_month_label(label):
    """Turn a ``YYYYMmm`` column label (e.g. ``"2020M08"``) into a ``datetime``.

    Only string labels starting with "19" or "20" are converted; the result is
    the first day of that month. Every other label (including labels that are
    already ``datetime`` objects) is returned unchanged, which makes the cell
    safe to re-run.
    """
    if isinstance(label, str) and label.startswith(("19", "20")):
        parts = label.split("M")
        return datetime.strptime(parts[0] + "-" + parts[1] + "-01", "%Y-%m-%d")
    return label


df.columns = [_parse_month_label(label) for label in df.columns]

# Korrelationen der Indikatoren miteinander vergleichen

In [None]:
import scipy.stats as stats
# Pairwise Pearson correlation between all indicators for country "DE",
# s_adj == "NSA", using only the monthly columns from the year 2000 onwards.
indicators = df["indic"].unique()

# Monthly columns carry datetime labels; select the relevant ones once.
month_cols = [c for c in df.columns if isinstance(c, datetime) and c.year >= 2000]
de_nsa = df[(df["s_adj"] == "NSA") & (df["country"] == "DE")]

# Extract each indicator's series a single time instead of re-filtering the
# whole frame for every (i1, i2) pair. As before, the first matching row is
# used when an indicator has more than one.
values_by_indicator = {
    i: de_nsa[de_nsa["indic"] == i][month_cols].iloc[0].values
    for i in indicators
}

# corr[a][b] is the correlation coefficient r between indicator a and b
# (the p-value is discarded).
corr = []
for i1 in indicators:
    res = []
    for i2 in indicators:
        r, p = stats.pearsonr(values_by_indicator[i1], values_by_indicator[i2])
        res.append(r)
    corr.append(res)

In [None]:
# First-of-month timestamps for 2000-01 through 2020-12, minus the last two
# months (i.e. ending with 2020-10).
y20 = [datetime(year, month, 1) for year in range(2000, 2021) for month in range(1, 13)]
y20 = y20[:-2]

# Rows for country "DE" with s_adj == "NSA", reshaped so that months become
# the rows and each indicator code becomes a column.
is_de_nsa = (df["country"] == "DE") & (df["s_adj"] == "NSA")
de20 = df.loc[is_de_nsa].set_index("indic").loc[:, y20].T
de20.index = pd.DatetimeIndex(de20.index)

In [None]:
# Display the reshaped table: one row per month, one column per indicator code.
de20

In [None]:
import scipy.stats as stats
indicators = de20.columns

# Compute the correlations: Pearson r for every ordered pair of indicator
# columns (the p-value returned by pearsonr is not used).
corr = [
    [stats.pearsonr(de20[row_ind].values, de20[col_ind].values)[0] for col_ind in indicators]
    for row_ind in indicators
]

# Convert to a DataFrame whose rows and columns carry the readable
# indicator names instead of the codes.
real_indicators = [realnames_i[code] for code in indicators]
ihm = pd.DataFrame(data=corr, index=real_indicators, columns=real_indicators)
ihm

# Heatmaps

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Correlation heatmap with the "viridis" colormap, colour scale fixed to [-1, 1].
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(ihm, cmap="viridis", vmin=-1, vmax=1, ax=ax)

In [None]:
# Same heatmap with the "RdBu" colormap, colour scale fixed to [-1, 1].
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(ihm, cmap="RdBu", vmin=-1, vmax=1, ax=ax)

# Scatterplots

In [None]:
# Plain scatter plot of the two indicator columns against each other.
de20.plot(kind="scatter", x="BS-SFSH", y="BS-CSMCI")

In [None]:
import seaborn as sns
import scipy.stats as stats
# Joint plot of the same two columns with a regression fit (kind="reg").
sns.jointplot(data=de20, x="BS-SFSH", y="BS-CSMCI", kind="reg")

Beispiel für ein Diagramm mit zu vielen Details :D

In [None]:
import matplotlib.pyplot as plt
# Regression line and marginal plots only (scatter=False); the points are
# added manually below so that size and hue can be controlled.
g = sns.jointplot(x=de20["BS-SFSH"], y=de20["BS-CSMCI"], scatter=False, kind="reg")
# Draw on the joint axes explicitly instead of relying on pyplot's implicit
# "current axes" after jointplot().
joint_ax = g.ax_joint
# g.annotate(stats.pearsonr)   (left disabled; annotated manually at the end)

# Current savings ("BS-SV-PR") as the size of the bubbles.
# Take only the first month of each year ...
de20s = de20[de20.index.month == 1].copy()
# ... and only every other year.
de20s = de20s[::2]
# Strip the "-01-01 00:00:00" suffix so the legend labels show the year only.
de20s.index = de20s.index.map(str).str.replace("-01-01 00:00:00", "")
sns.scatterplot(x=de20s["BS-SFSH"], y=de20s["BS-CSMCI"], s=de20s["BS-SV-PR"],
                hue=de20s.index, legend=True, ax=joint_ax)

# Detail plot with all months, without legend entries.
sns.scatterplot(x=de20["BS-SFSH"], y=de20["BS-CSMCI"], s=de20["BS-SV-PR"],
                hue=de20.index, legend=False, ax=joint_ax)

# Unpack the result explicitly rather than feeding the result object to "%".
pearson_r, pearson_p = stats.pearsonr(de20["BS-SFSH"], de20["BS-CSMCI"])
joint_ax.annotate("pearsonr=%0.2f p=%e" % (pearson_r, pearson_p), xy=(5, -25))

In [None]:
import scipy.stats
# Linear regression of BS-CSMCI (y) on BS-SFSH (x); the result object is the cell output.
scipy.stats.linregress(x=de20["BS-SFSH"], y=de20["BS-CSMCI"])

# GeoPandas

In [None]:
import geopandas
bl_geo = geopandas.read_file("europe.geo.json")
# Eurostat deviates from ISO 3166-1 alpha-2 for two countries: it uses "UK"
# for the United Kingdom (ISO: "GB") and "EL" for Greece (ISO: "GR").
# Rewrite the shape codes to the Eurostat convention so that the later merge
# on the country code finds both countries.
for iso_code, eurostat_code in {"GB": "UK", "GR": "EL"}.items():
    bl_geo.loc[bl_geo["iso_a2"] == iso_code, "iso_a2"] = eurostat_code
bl_geo[["iso_a2", "geometry"]]

In [None]:
# Plot all shapes except those whose code is in the exclusion list.
skipped_codes = ["RU", "IS", "UA", "BY", "MD"]
keep_shape = ~bl_geo["iso_a2"].isin(skipped_codes)
bl_geo[keep_shape].plot(figsize=(10, 10))

In [None]:
# Rows of the indicator "BS-CSMCI" with s_adj == "NSA", for every country.
is_csmci_nsa = (df["s_adj"] == "NSA") & (df["indic"] == "BS-CSMCI")
hm = df.loc[is_csmci_nsa]

# Outer join on the country code: keeps data rows without a shape as well as
# shapes without data.
merged = hm.merge(bl_geo, how="outer", left_on="country", right_on='iso_a2')
ghm = geopandas.GeoDataFrame(merged)

In [None]:
# Choropleth coloured by the 2020-08 column, with a horizontal colour bar.
ghm.plot(
    column=datetime(2020, 8, 1),
    figsize=(10, 10),
    legend=True,
    legend_kwds={'orientation': "horizontal"},
)

In [None]:
# Same choropleth without the shapes in the exclusion list; entries that have
# no value in the 2020-08 column are styled light grey via missing_kwds.
skipped_codes = ["RU", "IS", "UA", "BY", "MD"]
ghm_subset = ghm[~ghm["iso_a2"].isin(skipped_codes)]
ghm_subset.plot(
    column=datetime(2020, 8, 1),
    legend=True,
    legend_kwds={'orientation': "horizontal"},
    missing_kwds={"color": "lightgrey"},
    figsize=(10, 10),
)