In [1]:
import pandas as pd
import sys 
sys.path.append('../../modules')
import eumf_data as data
import numpy as np
from scipy import stats
from statsmodels.api import tsa

In [2]:
# ISO 3166-1 alpha-2 country code -> German country name used as label in the
# exported files. "LT" is Lithuania ("Litauen") and "LV" is Latvia
# ("Lettland"); the two names were previously swapped.
countries = {
    "FR": "Frankreich",
    "GB": "Vereinigtes Königreich",
    "IT": "Italien",
    "ES": "Spanien",
    "PL": "Polen",
    "RO": "Rumänien",
    "NL": "Niederlande",
    "BE": "Belgien",
    "GR": "Griechenland",
    "CZ": "Tschechien",
    "PT": "Portugal",
    "SE": "Schweden",
    "HU": "Ungarn",
    "AT": "Österreich",
    "CH": "Schweiz",
    "BG": "Bulgarien",
    "DK": "Dänemark",
    "FI": "Finnland",
    "SK": "Slowakei",
    "IE": "Irland",
    "HR": "Kroatien",
    "LT": "Litauen",
    "SI": "Slowenien",
    "LV": "Lettland",
    "EE": "Estland",
    "CY": "Zypern",
    "LU": "Luxemburg",
}

def filter_panel(df, country_filter, column_names):
    """Select column groups and countries and return them as a long panel.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with MultiIndex columns. The innermost column level is moved
        into the row index by ``stack()`` and then filtered, so it is
        expected to hold the country labels. Assumes a single-level row
        index, so that the stacked level becomes the second index level.
    country_filter : str or list-like of str
        Country label, or collection of labels (list, tuple, dict view, ...),
        to keep on the stacked level.
    column_names : dict
        Maps the column keys to select onto the labels they carry in the
        result.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by (original index, country), one column per entry of
        ``column_names`` (renamed).
    """
    # Index with a concrete list instead of a dict view so the selection does
    # not depend on how pandas treats set-like indexers.
    selected = df[list(column_names)].rename(columns=column_names)

    # Materialise tuples / dict views as a list, the documented form for
    # selecting several labels on one MultiIndex level; scalars pass through.
    if pd.api.types.is_list_like(country_filter):
        country_filter = list(country_filter)

    return selected.stack().loc[pd.IndexSlice[:, country_filter], :]



## Anmeldungen absolut

In [3]:
# Registration series as delivered by the project loader.
rates = data.load_migration_rates_from_csv()

# Column-wise sum of the "value" group over the whole index, largest first.
rates_total = rates["value"].sum().sort_values(ascending=False)
rates_total.to_csv("data/mvp/anmeldungen_total.csv")

# Yearly sums, transposed so that the former columns become rows; the first
# level of the resulting row index is dropped and the columns are the years.
rates_yearly = rates.resample("1Y").sum().T
rates_yearly = rates_yearly.droplevel(0)
rates_yearly.columns = rates_yearly.columns.year
rates_yearly.to_csv("data/mvp/anmeldungen_jaehrlich.csv")

# From here on `rates` carries the German labels from `countries`.
rates = rates.rename(columns=countries)


In [4]:
# ISO 3166-1 alpha-2 -> alpha-3 country codes.
iso3 = dict(
    FR="FRA",
    GB="GBR",
    IT="ITA",
    ES="ESP",
    PL="POL",
    RO="ROU",
    NL="NLD",
    BE="BEL",
    GR="GRC",
    CZ="CZE",
    PT="PRT",
    SE="SWE",
    HU="HUN",
    AT="AUT",
    CH="CHE",
    BG="BGR",
    DK="DNK",
    FI="FIN",
    SK="SVK",
    IE="IRL",
    HR="HRV",
    LT="LTU",
    SI="SVN",
    LV="LVA",
    EE="EST",
    CY="CYP",
    LU="LUX",
)


# Long format of the yearly table: row labels translated via `iso3`, one row
# per (row label, year), values in column "Anmeldungen", plus a constant
# destination column "dest" = "DEU".
yearly_long = rates_yearly.rename(index=iso3).stack()
yearly_long = yearly_long.rename("Anmeldungen").to_frame()
yearly_long = yearly_long.assign(dest="DEU")
yearly_long.to_csv("data/mvp/anmeldungen_jaehrlich_stacked.csv")



In [5]:
# Country coordinates; the file is comma-separated and blanks after a
# delimiter are skipped.
coordinates = pd.read_table(
    "data/mvp/countries_codes_and_coordinates.csv",
    sep=",",
    skipinitialspace=True,
)

# Attach latitude/longitude to the yearly table by matching the alpha-2 code
# column against the row labels of `rates_yearly`.
pd.merge(
    left=coordinates.loc[
        :, ["Alpha-2 code", "Latitude (average)", "Longitude (average)"]
    ],
    right=rates_yearly,
    left_on="Alpha-2 code",
    right_index=True,
).to_csv("data/mvp/anmeldungen_jaehrlich_coordinates.csv")


## Trends und Anmeldungen normiert

In [6]:
# Trends series with the column labels translated via `countries`.
trends = data.load_trends_from_csv().rename(columns=countries)

# Trends: keep 2010 onwards and scale every column so that its mean over
# 2010-01-01 .. 2010-12-01 equals 100.
df_trends = trends.loc["2010-01-01":]
trends_base = df_trends.loc["2010-01-01":"2010-12-01"].mean()
df_trends = df_trends / trends_base * 100

# Registrations: same scaling, applied to the full period.
rates_base = rates.loc["2010-01-01":"2010-12-01"].mean()
df_rates = rates / rates_base * 100

keyword_labels = {}

# Both indexed data sets side by side; also exported in stacked form.
df_all = pd.concat(objs=[df_rates, df_trends], axis=1)
df_all.stack().to_csv("data/mvp/all.csv")



In [7]:
# Southern Europe: indexed registrations next to keyword column "19".
country_filter = ("Italien", "Spanien", "Portugal", "Griechenland")
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19_suedeuropa.csv")



In [8]:
# Southern Europe, keyword column "19": the indexed series (`a`) joined with
# their 365-day rolling mean (`b`). Both frames use the same column labels,
# so the merged columns are told apart by pandas' default merge suffixes.
country_filter = ("Italien", "Spanien", "Portugal", "Griechenland")
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

a = filter_panel(df=df_all, country_filter=country_filter, column_names=column_names)
b = filter_panel(
    df=df_all.rolling(window="365D").mean(),
    country_filter=country_filter,
    column_names=column_names,
)
pd.merge(left=a, right=b, left_index=True, right_index=True).to_csv(
    "data/mvp/kw_19_suedeuropa_rolling.csv"
)



In [9]:
# Southern Europe, keyword column "19": 3-month means between 2010-01-01 and
# 2020-01-01.
# NOTE(review): "3M" bins are anchored to the first timestamp of the data,
# not to calendar quarters - confirm the periods are the intended quarters.
country_filter = ("Italien", "Spanien", "Portugal", "Griechenland")
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

df_all_3m = df_all.resample("3M").mean().loc["2010-01-01":"2020-01-01"]
filter_panel(df_all_3m, country_filter, column_names).to_csv(
    "data/mvp/kw_19_suedeuropa_quartal.csv"
)



In [300]:
# All countries listed in `countries`: indexed registrations next to keyword
# column "19".
country_filter = countries.values()
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19.csv")



In [301]:
# Croatia only: indexed registrations next to keyword column "19".
country_filter = "Kroatien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19_kroatien.csv")

In [10]:
# Croatia only, keyword column "19": 3-month means between 2010-01-01 and
# 2020-01-01.
country_filter = "Kroatien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

df_all_3m = df_all.resample("3M").mean().loc["2010-01-01":"2020-01-01"]
filter_panel(df_all_3m, country_filter, column_names).to_csv(
    "data/mvp/kw_19_kroatien_quartal.csv"
)

In [12]:
# United Kingdom only: indexed registrations next to keyword column "19".
country_filter = "Vereinigtes Königreich"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19_uk.csv")

In [13]:
# United Kingdom only, keyword column "19": 3-month means between 2010-01-01
# and 2020-01-01.
country_filter = "Vereinigtes Königreich"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

df_all_3m = df_all.resample("3M").mean().loc["2010-01-01":"2020-01-01"]
filter_panel(df_all_3m, country_filter, column_names).to_csv(
    "data/mvp/kw_19_uk_quartal.csv"
)

In [303]:
# Romania only: indexed registrations next to keyword column "19".
country_filter = "Rumänien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19_rumaenien.csv")

In [14]:
# Romania only, keyword column "19": 3-month means between 2010-01-01 and
# 2020-01-01.
country_filter = "Rumänien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

df_all_3m = df_all.resample("3M").mean().loc["2010-01-01":"2020-01-01"]
filter_panel(df_all_3m, country_filter, column_names).to_csv(
    "data/mvp/kw_19_rumaenien_quartal.csv"
)

In [304]:
# Italy only: indexed registrations next to keyword column "55".
country_filter = "Italien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "55": "Google Trends Index",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_55_italien.csv")


In [305]:
# Italy only: indexed registrations next to keyword column "33".
country_filter = "Italien"
column_names = {
    "value": "Anmeldungen in Deutschland",
    "33": "Google Trends Index",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_33_italien.csv")


In [306]:
# Southern Europe: indexed registrations next to keyword column "24".
country_filter = ("Italien", "Spanien", "Portugal", "Griechenland")
column_names = {
    "value": "Anmeldungen in Deutschland",
    "24": "Suche nach Gehalt in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_24_suedeuropa.csv")


In [307]:
# Second country group: indexed registrations next to keyword column "24".
country_filter = (
    "Österreich",
    "Schweiz",
    "Niederlande",
    "Rumänien",
    "Polen",
    "Vereinigtes Königreich",
    "Frankreich",
)
column_names = {
    "value": "Anmeldungen in Deutschland",
    "24": "Suche nach Gehalt in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_24_andere.csv")



In [None]:
# All countries in `countries`, registrations plus a larger set of keyword
# columns, exported as 3-month means over the full available period (no date
# restriction is applied here).
country_filter = countries.values()
column_names = {
    "value": "Anmeldungen in Deutschland",
    "11": "Google Trends: Visum/Einreisebestimmungen Deutschland",
    "14": "Google Trends: Rente Deutschland",
    "19": "Google Trends: Jobs/Arbeit Deutschland",
    "21": "Google Trends: Einkommen/Steuern Deutschland",
    "22": "Google Trends: BIP Deutschland",
    "24": "Google Trends: Gehalt Deutschland",
    "26": "Google Trends: Wirtschaft Deutschland",
    "37": "Google Trends: Abitur/Universität Deutschland",
    "42": "Google Trends: Studium Deutschland",
    "43": "Google Trends: Bachelor/Master/Promotion Deutschland",
    "48": "Google Trends: Konto Deutschland",
    "49": "Google Trends: Wohnung Deutschland",
    "52": "Google Trends: Miete Deutschland",
    "112": "Google Trends: Mindestlohn",
    "113": "Google Trends: Sozialhilfe",
    "114": "Google Trends: Rente",
    "115": "Google Trends: Arbeitslosigkeit",
    "117": "Google Trends: Inflation",
    "118": "Google Trends: Arbeitslosengeld",
    "119": "Google Trends: Arbeit",
    "121": "Google Trends: Einkommen/Steuer",
    "122": "Google Trends: BIP",
    "123": "Google Trends: Jobs",
    "124": "Google Trends: Gehalt",
    "125": "Google Trends: Brutto/Netto/Zulagen",
}

df_all_3m = df_all.resample("3M").mean()
filter_panel(df_all_3m, country_filter, column_names).to_csv("data/mvp/alles.csv")



## Trends: transformierte Daten

In [15]:
# Transformed series: 3-month means restricted to 2010-01-01 .. 2019-12-31,
# then the difference between the log of each value and the log of the value
# four periods earlier. A constant is added before taking the log (10 for the
# registrations, 1 for the trends) - presumably to keep the log finite at 0.
# NOTE(review): "3M" bins are anchored to the first timestamp of the data,
# not to calendar quarters - confirm the periods are the intended quarters.
df_rates_trans = np.log(
    rates.resample("3M").mean().loc["2010-01-01":"2019-12-31"] + 10
)
df_rates_trans = df_rates_trans - df_rates_trans.shift(4)

df_trends_trans = np.log(
    trends.resample("3M").mean().loc["2010-01-01":"2019-12-31"] + 1
)
df_trends_trans = df_trends_trans - df_trends_trans.shift(4)

# Registrations and trends side by side, mirroring `df_all`.
df_all_trans = pd.concat(objs=[df_rates_trans, df_trends_trans], axis=1)


In [65]:
# Southern Europe: transformed registrations next to transformed keyword
# column "19".
country_filter = ("Italien", "Spanien", "Portugal", "Griechenland")
column_names = {
    "value": "Anmeldungen in Deutschland",
    "19": "Suche nach Jobs in Deutschland (Google Trends Index)",
}

filter_panel(
    df=df_all_trans,
    country_filter=country_filter,
    column_names=column_names,
).to_csv("data/mvp/kw_19_suedeuropa_transformiert.csv")



In [18]:
# Cross-correlation between transformed registrations and transformed keyword
# column "19" for the southern-European countries, on the window t_min..t_max.
t_min = "2011-01-01"
t_max = "2019-01-01"
country_filter = ("Griechenland", "Italien", "Portugal", "Spanien")


def get_ccf(c):
    """Return `tsa.ccf` of registrations vs. keyword "19" for country `c`."""
    registrations = df_all_trans["value", c].loc[t_min:t_max]
    searches = df_all_trans["19", c].loc[t_min:t_max]
    return tsa.ccf(registrations, searches, adjusted=False)


# Keep the first 12 values per country (presumably lags 0-11 - confirm
# against the statsmodels version in use) and export them in stacked form.
df_ccf = pd.DataFrame({c: get_ccf(c)[:12] for c in country_filter})
df_ccf.stack().rename("Suche nach Jobs vs. Anmeldungen").to_csv(
    "data/mvp/kw_19_suedeuropa_ccf.csv"
)
