In [None]:
# Load libraries
library(tidyverse)


In [None]:
# Download the original migration data from data.bs.ch (dataset 100138) and
# save it in the raw folder. The export URL restricts the data to the event
# types "Wegzug" and "Zuzug" with a date on or after 2006-01-01 and requests
# ";" as the delimiter.
# It has been downloaded on 2023-03-28
# NOTE(review): the data preview further down shows rows for 2023-10, so this
# download date looks stale - confirm and update it.

download_link <- "https://data.bs.ch/api/explore/v2.1/catalog/datasets/100138/exports/csv?lang=de&refine=ereignis_typ%3A%22Wegzug%22&refine=ereignis_typ%3A%22Zuzug%22&facet=facet(name%3D%22ereignis_typ%22%2C%20disjunctive%3Dtrue)&qv1=(datum%3E%3D%222006-01-01%22)&timezone=Europe%2FBerlin&use_labels=true&delimiter=%3B"

# download.file() does not create missing directories, so make sure the
# destination folder exists first.
dir.create("raw", showWarnings = FALSE, recursive = TRUE)

# mode = "wb" stores the bytes exactly as served (no newline translation)
download.file(download_link, destfile = "raw/100138.csv", mode = "wb")


In [None]:
# Read the raw, semicolon-separated download into a tibble
original_data <- read_delim(file = "raw/100138.csv", delim = ";")


In [35]:
# Keep only migrations from 2006 onwards and drop migrations within Basel
# (Wanderungstyp "Umzug")
filtered_data <- original_data %>%
  filter(Jahr > 2005, Wanderungstyp != "Umzug")


In [36]:
# Build a slimmer dataframe without the columns listed below
unused_columns <- c(
  "Datum",
  "Kalenderwoche",
  "Startdatum Woche",
  "Tag-Nr.",
  "Wochentag",
  "Aufenthaltsdauer in Jahren",
  "Von Gemeinde",
  "Nach Gemeinde"
)

data_reduced <- filtered_data %>%
  select(-all_of(unused_columns))


In [37]:
# Turn the column names into syntactically valid R names
# (make.names() replaces characters such as spaces with dots)
data_reduced <- setNames(data_reduced, make.names(names(data_reduced)))


In [38]:
# Give the columns their final names (new name = current name):
# no umlauts and no dots
column_renames <- c(
  Staatsangehoerigkeit = "Staatsangehörigkeit",
  VonKontinent = "Von.Kontinent",
  VonLand = "Von.Land",
  VonKanton = "Von.Kanton",
  VonWohnviertel = "Von.Wohnviertel",
  NachKontinent = "Nach.Kontinent",
  NachLand = "Nach.Land",
  NachKanton = "Nach.Kanton",
  NachWohnviertel = "Nach.Wohnviertel"
)

data_reduced <- data_reduced %>%
  rename(all_of(column_renames))


In [39]:
# Replace the placeholders "Unbekannt", "unbekannt" and "--" with NA.
# Only character columns can hold these strings, so other columns are left
# alone. across() supersedes the scoped mutate_all() verb, and replace()
# keeps the type of each column.
data_reduced <- mutate(
  data_reduced,
  across(
    where(is.character),
    ~ replace(.x, .x %in% c("Unbekannt", "unbekannt", "--"), NA)
  )
)


In [40]:
# Order the rows chronologically (year first, then month)
data_reduced <- data_reduced %>%
  arrange(Jahr, Monat)


In [41]:
# Show the first and the last three rows as a quick sanity check
head(data_reduced, n = 3)
tail(data_reduced, n = 3)


Wanderungstyp,Jahr,Monat,Staatsangehoerigkeit,Geschlecht,Alter,VonKontinent,VonLand,VonKanton,VonWohnviertel,NachKontinent,NachLand,NachKanton,NachWohnviertel,Anzahl
<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
Zuzug,2006,1,Ausländer,M,12,Schweiz,Schweiz,AR,,Schweiz,Schweiz,BS,Clara,1
Zuzug,2006,1,Ausländer,M,24,Europa (ohne Schweiz),Deutschland,,,Schweiz,Schweiz,BS,Riehen,1
Zuzug,2006,1,Ausländer,M,25,Schweiz,Schweiz,BL,,Schweiz,Schweiz,BS,St. Johann,1


Wanderungstyp,Jahr,Monat,Staatsangehoerigkeit,Geschlecht,Alter,VonKontinent,VonLand,VonKanton,VonWohnviertel,NachKontinent,NachLand,NachKanton,NachWohnviertel,Anzahl
<chr>,<dbl>,<dbl>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
Zuzug,2023,10,Ausländer,M,23,Asien,Indien,,,Schweiz,Schweiz,BS,Wettstein,1
Zuzug,2023,10,Ausländer,W,53,Asien,Georgien,,,Schweiz,Schweiz,BS,Gundeldingen,1
Zuzug,2023,10,Ausländer,W,48,Schweiz,Schweiz,ZH,,Schweiz,Schweiz,BS,Matthäus,1


In [42]:
# For the final visualization, we want an individual data point for each
# migration, so rows with Anzahl > 1 have to be repeated.
# Only the additional copies are built here; the original rows stay in
# data_reduced.

# Number of extra copies per row: Anzahl - 1, but never less than zero
extra_copies <- pmax(data_reduced$Anzahl - 1, 0)

# Row numbers to repeat, each one listed once per extra copy
row_indices <- rep(seq_len(nrow(data_reduced)), times = extra_copies)

# Look the repeated row numbers up in the original dataframe
expanded_data <- data_reduced[row_indices, ]


In [43]:
# Append the copied rows below the original rows
data_modified <- bind_rows(data_reduced, expanded_data)


In [44]:
# Sort again now that the copies were appended: by year, month and age
data_modified <- data_modified %>%
  arrange(Jahr, Monat, Alter)


In [45]:
# Every row now stands for a single migration, so Anzahl can be dropped
data_modified <- data_modified %>%
  select(-Anzahl)


In [47]:
# The geojson of Switzerland used in the frontend doesn't have
# canton abbreviations, so we'll map our data here accordingly.
#
# Lookup list: canton abbreviation -> canton name.
# "CH" has no canton name and maps to a missing value. It is spelled
# NA_character_ (not a plain logical NA) so that every element is a string
# and as.character() on a lookup result yields a real NA instead of the
# text "NA".

cantons <- list(
  AG = "Aargau",
  AI = "Appenzell Innerrhoden",
  AR = "Appenzell Ausserrhoden",
  BE = "Bern",
  BL = "Basel-Landschaft",
  BS = "Basel-Stadt",
  FR = "Fribourg",
  GE = "Genève",
  GL = "Glarus",
  GR = "Graubünden",
  JU = "Jura",
  LU = "Luzern",
  NE = "Neuchâtel",
  NW = "Nidwalden",
  OW = "Obwalden",
  SG = "St. Gallen",
  SH = "Schaffhausen",
  SO = "Solothurn",
  SZ = "Schwyz",
  TG = "Thurgau",
  TI = "Ticino",
  UR = "Uri",
  VD = "Vaud",
  VS = "Valais",
  ZG = "Zug",
  ZH = "Zürich",
  CH = NA_character_
)


In [48]:
# The geojson of the world used in the frontend doesn't necessarily
# have German country names, so we'll map our data here accordingly.

# Also, the world map's scale doesn't allow plotting every small
# country/state/island like Andorra, Singapore, etc. These are mapped to
# their surrounding areas, even though that might not be politically correct.
# I apologize if this offends anyone. It's about plotting the very
# rough location on the map, not about political belongings.

# Lookup list: German country name from the data -> region name on the map.
# Entries without a region use NA_character_ (not a plain logical NA) so that
# every element is a string and as.character() on a lookup result yields a
# real NA instead of the text "NA".

countries <- list(
  "Aegypten" = "Egypt",
  "Aethiopien" = "Ethiopia",
  "Afghanistan" = "Afghanistan",
  "Albanien" = "Albania",
  "Algerien" = "Algeria",
  "Andorra" = "Spain",
  "Angola" = "Angola",
  # NOTE(review): this entry maps to the string "Unknown" while another entry
  # without a region maps to NA - confirm which one the frontend expects.
  "Antarktis & Antarktische Ins." = "Unknown",
  "Argentinien" = "Argentina",
  "Armenien" = "Armenia",
  "Aruba" = "Venezuela",
  "Aserbaidschan" = "Azerbaijan",
  "Australien" = "Australia",
  "Bahamas" = "Bahamas",
  "Bahrain" = "Saudi Arabia",
  "Bangladesch" = "Bangladesh",
  "Barbados" = "Venezuela",
  "Belgien" = "Belgium",
  "Belize" = "Belize",
  "Benin" = "Benin",
  "Bermuda Inseln" = "Puerto Rico",
  "Bhutan" = "Bhutan",
  "Bolivien" = "Bolivia",
  "Bosnien-Herzegowina" = "Bosnia and Herz.",
  "Botswana" = "Botswana",
  "Brasilien" = "Brazil",
  "Brit.Territ.im Indischen Ozean" = NA_character_,
  "Brunei" = "Malaysia",
  "Brunei Darussalam" = "Malaysia",
  "Bulgarien" = "Bulgaria",
  "Bundesrep. Jugoslawien" = "Serbia",
  "Burkina Faso" = "Burkina Faso",
  "Burundi" = "Burundi",
  "Chile" = "Chile",
  "China" = "China",
  "Costa Rica" = "Costa Rica",
  "Dänemark" = "Denmark",
  "Deutschland" = "Germany",
  "Dominica" = "Puerto Rico",
  "Dominikanische Republik" = "Dominican Rep.",
  "Ecuador" = "Ecuador",
  "El Salvador" = "El Salvador",
  "Elfenbeinküste" = "Côte d'Ivoire",
  "Eritrea" = "Eritrea",
  "Estland" = "Estonia",
  "Fidschi" = "Fiji",
  "Finnland" = "Finland",
  "Frankreich" = "France",
  "Französisch Polynesien" = "Fiji",
  "Gabun" = "Gabon",
  "Gambia" = "Gambia",
  "Georgien" = "Georgia",
  "Ghana" = "Ghana",
  "Gibraltar" = "Spain",
  "Grenada" = "Venezuela",
  "Griechenland" = "Greece",
  "Grossbritannien" = "United Kingdom",
  "Grönland" = "Greenland",
  "Guadeloupe" = "Puerto Rico",
  "Guatemala" = "Guatemala",
  "Guernsey" = "France",
  "Guinea-Bissau" = "Guinea-Bissau",
  "Guinea" = "Guinea",
  "Guyana (Französisch)" = "Suriname",
  "Guyana (Republik)" = "Guyana",
  "Guyana" = "Guyana",
  "Haiti" = "Haiti",
  "Honduras" = "Honduras",
  "Hong Kong" = "China",
  "Indien" = "India",
  "Indonesien" = "Indonesia",
  "Irak" = "Iraq",
  "Iran" = "Iran",
  "Irland" = "Ireland",
  "Island" = "Iceland",
  "Israel" = "Israel",
  "Italien" = "Italy",
  "Jamaika" = "Jamaica",
  "Japan" = "Japan",
  "Jemen (Arabische Republik)" = "Yemen",
  "Jordanien" = "Jordan",
  "Jungferninseln (UK)" = "Puerto Rico",
  "Jungferninseln (USA)" = "Puerto Rico",
  "Kaimaninseln" = "Cuba",
  "Kambodscha" = "Cambodia",
  "Kamerun" = "Cameroon",
  "Kanada" = "Canada",
  "Kanalinseln" = "France",
  "Kanarische Inseln" = "Morocco",
  "Kap Verde" = "Senegal",
  "Kasachstan" = "Kazakhstan",
  "Katar" = "Qatar",
  "Kenia" = "Kenya",
  "Kirgistan" = "Kyrgyzstan",
  "Kolumbien" = "Colombia",
  # NOTE(review): Brazzaville and Kinshasa both map to "Dem. Rep. Congo" -
  # confirm this is intended and not a copy/paste slip.
  "Kongo (Brazzaville)" = "Dem. Rep. Congo",
  "Kongo (Kinshasa)" = "Dem. Rep. Congo",
  "Korea (Nord-)" = "North Korea",
  "Korea (Süd-)" = "South Korea",
  "Kosovo" = "Kosovo",
  "Kroatien" = "Croatia",
  "Kuba" = "Cuba",
  "Kuwait" = "Kuwait",
  "Laos" = "Laos",
  "Lesotho" = "Lesotho",
  "Lettland" = "Latvia",
  "Libanon" = "Lebanon",
  "Liberia" = "Liberia",
  "Libyen" = "Libya",
  "Liechtenstein" = "Austria",
  "Litauen" = "Lithuania",
  "Luxemburg" = "Luxembourg",
  "Madagaskar" = "Madagascar",
  "Malawi" = "Malawi",
  "Malaysia" = "Malaysia",
  "Malediven" = "Sri Lanka",
  "Mali" = "Mali",
  "Malta" = "Italy",
  "Marokko" = "Morocco",
  "Martinique" = "Puerto Rico",
  "Mauretanien" = "Mauritania",
  "Mauritius" = "Madagascar",
  "Mayotte" = "Madagascar",
  "Mexiko" = "Mexico",
  "Moldawien" = "Moldova",
  "Monaco" = "France",
  "Mongolei" = "Mongolia",
  "Montenegro" = "Montenegro",
  "Mosambik" = "Mozambique",
  "Myanmar" = "Myanmar",
  "Namibia" = "Namibia",
  "Nepal" = "Nepal",
  "Neuseeland" = "New Zealand",
  "Neukaledonien" = "Fiji",
  "Nicaragua" = "Nicaragua",
  "Niederlande" = "Netherlands",
  "Niederländische Antillen" = "Dominican Rep.",
  "Niger" = "Niger",
  "Nigeria" = "Nigeria",
  "Nordmazedonien" = "North Macedonia",
  "Norwegen" = "Norway",
  "Oesterreich" = "Austria",
  "Oman" = "Oman",
  "Pakistan" = "Pakistan",
  "Palästina" = "Palestine",
  "Panama" = "Panama",
  "Papua-Neuguinea" = "Papua New Guinea",
  "Paraguay" = "Paraguay",
  "Peru" = "Peru",
  "Philippinen" = "Philippines",
  "Polen" = "Poland",
  "Portugal" = "Portugal",
  "Provinz Wojwodina" = "Serbia",
  "Reunion" = "Madagascar",
  "Ruanda" = "Rwanda",
  "Rumänien" = "Romania",
  "Russland" = "Russia",
  "Sambia" = "Zambia",
  "Samoa" = "Fiji",
  "San Marino" = "Italy",
  "Saudi-Arabien" = "Saudi Arabia",
  "Schweden" = "Sweden",
  "Schweiz" = "Switzerland",
  "Senegal" = "Senegal",
  "Serbien" = "Serbia",
  "Seychellen" = "Madagascar",
  "Sierra Leone" = "Sierra Leone",
  "Simbabwe" = "Zimbabwe",
  "Singapur" = "Malaysia",
  "Slowakische Republik" = "Slovakia",
  "Slowenien" = "Slovenia",
  "Somalia" = "Somalia",
  "Spanien" = "Spain",
  "Sri Lanka" = "Sri Lanka",
  "St.Kitts und Nevis" = "Puerto Rico",
  "St.Lucia" = "Puerto Rico",
  "St.Vincent und die Grenadinen" = "Puerto Rico",
  "Südafrika" = "South Africa",
  "Sudan" = "Sudan",
  "Südsudan" = "Sudan",
  "Surinam" = "Suriname",
  "Syrien" = "Syria",
  "Tadschikistan" = "Tajikistan",
  "Taiwan (Chinesisches Taipei)" = "Taiwan",
  "Tansania" = "Tanzania",
  "Thailand" = "Thailand",
  "Tibet" = "Bhutan",
  "Togo" = "Togo",
  "Tonga" = "Fiji",
  "Trinidad und Tobago" = "Trinidad and Tobago",
  "Tschad" = "Chad",
  "Tschechische Republik" = "Czechia",
  "Tunesien" = "Tunisia",
  "Türkei" = "Turkey",
  "Turkmenistan" = "Turkmenistan",
  "Uganda" = "Uganda",
  "Ukraine" = "Ukraine",
  "Ungarn" = "Hungary",
  "Uruguay" = "Uruguay",
  "Usbekistan" = "Uzbekistan",
  "Vanuatu" = "Vanuatu",
  "Vatikanstadt" = "Italy",
  "Venezuela" = "Venezuela",
  "Vereinigte Arabische Emirate" = "United Arab Emirates",
  "Vereinigte Staaten von Amerika" = "United States of America",
  "Vietnam" = "Vietnam",
  "Weissrussland" = "Belarus",
  # NOTE(review): other abbreviated names in this list end with a period
  # ("Dominican Rep.", "Bosnia and Herz.") - verify the exact spelling used
  # by the world geojson.
  "Zentralafrikanische Republik" = "Central African Rep",
  "Zentralserbien" = "Serbia",
  "Zypern" = "Cyprus"
)


In [49]:
# Pre-compute some values per row so the frontend doesn't have to do it later:
# the map and region a migration starts on and the map and region it ends on
# (e.g. "Basel"/"St. Johann" or "World"/"USA", etc.).
# The four columns are created empty here and filled in the following steps.

data_enhanced <- data_modified %>%
  add_column(
    StartKarte = NA_character_,
    StartRegion = NA_character_,
    EndKarte = NA_character_,
    EndRegion = NA_character_
  )


In [50]:
# Set the starting maps.
# The first matching rule wins, so the order of the rules matters.

# Countries that always go on the world map, even if their continent is
# "Europa (ohne Schweiz)"
world_map_countries <- c("Russland", "Türkei", "Zypern")

data_enhanced <- data_enhanced %>%
  mutate(
    StartKarte = case_when(
      # "basel" for all emigrants
      Wanderungstyp == "Wegzug" ~ "basel",
      VonKontinent == "Schweiz" ~ "switzerland",
      # European origin (an unknown country counts as well)
      VonKontinent == "Europa (ohne Schweiz)" &
        !(VonLand %in% world_map_countries) ~ "europe",
      VonLand %in% world_map_countries ~ "world",
      # No continent known: no map
      is.na(VonKontinent) ~ NA_character_,
      TRUE ~ "world"
    )
  )


In [51]:
# Set the starting region, depending on the starting map:
#   no map        -> NA
#   "basel"       -> the neighbourhood (VonWohnviertel)
#   "switzerland" -> the canton name looked up via VonKanton
#   anything else -> the country name looked up via VonLand
#
# unlist() turns the lookup lists into named character vectors. Indexing
# those by name returns a real NA for missing or unmapped keys. The former
# nested ifelse() over lists followed by as.character() produced the
# strings "NA" and "NULL" in those cases.
data_enhanced <- mutate(data_enhanced,
  StartRegion = case_when(
    is.na(StartKarte) ~ NA_character_,
    StartKarte == "basel" ~ VonWohnviertel,
    StartKarte == "switzerland" ~ unname(unlist(cantons)[VonKanton]),
    TRUE ~ unname(unlist(countries)[VonLand])
  )
)


In [52]:
# Set the ending maps.
# The first matching rule wins, so the order of the rules matters.

# Countries that always go on the world map, even if their continent is
# "Europa (ohne Schweiz)"
world_map_countries <- c("Russland", "Türkei", "Zypern")

data_enhanced <- data_enhanced %>%
  mutate(
    EndKarte = case_when(
      # "basel" for all immigrants
      Wanderungstyp == "Zuzug" ~ "basel",
      NachKontinent == "Schweiz" ~ "switzerland",
      # European destination (an unknown country counts as well)
      NachKontinent == "Europa (ohne Schweiz)" &
        !(NachLand %in% world_map_countries) ~ "europe",
      NachLand %in% world_map_countries ~ "world",
      # No continent known: no map
      is.na(NachKontinent) ~ NA_character_,
      TRUE ~ "world"
    )
  )


In [53]:
# Set the ending region, depending on the ending map:
#   no map        -> NA
#   "basel"       -> the neighbourhood (NachWohnviertel)
#   "switzerland" -> the canton name looked up via NachKanton
#   anything else -> the country name looked up via NachLand
#
# unlist() turns the lookup lists into named character vectors. Indexing
# those by name returns a real NA for missing or unmapped keys. The former
# nested ifelse() over lists followed by as.character() produced the
# strings "NA" and "NULL" in those cases.
data_enhanced <- mutate(data_enhanced,
  EndRegion = case_when(
    is.na(EndKarte) ~ NA_character_,
    EndKarte == "basel" ~ NachWohnviertel,
    EndKarte == "switzerland" ~ unname(unlist(cantons)[NachKanton]),
    TRUE ~ unname(unlist(countries)[NachLand])
  )
)


In [54]:
# Write the enhanced project data to the tidy folder as CSV
data_enhanced %>%
  write_csv(file = "tidy/migration.csv")


In [55]:
# In order to host the data on GitHub, we need to divide the migration data into smaller chunks.
# GitHub warns about files larger than 50 MB (and blocks files larger than 100 MB).
# However, we are going to create many more chunks than strictly necessary - one per month!
# On the website, users will be able to select a starting year and month.
# The site will then load only the data it needs.
# Once the animation is done, the subsequent data will be loaded.


In [None]:
# Load the comma-separated project data that was written to the tidy folder
final_data <- read_delim(file = "tidy/migration.csv", delim = ",")


In [58]:
# Split the data into one tibble per year/month combination.
# JahrMonat (e.g. "2006-1") is added as the first column and serves as the
# grouping key.
data_with_key <- mutate(
  final_data,
  JahrMonat = paste(Jahr, Monat, sep = "-"),
  .before = 1
)
chunks <- group_split(data_with_key, JahrMonat)


In [59]:
# Save each chunk as a separate csv file named after its JahrMonat value
# (e.g. "tidy/chunks/2006-1.csv")
output_dir <- "tidy/chunks/"

# write_csv() does not create missing directories
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

for (chunk in chunks) {
  # All rows of a chunk share the same JahrMonat, so the first value names
  # the file. Extract it as a plain string rather than a 1x1 tibble.
  chunk_name <- chunk$JahrMonat[1]
  write_csv(chunk, paste0(output_dir, chunk_name, ".csv"))
}


In [None]:
# Copy the chunk files to the web app's data directory
source_directory <- "tidy/chunks"
target_directory <- "../basel-migration/public/data/migration"

# Fail early with a clear message if the web app checkout is not where it is
# expected. Without an existing target directory, file.copy() either errors
# cryptically or writes a plain file called "migration".
if (!dir.exists(target_directory)) {
  stop("Target directory does not exist: ", target_directory, call. = FALSE)
}

# Get list of file names (with their path) in the source directory
file_list <- list.files(source_directory, full.names = TRUE)

# Copy files to the target directory, replacing files that already exist.
# The result is one TRUE/FALSE per file.
file.copy(from = file_list, to = target_directory, overwrite = TRUE)
