In [5]:
# Load required packages
library(haven)      # for reading .por files
library(dplyr)      # for data manipulation
library(stats)      # for t-test

# Load the .por dataset from disk (haven handles the file format).
df <- haven::read_por("./data/ZA4584_v1-0-0.por/ZA4584_v1-0-0.por")

# Survey years that enter the analysis.
selected_years <- c(2010, 2012, 2014)

# Keep only the rows whose YEAR is one of the selected survey years.
# dplyr::filter() is called explicitly so it cannot be confused with
# stats::filter().
df_filtered <- dplyr::filter(df, YEAR %in% selected_years)

# Build a named list of life-satisfaction scores, one vector per category of
# father's education.
#   V1234: father's education code (per the original author's note)
#   V545:  life-satisfaction score (per the original author's note)
# Each list name is the group label; each value passed to the helper is the
# set of V1234 codes that make up that group (codes 4 and 5 are pooled).
# Rows with V545 above 10 are dropped; rows where V545 is NA are dropped too,
# because dplyr::filter() only keeps rows whose condition is TRUE.
# NOTE(review): only an upper bound is applied to V545 -- confirm that the
# data contain no negative missing-value codes.
life_satisfaction_by_fathers_education <- lapply(
  list(
    "Kein Abschluss (Vater)" = 1,
    "Hauptschule (Vater)" = 2,
    "Mittlere Reife (Vater)" = 3,
    "(Fach-)Hochschulreife (Vater)" = c(4, 5)
  ),
  function(codes) {
    group_rows <- dplyr::filter(df_filtered, V1234 %in% codes, V545 <= 10)
    dplyr::pull(group_rows, V545)
  }
)

# Pull each education group out of the list as a plain vector of scores.
# Only two of them are used in the test below; the other two are kept
# available for further comparisons.
kein_abschluss_vater <- life_satisfaction_by_fathers_education[["Kein Abschluss (Vater)"]]
hauptschule_vater <- life_satisfaction_by_fathers_education[["Hauptschule (Vater)"]]
mittlere_reife_vater <- life_satisfaction_by_fathers_education[["Mittlere Reife (Vater)"]]
hochschulreife_vater <- life_satisfaction_by_fathers_education[["(Fach-)Hochschulreife (Vater)"]]

# Welch's two-sample t-test (var.equal = FALSE): compares mean life
# satisfaction of respondents whose father has a (Fach-)Hochschulreife with
# those whose father has a Hauptschule degree.
# t.test() has no `na.rm` parameter: it was silently absorbed by `...` and had
# no effect, because t.test() discards missing values on its own. It is
# therefore omitted here.
t.test(hochschulreife_vater, hauptschule_vater,
       alternative = "two.sided", var.equal = FALSE)




	Welch Two Sample t-test

data:  hochschulreife_vater and hauptschule_vater
t = 4.3955, df = 2705.4, p-value = 1.148e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 0.1178859 0.3077738
sample estimates:
mean of x mean of y 
 7.737821  7.524991 
