# FMZ Übung 8: **Ensemble Learning**

---
### Ziel der Übung
- Verstehen, warum Ensemble-Learning oft besser ist als Einzelmodelle
- Regression mit mehreren ML-Algorithmen lösen
- Einfache Ensemble-Strategien anwenden:
   - Mean-Ensemble (Durchschnitt)
   - Min-Ensemble (false-negative-freundliche Vorhersagen)
   - Max-Ensemble (false-positive-freundliche Vorhersagen)
   - Stacking-Methoden (Meta-Modell)


---
### Benötigte Pakete

In [None]:
install.packages(c("randomForest","e1071","neuralnet"))
library(ggplot2)
library(randomForest)
library(e1071)
library(neuralnet)
library(MASS)
library(rpart)

---
## Realdaten: **Boston Housing**
Wir verwenden den bekannten Datensatz `Boston` aus dem Paket `MASS`.
- Zielvariable: `medv` (Median des Hauswerts in 1000 USD)
- Inputs: alle anderen Variablen

In [None]:

# Keep the Boston housing data under the short name `df` used by all
# later cells.
df <- Boston

# Open the help page that documents the individual columns.
help("Boston")

# Per-column summary statistics as a first look at the data.
summary(df)


---
## Aufgabe 1: **Daten in Train/Test aufteilen (80/20)**

In [3]:

# Reproducible 80/20 split of the rows of `df` into training and test sets.
set.seed(2)

# 0.8 * nrow(df) is generally not a whole number; state the truncation
# explicitly instead of relying on sample() to coerce the size.
n_train <- floor(0.8 * nrow(df))

# sample.int() draws row indices directly and, unlike 1:nrow(df), is also
# well defined when the data frame has no rows.
id <- sample.int(nrow(df), size = n_train)

train <- df[id, ]
test <- df[-id, ]


---
## Aufgabe 2: **Fehlermetriken (RMSE, MAE, R²)**

In [4]:

# Root mean squared error between observed values `y` and predictions `yhat`.
RMSE <- function(y, yhat) {
  squared_error <- (y - yhat)^2
  sqrt(mean(squared_error))
}
# Mean absolute error between observed values `y` and predictions `yhat`.
MAE <- function(y, yhat) {
  absolute_error <- abs(y - yhat)
  mean(absolute_error)
}
# Coefficient of determination of predictions `yhat` for observed values `y`:
#   R² = 1 - SS_res / SS_tot
# where SS_res is the sum of squared prediction errors and SS_tot is the sum
# of squared deviations of `y` from its mean.
#
# The residuals are deliberately NOT centred. A variance-based formula such
# as 1 - var(y - yhat) / var(y) subtracts the mean residual first, so a
# constant bias (e.g. yhat = y + 10) would still score a perfect 1.
# Returns 1 for a perfect fit, 0 for predicting mean(y) everywhere, and a
# negative value for anything worse than that.
R2 <- function(y, yhat) {
  ss_res <- sum((y - yhat)^2)
  ss_tot <- sum((y - mean(y))^2)
  1 - ss_res / ss_tot
}


---
## Aufgabe 3: **Drei Regressoren trainieren**

In [5]:

# Three base learners, each regressing `medv` on all remaining columns of
# the training set. The statement order is kept as is because
# randomForest() draws from the random number generator.

# Linear model
m_lm <- lm(medv ~ ., data = train)

# Random forest with 300 trees
m_rf <- randomForest(medv ~ ., data = train, ntree = 300)

# Support vector machine with the package's default settings
m_svm <- svm(medv ~ ., data = train)


---
## Aufgabe 4: **Einzelprognosen erzeugen**

In [None]:

# Predictions of each base model for the held-out test rows.
p_lm <- predict(m_lm, newdata = test)
p_rf <- predict(m_rf, newdata = test)
p_svm <- predict(m_svm, newdata = test)

# Collect the prediction vectors under the labels used as column names of
# the metric table.
models <- list(LM = p_lm, RF = p_rf, SVM = p_svm)

# One column per model, one row per error metric. The named template makes
# vapply() check that every model yields exactly these three numbers.
vapply(
  models,
  function(pred) {
    c(
      RMSE = RMSE(test$medv, pred),
      MAE = MAE(test$medv, pred),
      R2 = R2(test$medv, pred)
    )
  },
  FUN.VALUE = c(RMSE = 0, MAE = 0, R2 = 0)
)


---
## Aufgabe 5: **Einfache Ensemble-Methoden anwenden**

In [7]:

# Mean ensemble: element-wise average of the three test predictions
# (summed left to right, then divided by the number of models).
p_mean <- Reduce(`+`, list(p_lm, p_rf, p_svm)) / 3

# Min ensemble: the lowest of the three predictions for each test row.
p_min <- do.call(pmin, list(p_lm, p_rf, p_svm))

# Max ensemble: the highest of the three predictions for each test row.
p_max <- do.call(pmax, list(p_lm, p_rf, p_svm))


---
## Aufgabe 6: **Ensemble-Methode mit gewichteten Mitteln anwenden**

In [8]:

# Weighted-mean ensembles.
#
# Both sets of blending weights are estimated from the training rows only.
# Fitting them against test$medv would leak the test labels into the
# ensemble and make the test metrics reported afterwards too optimistic.

# Training-side predictions, used solely for estimating the weights.
# predict() on a randomForest object without new data returns the
# out-of-bag predictions, so no tree is scored on rows it was grown on.
# NOTE(review): the lm and svm values are in-sample fits and therefore
# still somewhat optimistic; cross-validated predictions would be stricter.
p_lm_train <- predict(m_lm, train)
p_rf_train <- predict(m_rf)
p_svm_train <- predict(m_svm, train)

# --- Weighted mean with R² weights -----------------------------------------
R2_lm <- R2(train$medv, p_lm_train)
R2_rf <- R2(train$medv, p_rf_train)
R2_svm <- R2(train$medv, p_svm_train)

# A model with a negative R² gets weight zero so that the weights stay
# non-negative and the result remains a convex combination.
w <- pmax(c(R2_lm, R2_rf, R2_svm), 0)
stopifnot(sum(w) > 0)
w <- w / sum(w) # normalise so the weights sum to 1

p_R2 <- w[1] * p_lm + w[2] * p_rf + w[3] * p_svm

# --- Optimal weights -------------------------------------------------------
# Objective for optim(): RMSE on the training rows of the blend defined by
# the raw parameters. Taking abs() and renormalising maps any raw vector to
# non-negative weights that sum to 1.
objective <- function(w_raw) {
  w <- abs(w_raw) / sum(abs(w_raw))
  p <- w[1] * p_lm_train + w[2] * p_rf_train + w[3] * p_svm_train
  RMSE(train$medv, p)
}

# Start from equal weights.
opt <- optim(
  par = c(1 / 3, 1 / 3, 1 / 3),
  fn = objective,
  method = "Nelder-Mead"
)

# Apply the same abs()/renormalise mapping to the optimiser's solution.
w_opt <- abs(opt$par) / sum(abs(opt$par))

p_opt <- w_opt[1] * p_lm + w_opt[2] * p_rf + w_opt[3] * p_svm


---
## Aufgabe 8: **Stacking-Ensemble-Methode anwenden**

In [13]:

# Stacking: a meta-model is trained on the original predictors plus the
# predictions of the three base models.

# Meta-features for the training rows.
# The random-forest column uses the out-of-bag predictions (predict()
# without new data). In-sample forest predictions are far more accurate
# than the forest is on unseen rows, which would teach the meta-model to
# over-trust that column.
# NOTE(review): the lm and svm columns are still in-sample fits; K-fold
# out-of-fold predictions would be the stricter choice.
# The response is removed with a logical mask: `-which(...)` would drop
# every column if "medv" were ever absent.
meta_train <- data.frame(
  train[, names(train) != "medv", drop = FALSE],
  lm = predict(m_lm, train),
  rf = predict(m_rf),
  svm = predict(m_svm, train),
  y = train$medv
)

# Meta-features for the test rows: the regular test predictions.
meta_test <- data.frame(
  test[, names(test) != "medv", drop = FALSE],
  lm = p_lm,
  rf = p_rf,
  svm = p_svm,
  y = test$medv
)

# Meta-learner: an SVM regressing the target on all meta-features.
meta_model <- svm(y ~ ., data = meta_train)

p_stack <- predict(meta_model, newdata = meta_test)


---
## Aufgabe 9: **Fehlermetriken berechnen**

In [None]:

# All prediction vectors for the test rows: the three base models first,
# then every ensemble variant. The list names become the column names of
# the metric table.
models <- list(
  LM = p_lm,
  RF = p_rf,
  SVM = p_svm,
  EN_mean = p_mean,
  EN_min = p_min,
  EN_max = p_max,
  EN_meanR2 = p_R2,
  EN_meanOPT = p_opt,
  EN_stack = p_stack
)

# One column per method, one row per error metric. The named template makes
# vapply() check that every method yields exactly these three numbers.
vapply(
  models,
  function(pred) {
    c(
      RMSE = RMSE(test$medv, pred),
      MAE = MAE(test$medv, pred),
      R2 = R2(test$medv, pred)
    )
  },
  FUN.VALUE = c(RMSE = 0, MAE = 0, R2 = 0)
)


---
## Aufgabe 10: **Prognosen grafisch darstellen**

In [None]:

# Scatter plot of predicted against observed values for one method.
#   method_name: label placed on the first line of the plot title
#   y_true:      observed values (x axis)
#   y_pred:      predicted values (y axis)
# The second title line reports R² and RMSE rounded to three decimals.
# A line with intercept 0 and slope 1 marks where perfect predictions
# would fall.
plot_method <- function(method_name, y_true, y_pred) {
  fit_r2 <- round(R2(y_true, y_pred), 3)
  fit_rmse <- round(RMSE(y_true, y_pred), 3)
  title_text <- paste(method_name, "\nR²=", fit_r2, " RMSE=", fit_rmse)

  plot(
    y_true, y_pred,
    main = title_text,
    xlab = "Real", ylab = "Predicted",
    pch = 16, col = gray(0.4)
  )
  abline(a = 0, b = 1, col = 2)
}

# Draw one scatter plot per method on a 2 x 2 grid (further methods
# continue on the next page).
# par() returns the previous settings; they are restored afterwards so the
# grid layout does not leak into plots made later in the session.
old_par <- par(mfrow = c(2, 2))
for (m in names(models)) {
  plot_method(m, test$medv, models[[m]])
}
par(old_par)
