# Nachbildung mathematischer Operationen durch ML-Modelle

## 1. Erstellung der Datensätze

In [None]:
import numpy as np
import pandas as pd

# Dataset generation.
# Draws N input pairs (x1, x2) uniformly from [-10, 10) and writes one CSV per
# target operation with the columns x1, x2 and y = op(x1, x2). Both operations
# share the same inputs, so the resulting files differ only in the "y" column.

N = 100000
SEED = 42  # fixed seed: a fresh kernel regenerates exactly the same files

# Dataset name -> vectorised operation producing the target column.
# Each entry yields "<name>_dataset.csv"; the later cells read the addition and
# the multiplication file. Further operations (e.g. "subtraction": np.subtract)
# can be added here.
OPERATIONS = {
    "addition": np.add,
    "multiplication": np.multiply,
}

rng = np.random.default_rng(SEED)
x1 = rng.uniform(-10, 10, N)
x2 = rng.uniform(-10, 10, N)

for op_name, op in OPERATIONS.items():
    pd.DataFrame({"x1": x1, "x2": x2, "y": op(x1, x2)}).to_csv(
        f"{op_name}_dataset.csv", index=False
    )

# Keep the addition data available under the names this cell has always defined.
y = x1 + x2

data = pd.DataFrame({
    "x1": x1,
    "x2": x2,
    "y": y
})


## 2. SVM (Support Vector Regression)

In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np

### 2.1 Addition - linearer Kernel

In [12]:


# Linear-kernel SVR on the addition dataset: read the CSV, hold out 20 % of the
# rows for testing, standardise the inputs, fit, and report test-set errors.
data = pd.read_csv("addition_dataset.csv")

X = data[["x1", "x2"]].to_numpy()
y = data["y"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# The scaler is fitted on the training rows only; the same transformation is
# then applied to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

svr = SVR(kernel="linear", C=10, epsilon=0.01).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = svr.predict(X_test)

# Test-set metrics.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
abs_residual = np.abs(y_test - y_pred)
abs_target = np.abs(y_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
relative_error = np.mean(abs_residual / (abs_target + 1e-8))

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")

# Hyperparameters of the fitted estimator.
# NOTE: per the scikit-learn SVR docs gamma is the coefficient for the 'rbf',
# 'poly' and 'sigmoid' kernels; with kernel="linear" the value printed below is
# just the constructor default and is not used by the model.
print("C:", svr.C)
print("Epsilon:", svr.epsilon)
print("Kernel:", svr.kernel)
print("Gamma:", getattr(svr, "gamma", None))


Mean Squared Error: 1.732199741685354e-05
Mean Absolute Error: 0.0034251641630859268
Mean Relative Error: 0.0013018782870338798
C: 10
Epsilon: 0.01
Kernel: linear
Gamma: scale


### 2.2 Multiplikation - rbf-Kernel

In [13]:

# RBF-kernel SVR on the multiplication dataset. Only a reproducible random
# subset of 20000 rows is used; 20 % of that subset is held out for testing.
data = pd.read_csv("multiplication_dataset.csv").sample(n=20000, random_state=42)

X = data[["x1", "x2"]].to_numpy()
y = data["y"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# The scaler is fitted on the training rows only; the same transformation is
# then applied to the held-out rows.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

svr = SVR(kernel="rbf", C=10, gamma="scale", epsilon=0.01).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = svr.predict(X_test)

# Test-set metrics.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
abs_residual = np.abs(y_test - y_pred)
abs_target = np.abs(y_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
relative_error = np.mean(abs_residual / (abs_target + 1e-8))

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")

# Hyperparameters of the fitted estimator
print("C:", svr.C)
print("Epsilon:", svr.epsilon)
print("Kernel:", svr.kernel)
print("Gamma:", getattr(svr, "gamma", None))

Mean Squared Error: 0.030964836792467337
Mean Absolute Error: 0.04658233828763949
Mean Relative Error: 0.0021908643269368507
C: 10
Epsilon: 0.01
Kernel: rbf
Gamma: scale


## 3. Entscheidungsbäume & Random Forests

### 3.1 Entscheidungsbaum - Addition

In [14]:


# Depth-limited regression tree on the addition dataset: read the CSV, hold out
# 20 % of the rows for testing, fit, and report test-set metrics.
data = pd.read_csv("addition_dataset.csv")

# Input columns and target column
X = data.loc[:, ["x1", "x2"]]
y = data["y"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Regression tree; max_depth caps how deep the tree may grow.
model = DecisionTreeRegressor(random_state=42, max_depth=5).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Test-set metrics.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
abs_residual = np.abs(y_test - y_pred)
abs_target = np.abs(y_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
relative_error = np.mean(abs_residual / (abs_target + 1e-8))
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")

# Rounded summary (the MSE is shown a second time, with four decimals)
print(f"MSE: {mse:.4f}")
print(f"R²: {r2:.4f}")


Mean Squared Error: 2.5472653238507763
Mean Absolute Error: 1.3356850223180594
Mean Relative Error: 1.7176408068193347
MSE: 2.5473
R²: 0.9617


### 3.2 Entscheidungsbaum - Multiplikation

In [15]:

# Depth-limited regression tree on the multiplication dataset: read the CSV,
# hold out 20 % of the rows for testing, fit, and report test-set metrics.
data = pd.read_csv("multiplication_dataset.csv")

# Input columns and target column
X = data.loc[:, ["x1", "x2"]]
y = data["y"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Regression tree; max_depth caps how deep the tree may grow.
model = DecisionTreeRegressor(random_state=42, max_depth=5).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Test-set metrics.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
abs_residual = np.abs(y_test - y_pred)
abs_target = np.abs(y_test)

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
relative_error = np.mean(abs_residual / (abs_target + 1e-8))
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")

# Rounded summary (the MSE is shown a second time, with four decimals)
print(f"MSE: {mse:.4f}")
print(f"R²: {r2:.4f}")


Mean Squared Error: 280.94728577859456
Mean Absolute Error: 13.00443039944547
Mean Relative Error: 5.905916185772301
MSE: 280.9473
R²: 0.7466


### 3.3 Random Forest - Addition

In [16]:



# Random forest on the addition dataset: read the CSV, hold out 20 % of the
# rows for testing, fit a bagged ensemble of depth-limited trees, and report
# test-set metrics.
data = pd.read_csv("addition_dataset.csv")

# Features and target
X = data[["x1", "x2"]]
y = data["y"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random forest with bagging
model = RandomForestRegressor(
    n_estimators=200,      # number of trees
    max_depth=10,          # cap on the depth of each tree
    bootstrap=True,        # bagging (the default, stated explicitly here)
    random_state=42,
    n_jobs=-1              # -1 = use all available processors
)

# Training
model.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Evaluation: every metric is computed exactly once.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
relative_error = np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + 1e-8))

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")
print(f"R²: {r2:.4f}")


Mean Squared Error: 0.00900843232429587
Mean Absolute Error: 0.07167191405536459
Mean Relative Error: 0.09854638980988975
R²: 0.9999


### 3.4 Random Forest - Multiplikation

In [3]:
# Random forest on the multiplication dataset: read the CSV, hold out 20 % of
# the rows for testing, fit a bagged ensemble of depth-limited trees, and
# report test-set metrics.
data = pd.read_csv("multiplication_dataset.csv")

# Features and target
X = data[["x1", "x2"]]
y = data["y"]

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Random forest with bagging
model = RandomForestRegressor(
    n_estimators=200,      # number of trees
    max_depth=10,          # cap on the depth of each tree
    bootstrap=True,        # bagging (the default, stated explicitly here)
    random_state=42,
    n_jobs=-1              # -1 = use all available processors
)

# Training
model.fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Evaluation: every metric is computed exactly once.
# The relative error divides by |y_test|; the 1e-8 term only prevents division
# by zero, so targets close to zero can dominate the mean.
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
relative_error = np.mean(np.abs(y_test - y_pred) / (np.abs(y_test) + 1e-8))

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")
print(f"Mean Relative Error: {relative_error}")
print(f"R²: {r2:.4f}")


Mean Squared Error: 4.211337741312613
Mean Absolute Error: 1.356066506751319
Mean Relative Error: 0.24003553792986318
R²: 0.9962
