In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go

from keras.models import Sequential, load_model
from keras.layers import LSTM, Dense
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score



df = pd.read_csv('GOOGL_historical_data.csv')

# Use only the "Close" price (raw values, no scaling)
close_data = df['Close'].values.reshape(-1, 1)

# Split into train/test sets (80% / 20%)
split_percent = 0.8
split_index = int(len(close_data) * split_percent)
close_train = close_data[:split_index]
close_test = close_data[split_index:]

date_train = df['Date'][:split_index]
date_test = df['Date'][split_index:]

# Define lookback window
look_back = 5




In [2]:
from docx import Document

# === TIMESERIES GENERATORS ===
train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)

# === LSTM MODEL (Load or Train) ===
if os.path.exists("lstm_model_raw.h5"):
    model = load_model("lstm_model_raw.h5", compile=False)
    print("✅ LSTM model loaded.")
else:
    model = Sequential()
    model.add(LSTM(10, activation='relu', input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(train_generator, epochs=50, verbose=0)  # Training mit Rohdaten
    #model.save("lstm_model_raw.h5")
    print("✅ LSTM model trained and saved.")

# === FUNKTION ZUR AUSWERTUNG EINES ZEITRAUMS ===
def evaluate_period(label, close_data, dates, look_back, model):
    results = {}
    if len(close_data) > look_back:
        generator = TimeseriesGenerator(close_data, close_data, length=look_back, batch_size=1)
        prediction = model.predict(generator).reshape(-1)
        actual_adj = close_data.reshape(-1)[look_back:]
        dates_adj = dates[look_back:]

        mse = mean_squared_error(actual_adj, prediction)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(actual_adj, prediction)
        r2 = r2_score(actual_adj, prediction)

        mean_price = np.mean(actual_adj)
        rel_rmse = rmse / mean_price * 100
        rel_mae = mae / mean_price * 100
        mape = np.mean(np.abs((actual_adj - prediction) / actual_adj)) * 100

        results = {
            "label": label,
            "MSE": mse,
            "RMSE": rmse,
            "Rel_RMSE": rel_rmse,
            "MAE": mae,
            "Rel_MAE": rel_mae,
            "MAPE": mape,
            "R2": r2
        }
    return results

# === SPEZIFISCHE ZEITRÄUME DEFINIEREN ===
time_periods = {
    "Gesamter Testsplit": (date_test.min(), date_test.max()),
    "Finanzkrise 2008": ("2008-09-01", "2009-03-31"),
    "Corona-Krise": ("2020-03-01", "2020-06-30"),
    "Russland-Ukraine Krieg": ("2022-02-24", "2022-06-30"),
    "Stabiler Aufwärtstrend": ("2017-01-01", "2017-12-31")
}

# === ALLE ERGEBNISSE SAMMELN ===
all_results = []

for label, (start, end) in time_periods.items():
    mask = (df['Date'] >= start) & (df['Date'] <= end)
    close_period = df.loc[mask, 'Close'].values.reshape(-1,1)
    dates_period = df.loc[mask, 'Date'].values
    res = evaluate_period(label, close_period, dates_period, look_back, model)
    if res:
        all_results.append(res)

# === ERGEBNISSE IN WORD SPEICHERN ===
doc = Document()
doc.add_heading("LSTM Modell Performance Ergebnisse", level=1)

for res in all_results:
    doc.add_heading(res["label"], level=2)
    doc.add_paragraph(f"MSE: {res['MSE']:.4f}")
    doc.add_paragraph(f"RMSE: {res['RMSE']:.4f}  |  Rel. RMSE: {res['Rel_RMSE']:.2f}%")
    doc.add_paragraph(f"MAE: {res['MAE']:.4f}   |  Rel. MAE: {res['Rel_MAE']:.2f}%")
    doc.add_paragraph(f"MAPE: {res['MAPE']:.2f}%")
    doc.add_paragraph(f"R²: {res['R2']:.4f}")

output_path = "LSTM_Performance.docx"
doc.save(output_path)
print(f"\n✅ Ergebnisse wurden gespeichert in: {output_path}")


  super().__init__(**kwargs)
  self._warn_if_super_not_called()


KeyboardInterrupt: 

In [3]:
#Bayesian Modell

#  Helper function to convert data into input/output format manually
def create_dataset(data, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i+look_back, 0])  # Input: sequence of past values
        y.append(data[i + look_back, 0])  # Output: next value
    return np.array(X), np.array(y)

# Prepare training and test data for regression models (using raw prices)
X_train, y_train = create_dataset(close_train, look_back)
X_test, y_test = create_dataset(close_test, look_back)

# Initialize and train a Bayesian Ridge Regression model
bayesian_model = BayesianRidge()
bayesian_model.fit(X_train, y_train)

# Make predictions on test data (no inverse transform needed)
bayesian_pred = bayesian_model.predict(X_test)

# Actual target values (raw)
y_test_actual = y_test
date_test_bayes_adj = date_test[look_back:]

# === PERFORMANCE METRICS (Bayesian Regression) ===
mse_bayes = mean_squared_error(y_test_actual, bayesian_pred)
rmse_bayes = np.sqrt(mse_bayes)
mae_bayes = mean_absolute_error(y_test_actual, bayesian_pred)
r2_bayes = r2_score(y_test_actual, bayesian_pred)

print("\n📊 Bayesian Regression Model Performance (No Scaling):")
print(f"Mean Squared Error (MSE): {mse_bayes:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse_bayes:.4f}")
print(f"Mean Absolute Error (MAE): {mae_bayes:.4f}")
print(f"R² Score: {r2_bayes:.4f}")


📊 Bayesian Regression Model Performance (No Scaling):
Mean Squared Error (MSE): 6.0723
Root Mean Squared Error (RMSE): 2.4642
Mean Absolute Error (MAE): 1.7853
R² Score: 0.9909


In [10]:

from docx import Document
import matplotlib.pyplot as plt
from docx.shared import Inches
import matplotlib.pyplot as plt
import numpy as np
from docx.shared import Inches

# === Helper-Funktion: Daten in [X, y] Format bringen ===
def create_dataset(data, look_back):
    X, y = [], []
    for i in range(len(data) - look_back):
        X.append(data[i:i+look_back, 0])  # Input: Sequenz der letzten Werte
        y.append(data[i + look_back, 0])  # Output: nächster Wert
    return np.array(X), np.array(y)

# === Daten vorbereiten (Rohpreise, kein Scaling) ===
X_train, y_train = create_dataset(close_train, look_back)
X_test, y_test = create_dataset(close_test, look_back)

# === Modell initialisieren und trainieren ===
bayesian_model = BayesianRidge()
bayesian_model.fit(X_train, y_train)

# === Vorhersagen auf Testdaten ===
bayesian_pred = bayesian_model.predict(X_test)

# === Tatsächliche Werte (Testdaten) ===
y_test_actual = y_test
date_test_bayes_adj = date_test[look_back:]

# === Helper function für Bayesian Regression Zeitauswertung ===
def evaluate_period_bayes(label, close_data, dates, look_back, model):
    results = {}
    if len(close_data) > look_back:
        # Daten in [X, y] Format umwandeln
        X, y = create_dataset(close_data, look_back)
        pred = model.predict(X)
        actual_adj = y
        dates_adj = dates[look_back:]

        # Klassische Fehlermaße
        mse = mean_squared_error(actual_adj, pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(actual_adj, pred)
        r2 = r2_score(actual_adj, pred)

        # Relative Fehler
        mean_price = np.mean(actual_adj)
        rel_rmse = rmse / mean_price * 100
        rel_mae = mae / mean_price * 100
        mape = np.mean(np.abs((actual_adj - pred) / actual_adj)) * 100

        # Ergebnisse sammeln
        results = {
            "label": label,
            "MSE": mse,
            "RMSE": rmse,
            "Rel_RMSE": rel_rmse,
            "MAE": mae,
            "Rel_MAE": rel_mae,
            "MAPE": mape,
            "R2": r2
        }
    return results

# === SPEZIFISCHE ZEITRÄUME DEFINIEREN ===
time_periods = {
    "Gesamter Testsplit": (date_test.min(), date_test.max()),
    "Finanzkrise 2008": ("2008-09-01", "2009-03-31"),
    "Corona-Krise": ("2020-03-01", "2020-06-30"),
    "Russland-Ukraine Krieg": ("2022-02-24", "2022-06-30"),
    "Stabiler Aufwärtstrend": ("2017-01-01", "2017-12-31")
}

# === ALLE ERGEBNISSE SAMMELN ===
all_results = []

for label, (start, end) in time_periods.items():
    mask = (df['Date'] >= start) & (df['Date'] <= end)
    close_period = df.loc[mask, 'Close'].values.reshape(-1,1)
    dates_period = df.loc[mask, 'Date'].values
    res = evaluate_period_bayes(label, close_period, dates_period, look_back, bayesian_model)
    if res:
        all_results.append(res)

# === ERGEBNISSE IN WORD SPEICHERN ===
doc = Document()
doc.add_heading("Bayesian Regression Performance Ergebnisse", level=1)

for res in all_results:
    doc.add_heading(res["label"], level=2)
    doc.add_paragraph(f"MSE: {res['MSE']:.4f}")
    doc.add_paragraph(f"RMSE: {res['RMSE']:.4f}  |  Rel. RMSE: {res['Rel_RMSE']:.2f}%")
    doc.add_paragraph(f"MAE: {res['MAE']:.4f}   |  Rel. MAE: {res['Rel_MAE']:.2f}%")
    doc.add_paragraph(f"MAPE: {res['MAPE']:.2f}%")
    doc.add_paragraph(f"R²: {res['R2']:.4f}")

output_path = "Bayesian_Performance.docx"
doc.save(output_path)
print(f"\n✅ Ergebnisse wurden gespeichert in: {output_path}")


from docx import Document
from docx.shared import Inches
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error

from docx import Document
from docx.shared import Inches
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_error

# === ERGEBNISSE IN WORD SPEICHERN + RESIDUALPLOTS (vereinfacht) ===
doc = Document()
doc.add_heading("Bayesian Regression Performance Ergebnisse", level=1)

for res in all_results:
    # --- Standard-Kennzahlen ins Word-Dokument ---
    doc.add_heading(res["label"], level=2)
    doc.add_paragraph(f"MSE: {res['MSE']:.4f}")
    doc.add_paragraph(f"RMSE: {res['RMSE']:.4f}  |  Rel. RMSE: {res['Rel_RMSE']:.2f}%")
    doc.add_paragraph(f"MAE: {res['MAE']:.4f}   |  Rel. MAE: {res['Rel_MAE']:.2f}%")
    doc.add_paragraph(f"MAPE: {res['MAPE']:.2f}%")
    doc.add_paragraph(f"R²: {res['R2']:.4f}")
 # --- Residuen berechnen ---
    mask = (df['Date'] >= time_periods[res["label"]][0]) & (df['Date'] <= time_periods[res["label"]][1])
    close_period = df.loc[mask, 'Close'].values.reshape(-1, 1)
    dates_period = pd.to_datetime(df.loc[mask, 'Date'])   # als echtes Datum behalten!

    # Predictions für diesen Zeitraum generieren
    X, y_actual = [], []
    for i in range(look_back, len(close_period)):
        X.append(close_period[i-look_back:i])
        y_actual.append(close_period[i])
    X = np.array(X).reshape(len(X), look_back)
    y_actual = np.array(y_actual).flatten()
    y_pred = bayesian_model.predict(X)

    residuals = y_actual - y_pred
    plot_dates = dates_period.iloc[look_back:]   # Datumsreihe für Residuen

    # === Residualplot ===
    mean_error = np.mean(residuals)
    mae = mean_absolute_error(y_actual, y_pred)

    plt.figure(figsize=(10, 5))
    
    # Residuen als Linie mit Punkten
    plt.plot(plot_dates, residuals, color="orange", marker="o", markersize=2, linewidth=1)

    # Horizontale Linien für Bias (ME) und ±MAE
    plt.axhline(mean_error, color="red", linestyle="--", linewidth=1)
    plt.axhline(mae, color="blue", linestyle=":", linewidth=1)
    plt.axhline(-mae, color="blue", linestyle=":", linewidth=1)

    # Layout
    plt.title(f"📉 Residuals – {res['label']}")
    plt.xlabel("Date")
    plt.ylabel("Prediction Error (Actual - Predicted)")

    # X-Achse: Ticks nur alle 3 Monate
    ax = plt.gca()
    ax.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%b %Y"))

    plt.xticks(rotation=45)
    plt.tight_layout()

    # Plot speichern und ins Word einfügen
    img_path = f"residuals_{res['label'].replace(' ', '_')}.png"
    plt.savefig(img_path, dpi=150)
    plt.close()
    doc.add_picture(img_path, width=Inches(5.5))


✅ Ergebnisse wurden gespeichert in: Bayesian_Performance.docx


  plt.tight_layout()
  plt.savefig(img_path, dpi=150)
  plt.tight_layout()
  plt.savefig(img_path, dpi=150)
  plt.tight_layout()
  plt.savefig(img_path, dpi=150)
  plt.tight_layout()
  plt.savefig(img_path, dpi=150)
  plt.tight_layout()
  plt.savefig(img_path, dpi=150)


In [None]:
### LSTM mit MinMaxScaler

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
# Normalize to range [0, 1]
scaler = MinMaxScaler()
close_data_scaled = scaler.fit_transform(close_data)

# Split into train/test sets (80% / 20%)
split_percent = 0.8
split_index = int(len(close_data_scaled) * split_percent)
close_train = close_data_scaled[:split_index]
close_test = close_data_scaled[split_index:]

date_train = df['Date'][:split_index]
date_test = df['Date'][split_index:]

# Define lookback window
look_back = 10

# === TIMESERIES GENERATORS ===
train_generator = TimeseriesGenerator(close_train, close_train, length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(close_test, close_test, length=look_back, batch_size=1)

# === LSTM MODEL (Load or Train) ===
if os.path.exists("lstm_model.h5"):
    model = load_model("lstm_model.h5", compile=False)
    print("✅ LSTM model loaded.")
else:
    model = Sequential()
    model.add(LSTM(10, activation='relu', input_shape=(look_back, 1)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    model.fit(train_generator, epochs=8, verbose=1)  # you can change epochs for testing
    #model.save("lstm_model.h5")
    print("✅ LSTM model trained and saved.")

# === PREDICTION ===
prediction_scaled = model.predict(test_generator)
prediction = scaler.inverse_transform(prediction_scaled.reshape(-1, 1)).reshape(-1)

# Actual test values
close_test_actual = scaler.inverse_transform(close_test).reshape(-1)

# Adjust for lookback
close_test_actual_adj = close_test_actual[look_back:]
date_test_lstm_adj = date_test[look_back:]

# === PERFORMANCE METRICS ===
mse = mean_squared_error(close_test_actual_adj, prediction)
rmse = np.sqrt(mse)
mae = mean_absolute_error(close_test_actual_adj, prediction)
r2 = r2_score(close_test_actual_adj, prediction)

print("\n📊 LSTM Model Performance:")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")

Epoch 1/8


  super().__init__(**kwargs)
  self._warn_if_super_not_called()


[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 1ms/step - loss: 6.1821e-04
Epoch 2/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 3.7920e-05
Epoch 3/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 3.5902e-05
Epoch 4/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 3.2310e-05
Epoch 5/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 3.0979e-05
Epoch 6/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 3.1041e-05
Epoch 7/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 2.9389e-05
Epoch 8/8
[1m4090/4090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step - loss: 2.7476e-05
✅ LSTM model trained and saved.
[1m1015/1015[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 669us/step

📊 LSTM Model Performance:
Mean Squared Error (MSE)