# Bibliotheken


In [1]:
# SP_AP1_DataScience_02_DataPreparation.ipynb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import os


# Multipliers for the magnitude suffixes used in abbreviated figures such as
# "1.5M" (million) or "2B" (billion).
_SUFFIX_MULTIPLIERS = {"M": 10**6, "B": 10**9}


def _parse_abbreviated_number(value):
    """Convert one raw cell such as ``"1.5M"`` or ``"2,000"`` to a float.

    Non-string cells (numbers, NaN) are returned unchanged so that missing
    values survive until the later mean-fill step. Text that is not a number
    raises ``ValueError`` instead of being evaluated as an expression.
    """
    if not isinstance(value, str):
        return value
    text = value.strip().replace(",", "")
    multiplier = _SUFFIX_MULTIPLIERS.get(text[-1:], 1)
    if multiplier != 1:
        text = text[:-1]
    return float(text) * multiplier


def prepare_data(filepath, ticker, asset_type):
    """Load one price CSV, add technical indicators, scale, plot and save it.

    Args:
        filepath: Path of the raw CSV. It must contain a ``Date`` column plus
            ``Open``, ``High``, ``Low``, ``Close``, ``Adj Close`` and
            ``Volume``.
        ticker: Ticker symbol; used in the chart title and, with any ``^``
            removed, in the output file name.
        asset_type: Asset class label; its lower-cased form becomes part of
            the output file name.

    Returns:
        The prepared DataFrame indexed by ``Date`` with all feature columns
        min-max scaled, or ``None`` if the file is missing or any processing
        step fails (a message is printed in both cases).

    Side effects:
        Shows a chart of the unscaled close price with its 50- and 200-period
        moving averages and writes the prepared frame to
        ``financial_data/prepared_<ticker>_<asset_type>_data.csv``.
    """
    try:
        stock_data = pd.read_csv(filepath, index_col=False, parse_dates=['Date'], thousands=',')
        stock_data.set_index('Date', inplace=True)

        # Columns holding abbreviated figures ("1.5M") are read as text.
        # Parse them explicitly rather than evaluating file content as code.
        for col in ['Volume', 'Open', 'Close', 'High', 'Low', 'Adj Close']:
            if not pd.api.types.is_numeric_dtype(stock_data[col]):
                stock_data[col] = stock_data[col].map(_parse_abbreviated_number).astype(float)

        # numeric_only keeps the mean well defined if the file carries extra
        # non-numeric columns.
        stock_data.fillna(stock_data.mean(numeric_only=True), inplace=True)

        # --- Feature engineering ---
        stock_data["SMA_50"] = stock_data["Close"].rolling(window=50).mean()
        stock_data["SMA_200"] = stock_data["Close"].rolling(window=200).mean()

        # Bollinger Bands: 20-period mean +/- two standard deviations.
        stock_data['SMA_20'] = stock_data['Close'].rolling(window=20).mean()
        stock_data['StdDev_20'] = stock_data['Close'].rolling(window=20).std()
        stock_data['Upper'] = stock_data['SMA_20'] + 2 * stock_data['StdDev_20']
        stock_data['Lower'] = stock_data['SMA_20'] - 2 * stock_data['StdDev_20']

        # Relative Strength Index (RSI) from exponentially smoothed gains and
        # losses (com=13).
        delta = stock_data['Close'].diff()
        up = delta.clip(lower=0)
        down = -1 * delta.clip(upper=0)
        ema_up = up.ewm(com=13, adjust=False).mean()
        ema_down = down.ewm(com=13, adjust=False).mean()
        rs = ema_up / ema_down
        stock_data['RSI'] = 100 - (100 / (1 + rs))

        # MACD (Moving Average Convergence Divergence) and its signal line.
        stock_data['EMA_12'] = stock_data['Close'].ewm(span=12, adjust=False).mean()
        stock_data['EMA_26'] = stock_data['Close'].ewm(span=26, adjust=False).mean()
        stock_data['MACD'] = stock_data['EMA_12'] - stock_data['EMA_26']
        stock_data['Signal'] = stock_data['MACD'].ewm(span=9, adjust=False).mean()

        # --- Visualisation ---
        # Drawn before scaling: the scaler stretches every column to [0, 1]
        # independently, which would distort how the price sits relative to
        # its moving averages.
        fig = plt.figure(figsize=(12, 6))
        plt.plot(stock_data["Close"], label="Close")
        plt.plot(stock_data["SMA_50"], label="SMA 50")
        plt.plot(stock_data["SMA_200"], label="SMA 200")
        plt.title(f"Aktienkurs und gleitende Durchschnitte von {ticker}")
        plt.legend()
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

        # --- Data preparation for the model ---
        features_to_scale = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                             'SMA_50', 'SMA_200', 'SMA_20', 'StdDev_20', 'Upper', 'Lower', 'RSI',
                             'EMA_12', 'EMA_26', 'MACD', 'Signal']
        scaler = MinMaxScaler()
        stock_data[features_to_scale] = scaler.fit_transform(stock_data[features_to_scale])

        # --- Save data ---
        output_folder = "financial_data"
        # The input may live elsewhere, so make sure the target folder exists.
        os.makedirs(output_folder, exist_ok=True)
        prepared_filename = f"prepared_{ticker.replace('^', '')}_{asset_type.lower()}_data.csv"
        stock_data.to_csv(os.path.join(output_folder, prepared_filename))

    except FileNotFoundError:
        print(f"Fehler: Datei '{filepath}' nicht gefunden.")
        return None
    except Exception as e:
        # Callers rely on None to signal any failure, so report and return.
        print(f"Ein Fehler ist aufgetreten: {e}")
        return None

    return stock_data


# --- Main part of the script ---

output_folder = "financial_data"
tickers = {
    "Stocks": ["AAPL", "MSFT", "GOOG"],
    "ETFs": ["ACWI", "SPY"],
    "Indices": ["^GSPC", "^DJI"],
}

# Collect every successfully prepared frame, keyed by its sheet name, so that
# all tickers end up in the workbook instead of each export overwriting the
# previous one.
prepared_frames = {}
for asset_type, ticker_list in tickers.items():
    for ticker in ticker_list:
        filename = f"{ticker}_{asset_type.lower()}_data.csv"
        filepath = os.path.join(output_folder, filename)

        prepared_data = prepare_data(filepath, ticker, asset_type)
        if prepared_data is not None:
            # Drop '^' from index tickers, matching the CSV file naming.
            prepared_frames[ticker.replace("^", "")] = prepared_data

# Write one sheet per ticker; skip the workbook entirely when nothing could be
# prepared, because a workbook without sheets cannot be saved.
if prepared_frames:
    with pd.ExcelWriter("prepared_data.xlsx") as writer:
        for sheet_name, frame in prepared_frames.items():
            frame.to_excel(writer, sheet_name=sheet_name)

    

ModuleNotFoundError: No module named 'matplotlib'