# Datenaufbereitung

In [62]:
# Import der benötigten Bibliotheken
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Apple (AAPL) price history from CSV, parsing the "Date" column as
# dates and using it as the index, then discard the "Adj Close" column.
df = pd.read_csv(
    "AAPL.csv",
    index_col=["Date"],
    parse_dates=["Date"],
)
df = df.drop(columns=["Adj Close"])

In [63]:
# Quick look at the first five rows to confirm the CSV was parsed as expected
print(df.head(n=5))

                Open      High       Low     Close     Volume
Date                                                         
1980-12-12  0.128348  0.128906  0.128348  0.128348  469033600
1980-12-15  0.122210  0.122210  0.121652  0.121652  175884800
1980-12-16  0.113281  0.113281  0.112723  0.112723  105728000
1980-12-17  0.115513  0.116071  0.115513  0.115513   86441600
1980-12-18  0.118862  0.119420  0.118862  0.118862   73449600


In [64]:
# Structural overview: index range, column dtypes and non-null counts.
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() only appended a stray "None" line.
df.info()
# Summary statistics for every numeric column
print(df.describe())
# Number of missing values per column
print(df.isna().sum())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10683 entries, 1980-12-12 to 2023-04-27
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    10683 non-null  float64
 1   High    10683 non-null  float64
 2   Low     10683 non-null  float64
 3   Close   10683 non-null  float64
 4   Volume  10683 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 500.8 KB
None
               Open          High           Low         Close        Volume
count  10683.000000  10683.000000  10683.000000  10683.000000  1.068300e+04
mean      17.475500     17.675462     17.281943     17.486984  3.256989e+08
std       36.877935     37.325868     36.457515     36.912783  3.373485e+08
min        0.049665      0.049665      0.049107      0.049107  0.000000e+00
25%        0.288783      0.297991      0.283482      0.290179  1.197112e+08
50%        0.491071      0.500000      0.484375      0.491607  2.127776e+08
75%       17.223928     17.

In [53]:
# Add the closing price of the following row (the next day in the index) as
# "close_next". The last row has no successor, so its close_next is NaN.
df["close_next"] = df["Close"].shift(periods=-1)
# Remove every row that contains at least one NaN; this drops that last row
# (and would also drop any other row with a missing value).
df = df.dropna(axis=0, how="any")

In [54]:
# Restore the prepared NASDAQ Composite data set (df_IXIC) that was persisted
# with IPython's %store magic.
# NOTE(review): presumably stored by a separate notebook — that notebook must have
# been run first, otherwise df_IXIC is not available here.
%store -r df_IXIC

In [55]:
# Show the first five rows of the restored NASDAQ Composite frame
print(df_IXIC.head(n=5))

            diff_IXIC
Date                 
1980-12-12   1.308610
1980-12-15   1.464284
1980-12-16  -1.102979
1980-12-17   1.115280
1980-12-18   1.025663


In [61]:
# New column "IXIC": the daily percentage change of the NASDAQ Composite index.
# The assignment aligns on the index, so any date in df that is missing from
# df_IXIC ends up as NaN rather than raising an error.
df["IXIC"] = df_IXIC.loc[:, "diff_IXIC"]

In [57]:
# Print every row except the last one (pandas abbreviates the middle of a long
# frame with "..."), which shows both ends of the data after the new columns
# were added.
print(df.iloc[:-1])

                  Open        High         Low       Close     Volume   
Date                                                                    
1980-12-12    0.128348    0.128906    0.128348    0.128348  469033600  \
1980-12-15    0.122210    0.122210    0.121652    0.121652  175884800   
1980-12-16    0.113281    0.113281    0.112723    0.112723  105728000   
1980-12-17    0.115513    0.116071    0.115513    0.115513   86441600   
1980-12-18    0.118862    0.119420    0.118862    0.118862   73449600   
...                ...         ...         ...         ...        ...   
2023-04-19  165.800003  168.160004  165.539993  167.630005   47720200   
2023-04-20  166.089996  167.869995  165.559998  166.649994   52456400   
2023-04-21  165.050003  166.449997  164.490005  165.020004   58311900   
2023-04-24  165.000000  165.600006  163.889999  165.330002   41949600   
2023-04-25  165.190002  166.309998  163.729996  163.770004   48714100   

            close_next      IXIC  
Date           

In [58]:
# Hinzunahme des 20 Tage EMA in den Datensatz
%store -r ema_list

del ema_list[-1]

df.drop(df.index[:20], inplace = True)

ema_list = pd.Series(ema_list, index=df.index)

df["ema_20"] = ema_list

In [59]:
# Show the first five rows after the leading rows were removed and ema_20 added
print(df.head(n=5))

                Open      High       Low     Close    Volume  close_next   
Date                                                                       
1981-01-13  0.136719  0.136719  0.136161  0.136161  23049600    0.136719  \
1981-01-14  0.136719  0.137277  0.136719  0.136719  14291200    0.139509   
1981-01-15  0.139509  0.140625  0.139509  0.139509  14067200    0.138393   
1981-01-16  0.138951  0.138951  0.138393  0.138393  13395200    0.146763   
1981-01-19  0.146763  0.147321  0.146763  0.146763  41574400    0.142299   

                IXIC    ema_20  
Date                            
1981-01-13 -0.196532  0.138357  
1981-01-14  0.520070  0.138201  
1981-01-15  0.386781  0.138326  
1981-01-16  0.640480  0.138332  
1981-01-19  0.223734  0.139135  


In [60]:
# Persist the prepared data set under the name "df" with IPython's %store magic
# so that it can be restored elsewhere via `%store -r df`.
%store df

Stored 'df' (DataFrame)
