In [14]:
import pyodbc
import yfinance as yf
import pandas as pd
from sqlalchemy import create_engine
import datetime
from datetime import date, timedelta
from statsmodels.tsa.arima.model import ARIMA
import statsmodels.api as sm

# Verbindungsstring
conn_str = (
    r'Driver=SQL Server;'
    r'Server=.\SQLEXPRESS;'
    r'Database=studienprojekt;'
    r'Trusted_Connection=yes;'
)

# Verbindung zur Datenbank herstellen
cnxn = pyodbc.connect(conn_str)

# SQLAlchemy connectable erstellen
engine = create_engine('mssql+pyodbc://', creator=lambda: cnxn)

# Tabelle prüfen
table_name = "stock_data"  # Name der Zieltabelle
start_date = "2022-07-01"  # Startdatum, ab dem Daten überprüft werden sollen
end_date = "2023-07-01"  # Enddatum, bis zu dem Daten überprüft werden sollen

# Daten aus der Tabelle in einen DataFrame laden
select_query = f"SELECT * FROM {table_name} WHERE date >= ? AND date <= ?"
data = pd.read_sql(select_query, con=engine, params=(start_date, end_date))

# added
data["date"] = pd.to_datetime(data["date"])
data = data[["date", "open", "high", "low", "close", "volume", "type", "company"]]
data.reset_index(drop=True, inplace=True)

# Verbindung schließen
cnxn.close()

# DataFrame anzeigen
print(data.tail())




          date        open        high         low       close    volume type  \
246 2023-06-26  186.830002  188.050003  185.229996  185.270004  48088700  ACT   
247 2023-06-27  185.889999  188.389999  185.669998  188.059998  50730800  ACT   
248 2023-06-28  187.929993  189.899994  187.600006  189.250000  51216800  ACT   
249 2023-06-29  189.080002  190.070007  188.940002  189.589996  46347300  ACT   
250 2023-06-30  191.630005  194.479996  191.259995  193.970001  85069600  ACT   

    company  
246    AAPL  
247    AAPL  
248    AAPL  
249    AAPL  
250    AAPL  


In [10]:
from statsmodels.tsa.seasonal import seasonal_decompose
result = seasonal_decompose(data["close"], model='multiplicative', period=30)


p, d, q = 5, 1, 2

model = ARIMA(data["close"], order=(p,d,q))
fitted = model.fit()

print(fitted.summary())

predictions = fitted.predict()
print(predictions)

import statsmodels.api as sm
import warnings
model=sm.tsa.statespace.SARIMAX(data['close'],
                                order=(p, d, q),
                                seasonal_order=(p, d, q, 12))
model=model.fit()
print(model.summary())

# Vorhersagen erstellen
start_point = len(data)
end_point = start_point + 30
predictions = model.predict(start_point, end_point)

# Erstelle Datumsindex für die Vorhersagen
dates = pd.date_range(start=data["date"].iloc[-1] + pd.Timedelta(days=1), periods=30)

# DataFrame für die Vorhersagen erstellen
predictions_df = pd.DataFrame({'date': dates, 'close': predictions})

# Vorhersagen zum ursprünglichen DataFrame hinzufügen
data_f = pd.concat([data, predictions])

# DataFrame anzeigen
print(data_f)

                               SARIMAX Results                                
Dep. Variable:                  close   No. Observations:                  251
Model:                 ARIMA(5, 1, 2)   Log Likelihood                -604.830
Date:                Fri, 07 Jul 2023   AIC                           1225.659
Time:                        18:11:13   BIC                           1253.831
Sample:                             0   HQIC                          1236.998
                                - 251                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1212      0.242     -0.501      0.616      -0.595       0.353
ar.L2         -0.6587      0.183     -3.598      0.000      -1.017      -0.300
ar.L3         -0.0242      0.088     -0.275      0.7



                                     SARIMAX Results                                      
Dep. Variable:                              close   No. Observations:                  251
Model:             SARIMAX(5, 1, 2)x(5, 1, 2, 12)   Log Likelihood                -591.181
Date:                            Fri, 07 Jul 2023   AIC                           1212.362
Time:                                    18:11:36   BIC                           1264.446
Sample:                                         0   HQIC                          1233.353
                                            - 251                                         
Covariance Type:                              opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.6296      0.320      1.964      0.049       0.001       1.258
ar.L2          0.0679      0.332   

ValueError: array length 30 does not match index length 31

In [None]:
print(data.tail())

In [None]:
print(predictions)

In [5]:
# Vorhersagen in ein neues DataFrame umwandeln
pred_df = pd.DataFrame(predictions, columns=["close"])

# Datumsspalte erstellen, die auf den Tag nach dem letzten Datum in den vorhandenen Daten folgt
pred_df["date"] = pd.date_range(start=data["date"].iloc[-1] + timedelta(days=1), periods=len(pred_df), freq='D')

# Die Reihenfolge der Spalten anpassen
pred_df = pred_df[["date", "close"]]

# Die anderen Spalten in pred_df mit NaN-Werten füllen
for col in ["open", "high", "low", "volume", "type", "company"]:
    pred_df[col] = pd.np.nan

# Das Vorhersage-DataFrame an das ursprüngliche DataFrame anhängen
new_df = pd.concat([data, pred_df])

# DataFrame anzeigen
print(new_df.tail())


          date        open        high         low       close      volume  \
246 2023-06-26  186.830002  188.050003  185.229996  185.270004  48088700.0   
247 2023-06-27  185.889999  188.389999  185.669998  188.059998  50730800.0   
248 2023-06-28  187.929993  189.899994  187.600006  189.250000  51216800.0   
249 2023-06-29  189.080002  190.070007  188.940002  189.589996  46347300.0   
250 2023-06-30  191.630005  194.479996  191.259995  193.970001  85069600.0   

    type company  
246  ACT    AAPL  
247  ACT    AAPL  
248  ACT    AAPL  
249  ACT    AAPL  
250  ACT    AAPL  


  pred_df[col] = pd.np.nan
  pred_df[col] = pd.np.nan
  pred_df[col] = pd.np.nan
  pred_df[col] = pd.np.nan
  pred_df[col] = pd.np.nan
  pred_df[col] = pd.np.nan


In [11]:
print(data)

          date        open        high         low       close    volume type  \
0   2022-07-01  136.039993  139.039993  135.660004  138.929993  71051600  ACT   
1   2022-07-05  137.770004  141.610001  136.929993  141.559998  73353800  ACT   
2   2022-07-06  141.350006  144.119995  141.080002  142.919998  74064300  ACT   
3   2022-07-07  143.289993  146.550003  143.279999  146.350006  66253700  ACT   
4   2022-07-08  145.259995  147.550003  145.000000  147.039993  64547800  ACT   
..         ...         ...         ...         ...         ...       ...  ...   
246 2023-06-26  186.830002  188.050003  185.229996  185.270004  48088700  ACT   
247 2023-06-27  185.889999  188.389999  185.669998  188.059998  50730800  ACT   
248 2023-06-28  187.929993  189.899994  187.600006  189.250000  51216800  ACT   
249 2023-06-29  189.080002  190.070007  188.940002  189.589996  46347300  ACT   
250 2023-06-30  191.630005  194.479996  191.259995  193.970001  85069600  ACT   

    company  
0      AAPL  

In [13]:
print(predictions_df)

NameError: name 'predictions_df' is not defined