In [15]:
import requests
import pandas as pd

# Your API key from Alpha Vantage
api_key = "4FHTO2GAT3NL1EZ8"

# Fetch stock data
symbol = "IYW"

stock_url = f'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=IYW&apikey={api_key}&outputsize=full'
stock_r = requests.get(stock_url)
stock_data = stock_r.json()
time_series_data = stock_data['Time Series (Daily)']

# Convert the data into a DataFrame
df = pd.DataFrame(time_series_data).T
df.index = pd.to_datetime(df.index)

# Fetch Federal Funds Rate data
fed_url = f'https://www.alphavantage.co/query?function=FEDERAL_FUNDS_RATE&interval=daily&apikey={api_key}'
fed_r = requests.get(fed_url)
fed_data = fed_r.json()
fed_time_series_data = fed_data['data']
fed_dates = [entry['date'] for entry in fed_time_series_data]
fed_rates = [float(entry['value']) for entry in fed_time_series_data]
fed_df = pd.DataFrame({'Federal Funds Rate': fed_rates}, index=fed_dates)
fed_df.index = pd.to_datetime(fed_df.index)

# Merge DataFrames based on date index
plus_fed_df = df.join(fed_df, how='left')

# Fetch unemployment data
job_url = f'https://www.alphavantage.co/query?function=UNEMPLOYMENT&apikey={api_key}'
job_r = requests.get(job_url)
job_data = job_r.json()
job_time_series_data = job_data['data']
job_dates = [entry['date'] for entry in job_time_series_data]
job_rates = [float(entry['value']) for entry in job_time_series_data]
job_df = pd.DataFrame({'Unemployment Rate': job_rates}, index=job_dates)
job_df.index = pd.to_datetime(job_df.index)

# Resample unemployment data to match stock data frequency (daily)
job_df_resampled = job_df.resample('D').ffill()

# Merge resampled unemployment data into plus_fed_df
final_df = plus_fed_df.join(job_df_resampled, how='left')

# Fetch CPI data
cpi_url = f'https://www.alphavantage.co/query?function=CPI&interval=monthly&apikey={api_key}'
cpi_r = requests.get(cpi_url)
cpi_data = cpi_r.json()['data']
cpi_dates = [entry['date'] for entry in cpi_data]
cpi_values = [float(entry['value']) for entry in cpi_data]
cpi_df = pd.DataFrame({'CPI': cpi_values}, index=cpi_dates)
cpi_df.index = pd.to_datetime(cpi_df.index)

# Merge CPI data into final_df
final_feature_df = final_df.join(cpi_df, how='left')

# Calculate 20-day EMA of closing column
final_feature_df['20EMA'] = final_feature_df['4. close'].rolling(window=20).mean()

# Drop rows with NaN values
final_feature_df_cleaned = final_feature_df.dropna()

# Keep only necessary columns
final_feature_df_cleaned = final_feature_df_cleaned.drop(columns=['2. high', '3. low'])

# Print the final DataFrame
final_feature_df_cleaned.head(15)

Unnamed: 0,1. open,4. close,5. volume,Federal Funds Rate,Unemployment Rate,CPI,20EMA
2023-06-01,102.75,104.19,638213,5.08,3.6,305.109,106.112
2023-05-01,92.95,93.17,471856,4.83,3.7,304.127,95.5905
2023-03-01,83.23,82.71,524305,4.58,3.5,301.836,86.5925
2023-02-01,82.63,85.02,1126919,4.33,3.6,300.84,85.575
2022-12-01,81.48,81.72,405319,3.83,3.5,296.797,77.21
2022-11-01,77.34,75.53,509470,3.08,3.6,297.711,76.8885
2022-09-01,82.9,83.12,452121,2.33,3.5,296.808,79.993
2022-08-01,88.41,89.01,237822,2.33,3.7,296.171,91.0715
2022-07-01,79.46,79.93,323715,1.58,3.5,296.276,83.958
2022-06-01,89.25,88.04,480184,0.83,3.6,296.311,83.512


In [7]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math

In [8]:
x = final_feature_df_cleaned[["1. open", "5. volume","20EMA","Federal Funds Rate","Unemployment Rate","20EMA","CPI"]].to_numpy()
y = final_feature_df_cleaned["4. close"].to_numpy()

# Normalize the input data
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

# Prepare sequence data
sequence_length = 100  # Length of input sequences
sequences = []
targets = []

for i in range(len(y_scaled) - sequence_length - 60):
    sequences.append(x_scaled[i:i+sequence_length])
    targets.append(y_scaled[i+sequence_length:i+sequence_length+60])

# Convert lists to NumPy arrays
sequences = np.array(sequences)
targets = np.array(targets)

# Split into training and testing sets
split_ratio = 0.8
split_index = int(split_ratio * len(sequences))

x_train = sequences[:split_index]
y_train = targets[:split_index]

x_test = sequences[split_index:]
y_test = targets[split_index:]

# Build LSTM model
model_lstm = Sequential()
model_lstm.add(LSTM(128, return_sequences=True, input_shape=(sequence_length, x.shape[1])))
model_lstm.add(Dropout(0.2))  # Add Dropout layer after each LSTM layer

model_lstm.add(LSTM(64, return_sequences=False))
model_lstm.add(Dropout(0.2))  # Add Dropout layer after each LSTM layer

model_lstm.add(Dense(25))
model_lstm.add(Dense(60))  # Predict 60 days of prices

# Compile the model
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
model_lstm.fit(x_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Make predictions using the model
predictions_scaled = model_lstm.predict(x_test)

# Inverse normalize the predicted results
predictions = scaler.inverse_transform(predictions_scaled)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [9]:
y_test_2d = y_test.reshape(-1, 1)
predictions_2d = predictions.reshape(-1, 1)

rmse_test = np.sqrt(mean_squared_error(y_test_2d, predictions_2d))
rmse_test

rmse = np.sqrt(mean_squared_error(y_test_2d, predictions_2d))


In [10]:
model_lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 100, 128)          69632     
                                                                 
 dropout (Dropout)           (None, 100, 128)          0         
                                                                 
 lstm_1 (LSTM)               (None, 64)                49408     
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense (Dense)               (None, 25)                1625      
                                                                 
 dense_1 (Dense)             (None, 60)                1560      
                                                                 
Total params: 122,225
Trainable params: 122,225
Non-trai

In [None]:
# Visualize the predicted results
plt.figure(figsize=(10, 6))
plt.plot(y[-60:], label='Actual Prices')
plt.plot(predictions[0], label='Predicted Prices')
plt.xlabel('Days')
plt.ylabel('Price')
plt.title('Actual vs. Predicted Prices for the Next 60 Days')
plt.legend()
plt.show()